Commit fdd29946 authored by Robert Schmidt's avatar Robert Schmidt

Merge remote-tracking branch 'origin/rotate-cpx-vector-benchmark' into integration_2024_w35

parents 468ba054 08839048
......@@ -2367,6 +2367,19 @@ if(ENABLE_TESTS)
add_library(GTest::gtest ALIAS gtest)
add_library(GTest::gtest_main ALIAS gtest_main)
endif()
# Google Benchmark is used by the micro-benchmarks; look for an installed package first.
find_package(benchmark)
if (NOT benchmark_FOUND)
# No installed package was found: fetch and build Google Benchmark as part of this build.
message(STATUS "benchmark package not found, will download benchmark automatically. To prevent that install google benchmark on your system (libbenchmark-dev)")
include(FetchContent)
# BENCHMARK_ENABLE_TESTING is Google Benchmark's own option; switch off its self-tests
# before the project is added.
set(BENCHMARK_ENABLE_TESTING OFF)
# The download is pinned to a fixed release tag.
FetchContent_Declare(
benchmark
GIT_REPOSITORY https://github.com/google/benchmark.git
GIT_TAG v1.9.0
)
FetchContent_MakeAvailable(benchmark)
endif()
endif()
add_subdirectory(common)
......
......@@ -107,3 +107,65 @@ that directory directly, which you might also use to compare to the
slight complication is due to using shell scripts. An easier way is to
directly declare the executable in `add_test()`, and `ctest` will locate and
run the executable properly.
# Benchmarking
Google Benchmark can be used to profile and benchmark small pieces of code. See
`benchmark_rotate_vector` for a reference implementation. To start benchmarking code,
write a benchmark first, then compare your implementation against the baseline result.
To ensure your results are reproducible, see this [guide](https://github.com/google/benchmark/blob/main/docs/reducing_variance.md).
Example output follows:
```bash
2024-08-26T11:55:49+02:00
Running ./openair1/PHY/TOOLS/tests/benchmark_rotate_vector
Run on (8 X 4700 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x4)
L1 Instruction 32 KiB (x4)
L2 Unified 1280 KiB (x4)
L3 Unified 12288 KiB (x1)
Load Average: 0.51, 0.31, 0.29
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
---------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------
BM_rotate_cpx_vector/100 43.1 ns 43.1 ns 16683136
BM_rotate_cpx_vector/256 70.1 ns 70.0 ns 9647446
BM_rotate_cpx_vector/1024 277 ns 277 ns 2378273
BM_rotate_cpx_vector/4096 1070 ns 1070 ns 654792
BM_rotate_cpx_vector/16384 4220 ns 4220 ns 169070
BM_rotate_cpx_vector/20000 5288 ns 5289 ns 136190
```
## Comparing results
Benchmark results can be written to a JSON file using command-line arguments, for example:
```bash
./benchmark_rotate_vector --benchmark_out=file.json --benchmark_repetitions=10
```
Two such result files can be compared with the `compare.py` tool provided with Google Benchmark:
```bash
./compare.py benchmarks ../../file.json ../../file1.json
```
Example output:
```
Comparing ../../file.json to ../../file1.json
Benchmark Time CPU Time Old Time New CPU Old CPU New
--------------------------------------------------------------------------------------------------------------------------------
BM_rotate_cpx_vector/100 +0.3383 +0.3384 43 58 43 58
BM_rotate_cpx_vector/100 +0.2334 +0.2335 42 52 42 52
BM_rotate_cpx_vector/100 +0.1685 +0.1683 42 49 42 49
BM_rotate_cpx_vector/100 +0.1890 +0.1889 42 50 42 50
BM_rotate_cpx_vector/100 +0.0456 +0.0457 42 44 42 44
BM_rotate_cpx_vector/100 +0.0163 +0.0162 42 42 42 42
BM_rotate_cpx_vector/100 +0.0005 +0.0004 43 43 43 43
BM_rotate_cpx_vector/100 +0.0134 +0.0129 43 43 43 43
BM_rotate_cpx_vector/100 +0.0162 +0.0162 42 42 42 42
BM_rotate_cpx_vector/100 +0.0003 +0.0003 42 42 42 42
```
......@@ -56,18 +56,9 @@ void exit_function(const char *file, const char *function, const int line, const
#include "common/utils/LOG/log.h"
#include <cstdlib>
#include <memory>
#include "openair1/PHY/TOOLS/phy_test_tools.hpp"
#include <random>
// True when n has exactly one bit set, i.e. n is a power of two. Zero is rejected.
constexpr bool is_power_of_two(uint64_t n)
{
  // n & (n - 1) clears the lowest set bit; the result is zero only if that was the sole bit.
  return (n == 0) ? false : ((n & (n - 1)) == 0);
}
// Rounds a up to the next multiple of b (a itself when it already is a multiple).
// b must be non-zero.
size_t align_up(size_t a, size_t b)
{
  // Number of b-sized chunks needed to hold a, rounded up.
  const size_t chunks = (a + b - 1) / b;
  return chunks * b;
}
int16_t saturating_sub(int16_t a, int16_t b)
{
int32_t result = (int32_t)a - (int32_t)b;
......@@ -81,44 +72,6 @@ int16_t saturating_sub(int16_t a, int16_t b)
}
}
// Allocator adaptor for std::vector that hands out storage aligned to `alignment` bits.
// Needed because the avx functions expect 256 bit alignment.
template <typename T, size_t alignment>
class AlignedAllocator {
 public:
  static_assert(is_power_of_two(alignment), "Alignment should be power of 2");
  static_assert(alignment >= 8, "Alignment must be at least 8 bits");

  using value_type = T;

  // Rebinding has to be spelled out because of the non-type template parameter.
  template <typename U>
  struct rebind {
    using other = AlignedAllocator<U, alignment>;
  };

  AlignedAllocator() = default;
  AlignedAllocator(const AlignedAllocator &) = default;
  AlignedAllocator &operator=(const AlignedAllocator &) = default;

  // Returns storage for n objects of type T; the byte size is rounded up to a
  // multiple of the alignment before it is passed to aligned_alloc().
  T *allocate(size_t n)
  {
    // The template parameter is in bits, aligned_alloc() takes bytes.
    const size_t align_bytes = alignment / 8;
    const size_t padded_size = align_up(n * sizeof(T), align_bytes);
    return static_cast<T *>(::aligned_alloc(align_bytes, padded_size));
  }

  // Releases storage obtained from allocate(); n is not used.
  void deallocate(T *p, size_t n)
  {
    ::free(p);
  }
};
// Using 512-aligned vector in case some functions use avx-512.
// AlignedAllocator takes its alignment in bits, so 512 here means 64-byte aligned storage.
template <typename T>
using AlignedAllocator512 = AlignedAllocator<T, 512>;
// std::vector whose element storage is allocated through AlignedAllocator512.
template <typename T>
using AlignedVector512 = std::vector<T, AlignedAllocator512<T>>;
void nr_ulsch_16qam_llr_ref(c16_t *rxdataF_comp, int32_t *ul_ch_mag, int16_t *ulsch_llr, uint32_t nb_re, uint8_t symbol)
{
int16_t *ul_ch_mag_i16 = (int16_t *)ul_ch_mag;
......@@ -190,27 +143,6 @@ void nr_ulsch_256qam_llr_ref(c16_t *rxdataF_comp,
}
}
// Returns an aligned vector of num complex samples whose two components are drawn
// uniformly from the full int16_t range. The generator is seeded from
// std::random_device, so every call yields different data.
AlignedVector512<c16_t> generate_random_c16(size_t num)
{
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_int_distribution<int16_t> sample(INT16_MIN, INT16_MAX);

  AlignedVector512<c16_t> samples;
  samples.resize(num);
  for (auto &s : samples) {
    s = (c16_t){sample(engine), sample(engine)};
  }
  return samples;
}
// Returns an aligned vector of num values, each taken from std::rand() and
// truncated to uint16_t. std::rand() is not seeded here.
AlignedVector512<uint16_t> generate_random_uint16(size_t num)
{
  AlignedVector512<uint16_t> values;
  values.resize(num);
  for (auto &v : values) {
    v = static_cast<uint16_t>(std::rand());
  }
  return values;
}
void test_function_16_qam(AlignedVector512<uint32_t> nb_res)
{
for (auto i = 0U; i < nb_res.size(); i++) {
......
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
#ifndef __PHY_TEST_TOOLS_HPP__
#define __PHY_TEST_TOOLS_HPP__
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <new>
#include <random>
#include <vector>
extern "C" {
#include "openair1/PHY/TOOLS/tools_defs.h"
}
// Compile-time check that n is a power of two (exactly one bit set). Zero is not.
constexpr bool is_power_of_two(uint64_t n)
{
  // Reject zero and any value that still has bits left after clearing the lowest set bit.
  return !(n == 0 || (n & (n - 1)) != 0);
}
// Rounds a up to the next multiple of b (a itself when it already is a multiple).
// b must be non-zero.
// Declared constexpr, and therefore implicitly inline: this function is defined in a
// header, and a plain definition would be emitted by every translation unit that
// includes it, causing multiple-definition errors at link time.
constexpr size_t align_up(size_t a, size_t b)
{
  return (a + b - 1) / b * b;
}
// Allocator for std::vector that returns storage aligned to `alignment` bits.
// This is needed because the avx functions expect 256 bit alignment.
//
// T         element type
// alignment requested alignment in bits; must be a power of two and at least 8
template <typename T, size_t alignment>
class AlignedAllocator {
 public:
  static_assert(is_power_of_two(alignment), "Alignment should be power of 2");
  static_assert(alignment >= 8, "Alignment must be at least 8 bits");
  using value_type = T;

  AlignedAllocator() = default;
  AlignedAllocator(const AlignedAllocator &) = default;
  AlignedAllocator &operator=(const AlignedAllocator &) = default;
  // Converting constructor from a rebound allocator, as the Allocator requirements
  // demand. The allocator carries no state, so there is nothing to copy.
  template <typename U>
  AlignedAllocator(const AlignedAllocator<U, alignment> &) noexcept
  {
  }

  // Has to be spelled out because of the non-type template parameter.
  template <typename U>
  struct rebind {
    using other = AlignedAllocator<U, alignment>;
  };

  // Returns aligned storage for n objects of type T.
  // Throws std::bad_alloc when the requested size overflows or the allocation fails,
  // instead of handing a null pointer to the container.
  T *allocate(size_t n)
  {
    // aligned_alloc() may legitimately return a null pointer for a zero-sized request;
    // do not report that as an allocation failure.
    if (n == 0)
      return nullptr;
    // The template parameter is in bits, aligned_alloc() takes bytes.
    const size_t alignment_bytes = alignment / 8;
    // Guard both n * sizeof(T) and the round-up addition in align_up() against wrap-around.
    if (n > (std::numeric_limits<size_t>::max() - (alignment_bytes - 1)) / sizeof(T))
      throw std::bad_alloc();
    // aligned_alloc() wants a size that is a multiple of the alignment.
    void *ptr = ::aligned_alloc(alignment_bytes, align_up(n * sizeof(T), alignment_bytes));
    if (ptr == nullptr)
      throw std::bad_alloc();
    return static_cast<T *>(ptr);
  }

  // Releases storage obtained from allocate(); n is not used.
  void deallocate(T *p, size_t n) noexcept
  {
    (void)n;
    ::free(p);
  }
};

// All AlignedAllocator instances with the same alignment are interchangeable:
// memory obtained from one can be released through any other.
template <typename T, typename U, size_t alignment>
bool operator==(const AlignedAllocator<T, alignment> &, const AlignedAllocator<U, alignment> &) noexcept
{
  return true;
}

template <typename T, typename U, size_t alignment>
bool operator!=(const AlignedAllocator<T, alignment> &, const AlignedAllocator<U, alignment> &) noexcept
{
  return false;
}
// Using 512-aligned vector in case some functions use avx-512.
// The alignment argument of AlignedAllocator is in bits, i.e. 512 bits = 64 bytes.
template <typename T>
using AlignedAllocator512 = AlignedAllocator<T, 512>;
// std::vector specialisation that allocates its elements through AlignedAllocator512.
template <typename T>
using AlignedVector512 = std::vector<T, AlignedAllocator512<T>>;
// Returns an aligned vector of num complex samples whose two components are drawn
// uniformly from the full int16_t range. The generator is seeded from
// std::random_device, so the data differs between calls.
// Marked inline because it is defined in a header: without it, including this header
// from more than one translation unit produces multiple-definition link errors.
inline AlignedVector512<c16_t> generate_random_c16(size_t num)
{
  std::random_device rd;
  std::mt19937 rng(rd());
  std::uniform_int_distribution<int16_t> dist(INT16_MIN, INT16_MAX);
  AlignedVector512<c16_t> vec;
  vec.resize(num);
  auto gen = [&]() { return (c16_t){dist(rng), dist(rng)}; };
  std::generate(vec.begin(), vec.end(), gen);
  return vec;
}
// Returns an aligned vector of num values, each taken from std::rand() and truncated
// to uint16_t. std::rand() is not seeded here.
// Marked inline because it is defined in a header: without it, including this header
// from more than one translation unit produces multiple-definition link errors.
inline AlignedVector512<uint16_t> generate_random_uint16(size_t num)
{
  AlignedVector512<uint16_t> vec;
  vec.resize(num);
  auto gen = [&]() { return static_cast<uint16_t>(std::rand()); };
  std::generate(vec.begin(), vec.end(), gen);
  return vec;
}
#endif
......@@ -3,3 +3,9 @@ target_link_libraries(test_signal_energy PRIVATE GTest::gtest UTIL)
add_dependencies(tests test_signal_energy)
add_test(NAME test_signal_energy
COMMAND ./test_signal_energy)
# Micro-benchmark executable for rotate_cpx_vector, built against Google Benchmark.
add_executable(benchmark_rotate_vector benchmark_rotate_vector.cpp)
target_link_libraries(benchmark_rotate_vector PRIVATE benchmark::benchmark UTIL PHY_NR)
# Build it whenever the aggregate "tests" target is built.
add_dependencies(tests benchmark_rotate_vector)
# Also registered with CTest, so a ctest run executes the benchmark.
add_test(NAME benchmark_rotate_vector
COMMAND ./benchmark_rotate_vector)
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
#include <stdint.h>
#include <vector>
#include <algorithm>
#include <numeric>
extern "C" {
#include "openair1/PHY/TOOLS/tools_defs.h"
struct configmodule_interface_s;
struct configmodule_interface_s *uniqCfg = NULL;
// Minimal exit_function for this benchmark binary: aborts when `assert` is non-zero,
// otherwise terminates with EXIT_SUCCESS. file, function, line and s are ignored.
void exit_function(const char *file, const char *function, const int line, const char *s, const int assert)
{
  if (!assert) {
    exit(EXIT_SUCCESS);
  }
  abort();
}
}
#include <cstdio>
#include "common/utils/LOG/log.h"
#include "benchmark/benchmark.h"
#include "openair1/PHY/TOOLS/phy_test_tools.hpp"
// Times rotate_cpx_vector() for the vector length given by the benchmark argument
// state.range(0). Inputs and the output buffer are prepared once, outside the timed loop.
static void BM_rotate_cpx_vector(benchmark::State &state)
{
  const int num_samples = state.range(0);

  // Random input data in 512-bit aligned buffers.
  auto samples = generate_random_c16(num_samples);
  auto alpha = generate_random_c16(num_samples);

  AlignedVector512<c16_t> rotated;
  rotated.resize(num_samples);

  const int shift_amount = 2;

  // Only the body of this loop is measured.
  for (auto _ : state) {
    rotate_cpx_vector(samples.data(), alpha.data(), rotated.data(), num_samples, shift_amount);
  }
}
// Register the benchmark for a range of vector sizes between 100 and 20000 with a
// multiplier of 4; this yields the runs 100, 256, 1024, 4096, 16384 and 20000.
BENCHMARK(BM_rotate_cpx_vector)->RangeMultiplier(4)->Range(100, 20000);
// Provides the main() of the benchmark executable.
BENCHMARK_MAIN();
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment