Commit 6a5d8b86 authored by hardy's avatar hardy

Merge remote-tracking branch 'origin/NR_ldpc_cuda_softmodem' into integration_2021_wk44

parents af7d9cd9 8dd5346e
......@@ -309,6 +309,7 @@ endif()
#
# add autotools definitions that were maybe used!
add_definitions("-DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_FCNTL_H=1 -DHAVE_ARPA_INET_H=1 -DHAVE_SYS_TIME_H=1 -DHAVE_SYS_SOCKET_H=1 -DHAVE_STRERROR=1 -DHAVE_SOCKET=1 -DHAVE_MEMSET=1 -DHAVE_GETTIMEOFDAY=1 -DHAVE_STDLIB_H=1 -DHAVE_MALLOC=1 -DHAVE_LIBSCTP")
set(commonOpts "-pipe -Wno-packed-bitfield-compat -fPIC -Wall -fno-strict-aliasing -rdynamic")
......@@ -318,11 +319,7 @@ set(CMAKE_C_FLAGS
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} ${C_FLAGS_PROCESSOR} ${commonOpts} -std=c++11")
# cuda compiler bug (limitation) on complex macro definition
if (CUDA_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCUDA_FLAG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDA_FLAG")
endif()
if (SANITIZE_ADDRESS)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-common")
......@@ -1590,6 +1587,16 @@ set(PHY_LDPC_OPTIM8SEGMULTI_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
)
set(PHY_LDPC_CUDA_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
)
set(PHY_LDPC_CL_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
)
set(PHY_NR_CODINGIF
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_load.c;
)
......@@ -1597,8 +1604,18 @@ set(PHY_NR_CODINGIF
add_library(ldpc_orig MODULE ${PHY_LDPC_ORIG_SRC} )
add_library(ldpc_optim MODULE ${PHY_LDPC_OPTIM_SRC} )
add_library(ldpc_optim8seg MODULE ${PHY_LDPC_OPTIM8SEG_SRC} )
add_library(ldpc_cl MODULE ${PHY_LDPC_CL_SRC} )
target_link_libraries(ldpc_cl OpenCL)
if (CUDA_FOUND)
cuda_add_library(ldpc_cuda MODULE ${PHY_LDPC_CUDA_SRC} )
set_target_properties(ldpc_cuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
# CUDA_ADD_CUFFT_TO_TARGET(ldpc_cuda)
endif (CUDA_FOUND)
add_library(ldpc MODULE ${PHY_LDPC_OPTIM8SEGMULTI_SRC} )
add_library(coding MODULE ${PHY_TURBOSRC} )
add_library(dfts MODULE ${OPENAIR1_DIR}/PHY/TOOLS/oai_dfts.c )
......@@ -3119,7 +3136,11 @@ target_link_libraries (nr-uesoftmodem ${LIB_LMS_LIBRARIES})
target_link_libraries (nr-uesoftmodem ${T_LIB})
add_dependencies( nr-uesoftmodem ldpc_orig ldpc_optim ldpc_optim8seg ldpc )
if (CUDA_FOUND)
add_dependencies( nr-uesoftmodem ldpc_cuda)
add_dependencies( nr-softmodem ldpc_cuda)
add_dependencies( ocp-gnb ldpc_cuda)
endif (CUDA_FOUND)
###################################"
# Addexecutables for tests
####################################
......@@ -3179,46 +3200,18 @@ target_link_libraries(smallblocktest
m pthread ${ATLAS_LIBRARIES} dl
)
if (CUDA_FOUND)
###################################################
# For CUDA library
###################################################
CUDA_ADD_LIBRARY(LDPC_CU
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
)
CUDA_ADD_CUFFT_TO_TARGET(LDPC_CU)
cuda_add_executable(ldpctest
${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
${T_SOURCE}
${SHLIB_LOADER_SOURCES}
)
target_link_libraries(ldpctest -ldl
-Wl,--start-group
LDPC_CU UTIL SIMU PHY_NR CONFIG_LIB
-Wl,--end-group
m pthread ${ATLAS_LIBRARIES} dl
)
else (CUDA_FOUND)
add_executable(ldpctest
add_executable(ldpctest
${PHY_NR_CODINGIF}
${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
${T_SOURCE}
${SHLIB_LOADER_SOURCES}
)
endif ()
# add_executable(ldpctest
# ${PHY_NR_CODINGIF}
# ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
# ${T_SOURCE}
# ${SHLIB_LOADER_SOURCES}
# )
add_dependencies( ldpctest ldpc_orig ldpc_optim ldpc_optim8seg ldpc )
if (CUDA_FOUND)
add_dependencies( ldpctest ldpc_cuda)
endif (CUDA_FOUND)
target_link_libraries(ldpctest
-Wl,--start-group UTIL SIMU_COMMON SIMU PHY_NR PHY_COMMON PHY_NR_COMMON CONFIG_LIB -Wl,--end-group
m pthread ${ATLAS_LIBRARIES} dl
......
......@@ -10,6 +10,10 @@ int load_module_shlib(char *modname,loader_shlibfunc_t *farray, int numf)
* If the farray pointer is null, looks for `< modname >_getfarray` symbol, calls the corresponding function when the symbol is found. `< modname >_getfarray` takes one argument, a pointer to a `loader_shlibfunc_t` array, and returns the number of items in this array, as defined by the `getfarrayfunc_t` type. The `loader_shlibfunc_t` array returned by the shared library must be fully filled (both `fname` and `fptr` fields).
* looks for the `numf` function symbols listed in the `farray[i].fname` arguments and set the corresponding `farray[i].fptr`function pointers
```c
int load_module_version_shlib(char *modname, char *version, loader_shlibfunc_t *farray, int numf)
```
Allows loading a specific library version, as specified by the `version` parameter. When version is not NULL the version that is possibly specified as a config module parameter is ignored. This call has been introduced for phy simulators executables which do not use the config module. It is used, for example, by the ldcp initialization (`load_nrLDPClib` function in [nrLDPC_load.c](../../../../../openair1/PHY/CODING/nrLDPC_load.c) to allow the `ldpctest` simulator to select the cuda accelerated ldcp implementation. `load_module_shlib` is just a define macro to switch to a `load_module_version_shlib` call, adding a NULL pointer for the version parameter.
```c
void * get_shlibmodule_fptr(char *modname, char *fname)
......
......@@ -63,7 +63,7 @@ void loader_init(void) {
}
/* build the full shared lib name from the module name */
char *loader_format_shlibpath(char *modname)
char *loader_format_shlibpath(char *modname, char *version)
{
char *tmpstr;
......@@ -97,7 +97,10 @@ int ret;
shlibpath = loader_data.shlibpath ;
}
/* no specific shared lib version */
if (shlibversion == NULL) {
if (version != NULL) { // version specified as a function parameter
shlibversion=version;
}
if (shlibversion == NULL) { // no specific version specified, neither as a config param or as a function param
shlibversion = "" ;
}
/* alloc memory for full module shared lib file name */
......@@ -118,7 +121,7 @@ int ret;
return tmpstr;
}
int load_module_shlib(char *modname,loader_shlibfunc_t *farray, int numf, void *autoinit_arg)
int load_module_version_shlib(char *modname, char *version, loader_shlibfunc_t *farray, int numf, void *autoinit_arg)
{
void *lib_handle = NULL;
initfunc_t fpi;
......@@ -138,7 +141,7 @@ int load_module_shlib(char *modname,loader_shlibfunc_t *farray, int numf, void *
loader_init();
}
shlib_path = loader_format_shlibpath(modname);
shlib_path = loader_format_shlibpath(modname, version);
for (int i = 0; i < loader_data.numshlibs; i++) {
if (strcmp(loader_data.shlibs[i].name, modname) == 0) {
......
......@@ -84,10 +84,11 @@ loader_data_t loader_data;
/*-------------------------------------------------------------------------------------------------------------*/
#else /* LOAD_MODULE_SHLIB_MAIN */
extern int load_module_shlib(char *modname, loader_shlibfunc_t *farray, int numf, void *initfunc_arg);
extern int load_module_version_shlib(char *modname, char *version, loader_shlibfunc_t *farray, int numf, void *initfunc_arg);
extern void * get_shlibmodule_fptr(char *modname, char *fname);
extern loader_data_t loader_data;
#endif /* LOAD_MODULE_SHLIB_MAIN */
#define load_module_shlib(M, F, N, I) load_module_version_shlib(M, NULL, F, N, I)
#endif
......@@ -203,10 +203,7 @@ UE on machine 2:
[Selecting an alternative ldpc implementation at run time](../openair1/PHY/CODING/DOC/LDPCImplementation.md)
[oai wiki home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home)
......
......@@ -436,7 +436,7 @@ int main( int argc, char **argv ) {
itti_init(TASK_MAX, tasks_info);
init_opt() ;
load_nrLDPClib();
load_nrLDPClib(NULL);
if (ouput_vcd) {
vcd_signal_dumper_init("/tmp/openair_dump_nrUE.vcd");
......
#LDPC coder/decoder implementation
# LDPC coder/decoder implementation
The LDPC coder and decoder are implemented in a shared library, dynamically loaded at run-time using the [oai shared library loader](file://../../../../common/utils/DOC/loader.md). The code loading the LDPC library is in [nrLDPC_load.c](file://../nrLDPC_load.c), in function `load_nrLDPClib`, which must be called at init time.
## Selecting the LDPC library at run time
By default the function `int load_nrLDPClib(void)` looks for `libldpc.so`, this default behavior can be changed using the oai loader configuration options in the configuration file or from the command line as shown below:
>loading `libldpc_optim8seg.so` instead of `libldpc.so`
#### Examples of ldpc shared lib selection when running nr softmodem's:
loading `libldpc_optim8seg.so` instead of `libldpc.so`:
```
./nr-softmodem -O libconfig:gnb.band78.tm1.106PRB.usrpx300.conf:dbgl5 --loader.ldpc.shlibversion _optim8seg
......@@ -18,9 +20,160 @@ By default the function `int load_nrLDPClib(void)` looks for `libldpc.so`, this
........................
```
Today, this mechanism is not available in the `ldpctest` phy simulator which doesn't initialize the [configuration module](file://../../../../common/config/DOC/config.md). loads `libldpc.so` and `libldpc_orig.so` to compare the performance of the two implementations.
loading `libldpc_cl.so` instead of `libldpc.so`:
`make ldpc_cl`
`cp ../../../openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.cl`
`./nr-softmodem -O libconfig:gnb.band78.sa.fr1.106PRB.usrpb210.conf:dbgl5 --rfsim --rfsimulator.serveraddr server --sa --log_config.gtpu_log_level info --loader.ldpc.shlibversion _cl`
``` [LOADER] library libldpc_cl.so successfully loaded
------------------------------------------------
[HW] Platform 0, OpenCL profile FULL_PROFILE
[HW] Platform 0, OpenCL version OpenCL 2.1 LINUX
[HW] Device 0 is available
[HW] Device 0, type 2 = 0x00000002: cpu
[HW] Device 0, number of Compute Units: 8
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 8192
[HW] Device 0, max Work Items size for dimension: 1 8192
[HW] Device 0, max Work Items size for dimension: 2 8192
[New Thread 0x7fffcc258700 (LWP 3945123)]
[New Thread 0x7fffc3e57700 (LWP 3945124)]
[New Thread 0x7fffcbe57700 (LWP 3945125)]
[New Thread 0x7fffcba56700 (LWP 3945126)]
[New Thread 0x7fffcb254700 (LWP 3945128)]
[New Thread 0x7fffcb655700 (LWP 3945127)]
[New Thread 0x7fffcae53700 (LWP 3945129)]
[HW] Platform 1, OpenCL profile FULL_PROFILE
[HW] Platform 1, OpenCL version OpenCL 2.0 beignet 1.3
[New Thread 0x7fffc965a700 (LWP 3945130)]
[Thread 0x7fffc965a700 (LWP 3945130) exited]
[HW] Device 0 is available
[HW] Device 0, type 4 = 0x00000004: gpu
[HW] Device 0, number of Compute Units: 20
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 512
[HW] Device 0, max Work Items size for dimension: 1 512
[HW] Device 0, max Work Items size for dimension: 2 512
-----------------------------------------------------------------
```
`./nr-uesoftmodem -r 106 --numerology 1 --band 78 -C 3619200000 --rfsim --sa -O libconfig:/usr/local/oai/conf/nrue_sim.conf:dbgl5 --nokrnmod --loader.ldpc.shlibversion _cl --log_config.hw_log_level info`
```[CONFIG] shlibversion set to _cl from command line
............................................................
[CONFIG] loader.ldpc 1 options set from command line
[LOADER] library libldpc_cl.so successfully loaded
[HW] Platform 0, OpenCL profile FULL_PROFILE
[HW] Platform 0, OpenCL version OpenCL 2.1 LINUX
[HW] Device 0 is available
[HW] Device 0, type 2 = 0x00000002: cpu
[HW] Device 0, number of Compute Units: 8
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 8192
[HW] Device 0, max Work Items size for dimension: 1 8192
[HW] Device 0, max Work Items size for dimension: 2 8192
[New Thread 0x7fffecccc700 (LWP 3945413)]
[New Thread 0x7fffec8cb700 (LWP 3945415)]
[New Thread 0x7fffec4ca700 (LWP 3945414)]
[New Thread 0x7fffdf7fd700 (LWP 3945417)]
[New Thread 0x7fffdfbfe700 (LWP 3945418)]
[New Thread 0x7fffdffff700 (LWP 3945416)]
[New Thread 0x7fffd73fc700 (LWP 3945419)]
[HW] Platform 1, OpenCL profile FULL_PROFILE
[HW] Platform 1, OpenCL version OpenCL 2.0 beignet 1.3
[New Thread 0x7fffde105700 (LWP 3945420)]
[Thread 0x7fffde105700 (LWP 3945420) exited]
[HW] Device 0 is available
[HW] Device 0, type 4 = 0x00000004: gpu
[HW] Device 0, number of Compute Units: 20
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 512
[HW] Device 0, max Work Items size for dimension: 1 512
[HW] Device 0, max Work Items size for dimension: 2 512
------------------------------------------------------------
```
```
A mechanism to select ldpc implementation is also available in the `ldpctest` phy simulator via the `-v`option, which can be used to specify the version of the ldpc shared library to be used.
#### Examples of ldpc shared lib selection when running ldpctest:
Loading libldpc_cuda.so, the cuda implementation of the ldpc decoder:
```$ ./ldpctest -v _cuda
Initializing random number generator, seed 0
block length 8448:
n_trials 1:
SNR0 -2.000000:
[CONFIG] get parameters from cmdline , debug flags: 0x00400000
[CONFIG] log_config: 2/3 parameters successfully set
[CONFIG] log_config: 53/53 parameters successfully set
[CONFIG] log_config: 53/53 parameters successfully set
[CONFIG] log_config: 16/16 parameters successfully set
[CONFIG] log_config: 16/16 parameters successfully set
log init done
[CONFIG] loader: 2/2 parameters successfully set
[CONFIG] loader.ldpc: 1/2 parameters successfully set
[LOADER] library libldpc_cuda.so successfully loaded
...................................
​```
```
Loading libldpc_cl.so, the opencl implementation of the ldpc decoder:
`make ldpc_cl`
`cp ../../../openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.cl`
`./ldpctest -v _cl`
```$ ./ldpctest -v _cl
Initializing random number generator, seed 0
block length 8448:
n_trials 1:
SNR0 -2.000000:
[CONFIG] get parameters from cmdline , debug flags: 0x00400000
[CONFIG] log_config: 2/3 parameters successfully set
[CONFIG] log_config: 53/53 parameters successfully set
[CONFIG] log_config: 53/53 parameters successfully set
[CONFIG] log_config: 16/16 parameters successfully set
[CONFIG] log_config: 16/16 parameters successfully set
log init done
[CONFIG] loader: 2/2 parameters successfully set
[CONFIG] loader.ldpc: 1/2 parameters successfully set
[LOADER] library libldpc_cl.so successfully loaded
[HW] Platform 0, OpenCL profile FULL_PROFILE
[HW] Platform 0, OpenCL version OpenCL 2.1 LINUX
[HW] Device 0 is available
[HW] Device 0, type 2 = 0x00000002: cpu
[HW] Device 0, number of Compute Units: 8
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 8192
[HW] Device 0, max Work Items size for dimension: 1 8192
[HW] Device 0, max Work Items size for dimension: 2 8192
[HW] Platform 1, OpenCL profile FULL_PROFILE
[HW] Platform 1, OpenCL version OpenCL 2.0 beignet 1.3
[HW] Device 0 is available
[HW] Device 0, type 4 = 0x00000004: gpu
[HW] Device 0, number of Compute Units: 20
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 512
[HW] Device 0, max Work Items size for dimension: 1 512
[HW] Device 0, max Work Items size for dimension: 2 512
................................
​```
```
### LDPC libraries
Libraries implementing the LDPC algorithms must be named `libldpc<_version>.so`, they must implement three functions: `nrLDPC_initcall` `nrLDPC_decod` and `nrLDPC_encod`. The prototypes for these functions is defined in [nrLDPC_defs.h](file://nrLDPC_defs.h).
`libldpc_cuda.so`has been tested with the `ldpctest` executable, usage from the softmodem's has to be tested.
###LDPC libraries
Libraries implementing the LDPC algorithms must be named `libldpc<_version>.so`, they must implement two functions: `nrLDPC_decod` and `nrLDPC_encod`. The prototypes for these functions is defined in [nrLDPC_defs.h](file://nrLDPC_defs.h).
`libldpc_cl`is under development.
[oai Wikis home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home)
......@@ -101,8 +101,8 @@ int test_ldpc(short No_iteration,
unsigned int *crc_misses,
time_stats_t *time_optim,
time_stats_t *time_decoder,
n_iter_stats_t *dec_iter,
short run_cuda)
n_iter_stats_t *dec_iter
)
{
//clock initiate
//time_stats_t time,time_optim,tinput,tprep,tparity,toutput, time_decoder;
......@@ -393,27 +393,12 @@ int test_ldpc(short No_iteration,
decParams.R=code_rate_vec[R_ind];//13;
decParams.numMaxIter=No_iteration;
decParams.outMode = nrLDPC_outMode_BIT;
decParams.block_length=block_length;
//decParams.outMode =nrLDPC_outMode_LLRINT8;
#ifdef CUDA_FLAG
set_compact_BG(Zc,BG);
init_LLR_DMA_for_CUDA(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], block_length);
#endif
nrLDPC_initcall(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j]);
for(j=0;j<n_segments;j++) {
start_meas(time_decoder);
#ifdef CUDA_FLAG
if(run_cuda){
n_iter = nrLDPC_decoder_LYC(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], block_length, time_decoder);
}
else{
// decode the sequence
// decoder supports BG2, Z=128 & 256
//esimated_output=ldpc_decoder(channel_output_fixed, block_length, No_iteration, (double)((float)nom_rate/(float)denom_rate));
///nrLDPC_decoder(&decParams, channel_output_fixed, estimated_output, NULL);
n_iter = nrLDPC_decoder(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], p_nrLDPC_procBuf, p_decoder_profiler);
}
#else
n_iter = nrLDPC_decoder(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], p_nrLDPC_procBuf, p_decoder_profiler);
#endif
stop_meas(time_decoder);
}
......@@ -514,17 +499,14 @@ int test_ldpc(short No_iteration,
int main(int argc, char *argv[])
{
#ifdef CUDA_FLAG
warmup_for_GPU();
#endif
unsigned int errors, errors_bit, crc_misses;
double errors_bit_uncoded;
short block_length=8448; // decoder supports length: 1201 -> 1280, 2401 -> 2560
char *ldpc_version=NULL; /* version of the ldpc decoder library to use (XXX suffix to use when loading libldpc_XXX.so */
short No_iteration=5;
int n_segments=1;
//double rate=0.333;
short run_cuda = 0;
int nom_rate=1;
int denom_rate=3;
......@@ -544,7 +526,7 @@ int main(int argc, char *argv[])
short BG=0,Zc,Kb=0;
while ((c = getopt (argc, argv, "q:r:s:S:l:G:n:d:i:t:u:h")) != -1)
while ((c = getopt (argc, argv, "q:r:s:S:l:G:n:d:i:t:u:hv:")) != -1)
switch (c)
{
case 'q':
......@@ -564,7 +546,7 @@ int main(int argc, char *argv[])
break;
case 'G':
run_cuda = atoi(optarg);
ldpc_version="_cuda";
break;
case 'n':
......@@ -590,7 +572,9 @@ int main(int argc, char *argv[])
case 'u':
test_uncoded = atoi(optarg);
break;
case 'v':
ldpc_version=strdup(optarg);
break;
case 'h':
default:
printf("CURRENTLY SUPPORTED CODE RATES: \n");
......@@ -609,6 +593,7 @@ int main(int argc, char *argv[])
printf("-t SNR simulation step, Default: 0.1\n");
printf("-i Max decoder iterations, Default: 5\n");
printf("-u Set SNR per coded bit, Default: 0\n");
printf("-v XXX Set ldpc shared library version. libldpc_XXX.so will be used \n");
exit(1);
break;
}
......@@ -619,7 +604,10 @@ int main(int argc, char *argv[])
printf("SNR0 %f: \n", SNR0);
load_nrLDPClib();
if (ldpc_version != NULL)
load_nrLDPClib(ldpc_version);
else
load_nrLDPClib(NULL);
load_nrLDPClib_ref("_orig", &encoder_orig);
//for (block_length=8;block_length<=MAX_BLOCK_LENGTH;block_length+=8)
......@@ -691,8 +679,7 @@ int main(int argc, char *argv[])
&crc_misses,
time_optim,
time_decoder,
dec_iter,
run_cuda);
dec_iter);
printf("SNR %f, BLER %f (%u/%d)\n", SNR, (float)decoded_errors[i]/(float)n_trials, decoded_errors[i], n_trials);
printf("SNR %f, BER %f (%u/%d)\n", SNR, (float)errors_bit/(float)n_trials/(float)block_length/(double)n_segments, decoded_errors[i], n_trials);
......
......@@ -46,7 +46,8 @@
#endif
static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDPC_procBuf* p_procBuf, uint32_t numLLR, t_nrLDPC_lut* p_lut, t_nrLDPC_dec_params* p_decParams, t_nrLDPC_time_stats* p_profiler);
void nrLDPC_initcall(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out) {
}
int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out, t_nrLDPC_procBuf* p_procBuf, t_nrLDPC_time_stats* p_profiler)
{
uint32_t numLLR;
......
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.0 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file nrLDPC_decoder_CL.c
* \brief ldpc decoder, openCL implementaion
* \author Francois TABURET
* \date 2021
* \version 1.0
* \note initial implem - translation of cuda version
*/
/* uses HW component id for log messages ( --log_config.hw_log_level <warning| info|debug|trace>) */
#include <stdio.h>
#include <unistd.h>
#include <cuda_runtime.h>
#include <CL/opencl.h>
#include "PHY/CODING/nrLDPC_decoder/nrLDPC_types.h"
#include "PHY/CODING/nrLDPC_decoder/nrLDPCdecoder_defs.h"
#include "assertions.h"
#include "common/utils/LOG/log.h"
#define MAX_ITERATION 2
#define MC 1
#define MAX_OCLDEV 10
#define MAX_OCLRUNTIME 5
#define CLSETKERNELARG(A,B,C,D) \
rt=clSetKernelArg(A,B,C,D) ;\
AssertFatal(rt == CL_SUCCESS, "Error %d setting kernel argument index %d\n" , (int)rt, B);
typedef struct{
char x;
char y;
short value;
} h_element;
#include "../nrLDPC_decoder_LYC/bgs/BG1_compact_in_C.h"
typedef struct{
cl_uint max_CU;
cl_uint max_WID;
size_t *max_WIS;
} ocldev_t;
typedef struct {
cl_kernel cnp_kernel_1st;
cl_kernel cnp_kernel;
cl_kernel vnp_kernel_normal;
cl_kernel pack_decoded_bit;
} oclkernels_t;
typedef struct{
cl_uint num_devices;
cl_device_id devices[MAX_OCLDEV];
ocldev_t ocldev[MAX_OCLDEV];
cl_context context;
cl_program program;
cl_mem dev_h_compact1;
cl_mem dev_h_compact2;
cl_mem dev_const_llr;
cl_mem dev_llr;
cl_mem dev_dt;
cl_mem dev_tmp;
oclkernels_t kernels[MAX_OCLDEV];
cl_command_queue queue[MAX_OCLDEV];
} oclruntime_t;
typedef struct{
oclruntime_t runtime[MAX_OCLRUNTIME];
} ocl_t;
ocl_t ocl;
void set_compact_BG(int Zc,short BG){
cl_uint rt;
int row,col;
if(BG == 1){
row = 46;
col = 68;
}
else{
row = 42;
col = 52;
}
int compact_row = 30;
int compact_col = 19;
if(BG==2){compact_row = 10, compact_col = 23;}
int memorySize_h_compact1 = row * compact_col * sizeof(h_element);
int memorySize_h_compact2 = compact_row * col * sizeof(h_element);
int lift_index = 0;
short lift_set[][9] = {
{2,4,8,16,32,64,128,256},
{3,6,12,24,48,96,192,384},
{5,10,20,40,80,160,320},
{7,14,28,56,112,224},
{9,18,36,72,144,288},
{11,22,44,88,176,352},
{13,26,52,104,208},
{15,30,60,120,240},
{0}
};
for(int i = 0; lift_set[i][0] != 0; i++){
for(int j = 0; lift_set[i][j] != 0; j++){
if(Zc == lift_set[i][j]){
lift_index = i;
break;
}
}
}
printf("\nZc = %d BG = %d\n",Zc,BG);
ocl.runtime[0].dev_h_compact1 = clCreateBuffer(ocl.runtime[0].context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY, memorySize_h_compact1, NULL, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_h_compact1 for platform %i \n" , (int)rt, 0);
ocl.runtime[0].dev_h_compact2 = clCreateBuffer(ocl.runtime[0].context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY, memorySize_h_compact2, NULL, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_h_compact2 for platform %i \n" , (int)rt, 0);
h_element *h1;
h_element *h2;
switch(lift_index){
case 0:
h1 = host_h_compact1_I0;
h2 = host_h_compact2_I0;
break;
case 1:
h1 = host_h_compact1_I1;
h2 = host_h_compact2_I1;
break;
case 2:
h1 = host_h_compact1_I2;
h2 = host_h_compact2_I2;
break;
case 3:
h1 = host_h_compact1_I3;
h2 = host_h_compact2_I3;
break;
case 4:
h1 = host_h_compact1_I4;
h2 = host_h_compact2_I4;
break;
case 5:
h1 = host_h_compact1_I5;
h2 = host_h_compact2_I5;
break;
case 6:
h1 = host_h_compact1_I6;
h2 = host_h_compact2_I6;
break;
case 7:
h1 = host_h_compact1_I7;
h2 = host_h_compact2_I7;
break;
default:
AssertFatal(0, "Invalid lift_index value %i\n" , lift_index);
break;
}
rt = clEnqueueWriteBuffer(ocl.runtime[0].queue[0], ocl.runtime[0].dev_h_compact1, CL_TRUE, 0,memorySize_h_compact1, h1, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d moving h_compact1 memory to pltf %i dev %i\n" , (int)rt, 0,0);
rt = clEnqueueWriteBuffer(ocl.runtime[0].queue[0], ocl.runtime[0].dev_h_compact2, CL_TRUE, 0,memorySize_h_compact2, h2, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d moving h_compact2 memory to pltf %i dev %i\n" , (int)rt, 0,0);
// return 0;
}
void cl_error_callback(const char* errinfo, const void* private_info, size_t cb, void* user_data) {
oclruntime_t *runtime = (oclruntime_t *)user_data;
LOG_E(HW,"OpenCL accelerator error %s\n", errinfo );
}
char *clutil_getstrdev(int intdev) {
static char retstring[255]="";
char *retptr=retstring;
retptr+=sprintf(retptr,"0x%08x: ",(uint32_t)intdev);
if (intdev & CL_DEVICE_TYPE_CPU)
retptr+=sprintf(retptr,"%s","cpu ");
if (intdev & CL_DEVICE_TYPE_GPU)
retptr+=sprintf(retptr,"%s","gpu ");
if (intdev & CL_DEVICE_TYPE_ACCELERATOR)
retptr+=sprintf(retptr,"%s","acc ");
return retstring;
}
void get_CompilErr(cl_program program, int pltf) {
// Determine the size of the log
size_t log_size;
for(int i=0; i<ocl.runtime[pltf].num_devices;i++) {
clGetProgramBuildInfo(program, ocl.runtime[pltf].devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
// Allocate memory for the log
char *log = (char *) malloc(log_size);
// Get the log
clGetProgramBuildInfo(program, ocl.runtime[pltf].devices[i], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
// Print the log
printf("%s\n", log);
free(log);
}
}
size_t load_source(char **source_str) {
int MAX_SOURCE_SIZE=(500*132);
FILE *fp;
size_t source_size;
fp = fopen("nrLDPC_decoder_kernels_CL.cl", "r");
AssertFatal(fp,"failed to open cl source: %s\n",strerror(errno));
*source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread( *source_str, 1, MAX_SOURCE_SIZE, fp);
fclose( fp );
return source_size;
}
/* from here: entry points in decoder shared lib */
int ldpc_autoinit(void) { // called by the library loader
cl_platform_id platforms[10];
cl_uint num_platforms_found;
int context_ok=0;
cl_uint rt = clGetPlatformIDs( sizeof(platforms)/sizeof(cl_platform_id), platforms, &num_platforms_found );
AssertFatal(rt == CL_SUCCESS, "clGetPlatformIDs error %d\n" , (int)rt);
AssertFatal( num_platforms_found>0 , "clGetPlatformIDs: no cl compatible platform found\n");
for (int i=0 ; i<(int)num_platforms_found ; i++) {
char stringval[255];
rt = clGetPlatformInfo(platforms[i],CL_PLATFORM_PROFILE, sizeof(stringval),stringval,NULL);
AssertFatal(rt == CL_SUCCESS, "clGetPlatformInfo PROFILE error %d\n" , (int)rt);
LOG_I(HW,"Platform %i, OpenCL profile %s\n", i,stringval );
rt = clGetPlatformInfo(platforms[i],CL_PLATFORM_VERSION, sizeof(stringval),stringval,NULL);
AssertFatal(rt == CL_SUCCESS, "clGetPlatformInfo VERSION error %d\n" , (int)rt);
LOG_I(HW,"Platform %i, OpenCL version %s\n", i,stringval );
rt = clGetDeviceIDs(platforms[i],CL_DEVICE_TYPE_ALL, sizeof(ocl.runtime[i].devices)/sizeof(cl_device_id),ocl.runtime[i].devices,&(ocl.runtime[i].num_devices));
AssertFatal(rt == CL_SUCCESS, "clGetDeviceIDs error %d\n" , (int)rt);
int devok=0;
for (int j=0; j<ocl.runtime[i].num_devices; j++) {
cl_bool abool;
rt = clGetDeviceInfo(ocl.runtime[i].devices[j],CL_DEVICE_AVAILABLE, sizeof(abool),&abool,NULL);
AssertFatal(rt == CL_SUCCESS, "clGetDeviceInfo DEVICE_AVAILABLE error %d\n" , (int)rt);
LOG_I(HW,"Device %i is %s available\n", j, (abool==CL_TRUE?"":"not"));
cl_device_type devtype;
rt = clGetDeviceInfo(ocl.runtime[i].devices[j],CL_DEVICE_TYPE, sizeof(cl_device_type),&devtype,NULL);
AssertFatal(rt == CL_SUCCESS, "clGetDeviceInfo DEVICE_TYPE error %d\n" , (int)rt);
LOG_I(HW,"Device %i, type %d = %s\n", j,(int)devtype, clutil_getstrdev(devtype));
rt = clGetDeviceInfo(ocl.runtime[i].devices[j],CL_DEVICE_MAX_COMPUTE_UNITS,sizeof(ocl.runtime[i].ocldev[j].max_CU),&(ocl.runtime[i].ocldev[j].max_CU),NULL);
AssertFatal(rt == CL_SUCCESS, "clGetDeviceInfo MAX_COMPUTE_UNITS error %d\n" , (int)rt);
LOG_I(HW,"Device %i, number of Compute Units: %d\n", j,ocl.runtime[i].ocldev[j].max_CU);
rt = clGetDeviceInfo(ocl.runtime[i].devices[j],CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(ocl.runtime[i].ocldev[j].max_WID),&(ocl.runtime[i].ocldev[j].max_WID),NULL);
AssertFatal(rt == CL_SUCCESS, "clGetDeviceInfo MAX_WORK_ITEM_DIMENSIONS error %d\n" , (int)rt);
LOG_I(HW,"Device %i, max Work Items dimension: %d\n", j,ocl.runtime[i].ocldev[j].max_WID);
ocl.runtime[i].ocldev[j].max_WIS = (size_t *)malloc(ocl.runtime[i].ocldev[j].max_WID * sizeof(size_t));
rt = clGetDeviceInfo(ocl.runtime[i].devices[j],CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(ocl.runtime[i].ocldev[j].max_WID)*sizeof(size_t),ocl.runtime[i].ocldev[j].max_WIS,NULL);
AssertFatal(rt == CL_SUCCESS, "clGetDeviceInfo MAX_WORK_ITEM_SIZES error %d\n" , (int)rt);
for(int k=0; k<ocl.runtime[i].ocldev[j].max_WID;k++)
LOG_I(HW,"Device %i, max Work Items size for dimension: %d %u\n", j,k,(uint32_t)ocl.runtime[i].ocldev[j].max_WIS[k]);
devok++;
}
if (devok >0) {
ocl.runtime[i].context = clCreateContext(NULL, ocl.runtime[i].num_devices, ocl.runtime[i].devices, cl_error_callback, &(ocl.runtime[i]), (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating context for platform %i\n" , (int)rt, i);
for(int dev=0; dev<ocl.runtime[i].num_devices; dev++) {
ocl.runtime[i].queue[dev] = clCreateCommandQueueWithProperties(ocl.runtime[i].context,ocl.runtime[i].devices[dev] , 0, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating command queue for platform %i device %i\n" , (int)rt, i,dev);
}
ocl.runtime[i].dev_const_llr = clCreateBuffer(ocl.runtime[i].context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY, 68*384, NULL, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_const_llr for platform %i \n" , (int)rt, i);
ocl.runtime[i].dev_llr = clCreateBuffer(ocl.runtime[i].context, CL_MEM_READ_WRITE|CL_MEM_HOST_WRITE_ONLY, 68*384, NULL, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_llr for platform %i \n" , (int)rt, i);
ocl.runtime[i].dev_dt = clCreateBuffer(ocl.runtime[i].context, CL_MEM_READ_WRITE|CL_MEM_HOST_NO_ACCESS, 46*68*384, NULL, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_dt for platform %i \n" , (int)rt, i);
ocl.runtime[i].dev_tmp = clCreateBuffer(ocl.runtime[i].context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY, 68*384, NULL, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_tmp for platform %i \n" , (int)rt, i);
char *source_str;
size_t source_size=load_source(&source_str);
cl_program program = clCreateProgramWithSource(ocl.runtime[i].context, 1,
(const char **)&source_str, (const size_t *)&source_size, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating program for platform %i \n" , (int)rt, i);
rt = clBuildProgram(program, ocl.runtime[i].num_devices,ocl.runtime[i].devices, NULL, NULL, NULL);
if (rt == CL_BUILD_PROGRAM_FAILURE) {
get_CompilErr(program,i);
}
AssertFatal(rt == CL_SUCCESS, "Error %d buildding program for platform %i \n" , rt, i);
for(int dev=0; dev<ocl.runtime[i].num_devices; dev++) {
ocl.runtime[i].kernels[dev].cnp_kernel_1st = clCreateKernel(program, "ldpc_cnp_kernel_1st_iter", (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating kernel %s platform %i, dev %i\n" , (int)rt,"ldpc_cnp_kernel_1st_iter", i,dev);
ocl.runtime[i].kernels[dev].cnp_kernel = clCreateKernel(program, "ldpc_cnp_kernel", (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating kernel %s platform %i, dev %i\n" , (int)rt,"ldpc_cnp_kernel", i,dev);
ocl.runtime[i].kernels[dev].vnp_kernel_normal = clCreateKernel(program, "ldpc_vnp_kernel_normal", (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating kernel %s platform %i, dev %i\n" , (int)rt,"ldpc_vnp_kernel_normal", i,dev);
ocl.runtime[i].kernels[dev].pack_decoded_bit = clCreateKernel(program, "pack_decoded_bit", (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating kernel %s platform %i, dev %i\n" , (int)rt,"pack_decoded_bit", i,dev);
}
context_ok++;
}
devok=0;
}
AssertFatal(context_ok>0, "No openCL device available to accelerate ldpc\n");
return 0;
}
void nrLDPC_initcall(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out) {
set_compact_BG(p_decParams->Z,p_decParams->BG);
// init_LLR_DMA(p_decParams, p_llr, p_out);
}
int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out,t_nrLDPC_procBuf* p_procBuf, t_nrLDPC_time_stats *time_decoder)
{
uint16_t Zc = p_decParams->Z;
uint8_t BG = p_decParams->BG;
// uint8_t numMaxIter = p_decParams->numMaxIter;
int block_length = p_decParams->block_length;
// e_nrLDPC_outMode outMode = p_decParams->outMode;
uint8_t row,col;
if(BG == 1){
row = 46;
col = 68;
}
else{
row = 42;
col = 52;
}
// alloc memory
// unsigned char *hard_decision = (unsigned char*)p_out;
// gpu
int memorySize_llr = col * Zc * sizeof(char) * MC;
// cudaCheck( cudaMemcpyToSymbol(dev_const_llr, p_llr, memorySize_llr_cuda) );
// cudaCheck( cudaMemcpyToSymbol(dev_llr, p_llr, memorySize_llr_cuda) );
int rt = clEnqueueWriteBuffer(ocl.runtime[0].queue[0], ocl.runtime[0].dev_const_llr, CL_TRUE, 0,
memorySize_llr, p_llr, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d moving p_llr data to read only memory in pltf %i dev %i\n" , (int)rt, 0,0);
rt = clEnqueueWriteBuffer(ocl.runtime[0].queue[0], ocl.runtime[0].dev_llr, CL_TRUE, 0,
memorySize_llr, p_llr, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d moving p_llr data to read-write memory in pltf %i dev %i\n" , (int)rt, 0,0);
// Define CUDA kernel dimension
// int blockSizeX = Zc;
// dim3 dimGridKernel1(row, MC, 1); // dim of the thread blocks
// dim3 dimBlockKernel1(blockSizeX, 1, 1);
// dim3 dimGridKernel2(col, MC, 1);
// dim3 dimBlockKernel2(blockSizeX, 1, 1);
// cudaDeviceSynchronize();
// lauch kernel
size_t global_item_sizek0[2] = {row*Zc,MC}; // Process the entire lists
size_t global_item_sizek1[2] = {col*Zc,MC}; // Process the entire lists
size_t local_item_sizek[2] = {128,1}; // Divide work items into groups of 128
for(int ii = 0; ii < MAX_ITERATION; ii++){
// first kernel
if(ii == 0){
// ldpc_cnp_kernel_1st_iter
// <<<dimGridKernel1, dimBlockKernel1>>>
// ( BG, row, col, Zc);
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel_1st, 0, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_llr));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel_1st, 1, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_dt));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel_1st, 2, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_h_compact1));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel_1st, 3, sizeof(int), (void *)&(BG));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel_1st, 4, sizeof(int), (void *)&(row));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel_1st, 5, sizeof(int), (void *)&(col));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel_1st, 6, sizeof(int), (void *)&(Zc));
rt = clEnqueueNDRangeKernel(ocl.runtime[0].queue[0], ocl.runtime[0].kernels[0].cnp_kernel_1st, 2, NULL,
global_item_sizek0, local_item_sizek, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d enqueing cnp_kernel_1st \n" , (int)rt);
}else{
// ldpc_cnp_kernel
// <<<dimGridKernel1, dimBlockKernel1>>>
// ( BG, row, col, Zc);
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel, 0, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_llr));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel, 1, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_dt));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel, 2, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_h_compact1));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel, 3, sizeof(int), (void *)&(BG));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel, 4, sizeof(int), (void *)&(row));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel, 5, sizeof(int), (void *)&(col));
CLSETKERNELARG(ocl.runtime[0].kernels[0].cnp_kernel, 6, sizeof(int), (void *)&(Zc));
rt = clEnqueueNDRangeKernel(ocl.runtime[0].queue[0], ocl.runtime[0].kernels[0].cnp_kernel, 2, NULL,
global_item_sizek0, local_item_sizek, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d enqueing cnp_kernel\n" , (int)rt);
}
// second kernel
// ldpc_vnp_kernel_normal
// <<<dimGridKernel2, dimBlockKernel2>>>
// // (dev_llr, dev_const_llr,BG, row, col, Zc);
// (BG, row, col, Zc);
CLSETKERNELARG(ocl.runtime[0].kernels[0].vnp_kernel_normal, 0, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_llr));
CLSETKERNELARG(ocl.runtime[0].kernels[0].vnp_kernel_normal, 1, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_dt));
CLSETKERNELARG(ocl.runtime[0].kernels[0].vnp_kernel_normal, 2, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_const_llr));
CLSETKERNELARG(ocl.runtime[0].kernels[0].vnp_kernel_normal, 3, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_h_compact2));
CLSETKERNELARG(ocl.runtime[0].kernels[0].vnp_kernel_normal, 4, sizeof(int), (void *)&(BG));
CLSETKERNELARG(ocl.runtime[0].kernels[0].vnp_kernel_normal, 5, sizeof(int), (void *)&(row));
CLSETKERNELARG(ocl.runtime[0].kernels[0].vnp_kernel_normal, 6, sizeof(int), (void *)&(col));
CLSETKERNELARG(ocl.runtime[0].kernels[0].vnp_kernel_normal, 7, sizeof(int), (void *)&(Zc));
rt = clEnqueueNDRangeKernel(ocl.runtime[0].queue[0], ocl.runtime[0].kernels[0].vnp_kernel_normal, 2, NULL,
global_item_sizek1, local_item_sizek, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d enqueing vnp_kernel_normal \n" , (int)rt);
}
// int pack = (block_length/128)+1;
// dim3 pack_block(pack, MC, 1);
// pack_decoded_bit<<<pack_block,128>>>( col, Zc);
CLSETKERNELARG(ocl.runtime[0].kernels[0].pack_decoded_bit, 0, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_llr));
CLSETKERNELARG(ocl.runtime[0].kernels[0].pack_decoded_bit, 1, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_tmp));
CLSETKERNELARG(ocl.runtime[0].kernels[0].pack_decoded_bit, 2, sizeof(int), (void *)&(col));
CLSETKERNELARG(ocl.runtime[0].kernels[0].pack_decoded_bit, 3, sizeof(int), (void *)&(Zc));
// Execute the OpenCL kernel on the list
size_t global_item_size[2] = {block_length,MC}; // Process the entire lists
size_t local_item_size[2] = {128,1}; // Divide work items into groups of 128
rt = clEnqueueNDRangeKernel(ocl.runtime[0].queue[0], ocl.runtime[0].kernels[0].pack_decoded_bit, 2, NULL,
global_item_size, local_item_size, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d enqueing pack_decoded_bit \n" , (int)rt);
// cudaCheck( cudaMemcpyFromSymbol((void*)hard_decision, (const void*)dev_tmp, (block_length/8)*sizeof(unsigned char)) );
// cudaDeviceSynchronize();
rt = clEnqueueReadBuffer(ocl.runtime[0].queue[0], ocl.runtime[0].dev_tmp, CL_TRUE, 0,
(block_length/8)*sizeof(unsigned char) , p_llr, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d moving p_llr data to pltf %i dev %i\n" , (int)rt, 0,0);
return MAX_ITERATION;
}
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.0 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file PHY/CODING/nrLDPC_decoder_kernels_CL.cl
* \brief kernel functions for ldpc decoder accelerated via openCL
* \author Francois TABURET
* \date 2021
* \version 1.0
* \company Nokia BellLabs France
* \email: francois.taburet@nokia-bell-labs.com
* \note initial implem - translation of cuda version
* \warning
*/
#define define MAX_ITERATION 2
#define MC 1
#define INT32_MAX 2147483647
typedef struct{
char x;
char y;
short value;
} h_element;
//__global char dev_dt [46*68*384];
//__local char *dev_t;
//__global char dev_llr[68*384];
//__global unsigned char dev_tmp[68*384];
//__constant h_element dev_h_compact1[46*19] = {}; // used in kernel 1
//__constant h_element dev_h_compact2[68*30] = {}; // used in kernel 2
// __device__ __constantant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constantant__ h_element dev_h_compact2[68*30]; // used in kernel 2
// row and col element count
__constant char h_ele_row_bg1_count[46] = {
19, 19, 19, 19, 3, 8, 9, 7, 10, 9,
7, 8, 7, 6, 7, 7, 6, 6, 6, 6,
6, 6, 5, 5, 6, 5, 5, 4, 5, 5,
5, 5, 5, 5, 5, 5, 5, 4, 5, 5,
4, 5, 4, 5, 5, 4};
__constant char h_ele_col_bg1_count[68] = {
30, 28, 7, 11, 9, 4, 8, 12, 8, 7,
12, 10, 12, 11, 10, 7, 10, 10, 13, 7,
8, 11, 12, 5, 6, 6, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1};
__constant char h_ele_row_bg2_count[42] = {
8, 10, 8, 10, 4, 6, 6, 6, 4, 5,
5, 5, 4, 5, 5, 4, 5, 5, 4, 4,
4, 4, 3, 4, 4, 3, 5, 3, 4, 3,
5, 3, 4, 4, 4, 4, 4, 3, 4, 4,
4, 4};
__constant char h_ele_col_bg2_count[52] = {
22, 23, 10, 5, 5, 14, 7, 13, 6, 8,
9, 16, 9, 12, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1};
// Kernel 1
__kernel void ldpc_cnp_kernel_1st_iter( __global char * dev_llr, __global char * dev_dt, __local h_element *dev_h_compact1, int BG, int row, int col, int Zc)
{
// int iMCW = blockIdx.y; // codeword id
// int iBlkRow = blockIdx.x; // block row in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
int iMCW = get_group_id(1); // codeword id
int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base
int iSubRow = get_local_id(0);; // row index in sub_block of h_base
int iCol; // overall col index in h_base
int offsetR;
int shift_t;
// For 2-min algorithm.
int Q_sign = 0;
int sq;
int Q, Q_abs;
int R_temp;
int sign = 1;
int rmin1 = INT32_MAX;
int rmin2 = INT32_MAX;
char idx_min = 0;
h_element h_element_t;
int s = (BG==1)? h_ele_row_bg1_count[iBlkRow]:h_ele_row_bg2_count[iBlkRow];
offsetR = (iMCW * row*col*Zc) + iBlkRow * Zc + iSubRow; // row*col*Zc = size of dev_dt
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("s: %d, offset %d\n", s, offsetR);
// The 1st recursion
for(int i = 0; i < s; i++) // loop through all the ZxZ sub-blocks in a row
{
h_element_t = dev_h_compact1[i*row+iBlkRow]; // compact_col == row
iBlkCol = h_element_t.y;
shift_t = h_element_t.value;
shift_t = (iSubRow + shift_t) % Zc;
iCol = (iMCW * col*Zc) + iBlkCol * Zc + shift_t; // col*Zc = size of llr
Q = dev_llr[iCol];
Q_abs = (Q>0)? Q : -Q;
sq = Q < 0;
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("i %d, icol %d, Q: %d\n", i, iCol, Q);
// quick version
sign = sign * (1 - sq * 2);
Q_sign |= sq << i;
if (Q_abs < rmin1){
rmin2 = rmin1;
rmin1 = Q_abs;
idx_min = i;
} else if (Q_abs < rmin2){
rmin2 = Q_abs;
}
}
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("min1 %d, min2 %d, min1_idx %d\n", rmin1, rmin2, idx_min);
// The 2nd recursion
for(int i = 0; i < s; i++){
// v0: Best performance so far. 0.75f is the value of alpha.
sq = 1 - 2 * ((Q_sign >> i) & 0x01);
R_temp = 0.75f * sign * sq * (i != idx_min ? rmin1 : rmin2);
// write results to global memory
h_element_t = dev_h_compact1[i*row+iBlkRow];
int addr_temp = offsetR + h_element_t.y * row * Zc;
dev_dt[addr_temp] = R_temp;
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("R_temp %d, temp_addr %d\n", R_temp, addr_temp);
}
}
// Kernel_1
__kernel void ldpc_cnp_kernel( __global char * dev_llr, __global char * dev_dt, __local h_element *dev_h_compact1, int BG, int row, int col, int Zc)
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
// int iMCW = blockIdx.y;
// int iBlkRow = blockIdx.x; // block row in h_base // block col in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
int iMCW = get_group_id(1);
int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base
int iSubRow = get_local_id(0);; // row index in sub_block of h_base
int iCol; // overall col index in h_base
int offsetR;
int shift_t;
// For 2-min algorithm.
int Q_sign = 0;
int sq;
int Q, Q_abs;
int R_temp;
int sign = 1;
int rmin1 = INT32_MAX;
int rmin2 = INT32_MAX;
char idx_min = 0;
h_element h_element_t;
int s = (BG==1)? h_ele_row_bg1_count[iBlkRow]: h_ele_row_bg2_count[iBlkRow];
offsetR = (iMCW *row*col*Zc) + iBlkRow * Zc + iSubRow; // row * col * Zc = size of dev_dt
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("s: %d, offset %d\n", s, offsetR);
// The 1st recursion
for(int i = 0; i < s; i++) // loop through all the ZxZ sub-blocks in a row
{
h_element_t = dev_h_compact1[i*row+iBlkRow];
iBlkCol = h_element_t.y;
shift_t = h_element_t.value;
shift_t = (iSubRow + shift_t) % Zc;
iCol = iBlkCol * Zc + shift_t;
R_temp = dev_dt[offsetR + iBlkCol * row * Zc];
Q = dev_llr[iMCW * (col*Zc) + iCol] - R_temp;
Q_abs = (Q>0)? Q : -Q;
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("i %d, icol %d, Q: %d\n", i, iCol, Q);
sq = Q < 0;
sign = sign * (1 - sq * 2);
Q_sign |= sq << i;
if (Q_abs < rmin1){
rmin2 = rmin1;
rmin1 = Q_abs;
idx_min = i;
} else if (Q_abs < rmin2){
rmin2 = Q_abs;
}
}
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("min1 %d, min2 %d, min1_idx %d\n", rmin1, rmin2, idx_min);
// The 2nd recursion
for(int i = 0; i < s; i ++){
sq = 1 - 2 * ((Q_sign >> i) & 0x01);
R_temp = 0.75f * sign * sq * (i != idx_min ? rmin1 : rmin2);
// write results to global memory
h_element_t = dev_h_compact1[i*row+iBlkRow];
int addr_temp = h_element_t.y * row * Zc + offsetR;
dev_dt[addr_temp] = R_temp;
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("R_temp %d, temp_addr %d\n", R_temp, addr_temp);
}
}
// Kernel 2: VNP processing
__kernel void
ldpc_vnp_kernel_normal(__global char * dev_llr, __global char * dev_dt, __global char * dev_const_llr, __local h_element *dev_h_compact2, int BG, int row, int col, int Zc)
{
// int iMCW = blockIdx.y;
// int iBlkCol = blockIdx.x;
// int iSubCol = threadIdx.x;
int iMCW = get_group_id(1);
int iBlkCol = get_group_id(0);
int iBlkRow;
int iSubCol = get_local_id(0);
int iRow;
int iCol;
int shift_t, sf;
int APP;
h_element h_element_t;
// update all the llr values
iCol = iBlkCol * Zc + iSubCol;
APP = dev_const_llr[iMCW *col*Zc + iCol];
int offsetDt = iMCW *row*col*Zc + iBlkCol * row * Zc;
int s = (BG==1)? h_ele_col_bg1_count[iBlkCol]:h_ele_col_bg2_count[iBlkCol];
for(int i = 0; i < s; i++)
{
h_element_t = dev_h_compact2[i*col+iBlkCol];
shift_t = h_element_t.value%Zc;
iBlkRow = h_element_t.x;
sf = iSubCol - shift_t;
sf = (sf + Zc) % Zc;
iRow = iBlkRow * Zc + sf;
APP = APP + dev_dt[offsetDt + iRow];
}
if(APP > SCHAR_MAX) APP = SCHAR_MAX;
if(APP < SCHAR_MIN) APP = SCHAR_MIN;
// write back to device global memory
dev_llr[iMCW *col*Zc + iCol] = APP;
}
__kernel void pack_decoded_bit(__global unsigned char * dev_llr, __global unsigned char * dev_tmp, int col, int Zc)
{
// int iMCW = blockIdx.y;
// int btid = threadIdx.x;
unsigned char tmp[128];
int iMCW = get_group_id(1);
int btid = get_local_id(0);
int tid = iMCW * col*Zc + get_group_id(0)*128 + btid;
tmp[btid] = 0;
if(dev_llr[tid] < 0){
tmp[btid] = 1 << (7-(btid&7));
}
// __syncthreads();
if(btid < 16){
dev_tmp[iMCW * col*Zc + get_group_id(0)*16+btid] = 0;
for(int i = 0; i < 8; i++){
dev_tmp[iMCW * col*Zc + get_group_id(0)*16+btid] += tmp[btid*8+i];
}
}
}
......@@ -70,6 +70,7 @@ typedef struct nrLDPC_dec_params {
uint16_t Z; /**< Lifting size */
uint8_t R; /**< Decoding rate: Format 15,13,... for code rates 1/5, 1/3,... */
uint8_t numMaxIter; /**< Maximum number of iterations */
int block_length;
e_nrLDPC_outMode outMode; /**< Output format */
} t_nrLDPC_dec_params;
......
......@@ -7,14 +7,14 @@
* \note
* \warning
*/
#include <iostream>
#include <stdio.h>
#include <unistd.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "PHY/CODING/nrLDPC_decoder/nrLDPC_types.h"
#include "PHY/CODING/nrLDPC_decoder/nrLDPCdecoder_defs.h"
#include "assertions.h"
#include "bgs/BG1_I0"
#include "bgs/BG1_I1"
#include "bgs/BG1_I2"
......@@ -462,10 +462,11 @@ void read_BG(int BG, int *h, int row, int col)
}
extern "C"
void init_LLR_DMA_for_CUDA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out, int block_length){
void init_LLR_DMA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out){
uint16_t Zc = p_decParams->Z;
uint8_t BG = p_decParams->BG;
int block_length = p_decParams->block_length;
uint8_t row,col;
if(BG == 1){
row = 46;
......@@ -483,14 +484,60 @@ void init_LLR_DMA_for_CUDA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
}
using namespace std ;
/* from here: entry points in decoder shared lib */
extern "C"
int32_t nrLDPC_decoder_LYC(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out, int block_length, time_stats_t *time_decoder)
{
int ldpc_autoinit(void) { // called by the library loader
/*int devices = 0;
cudaError_t err = cudaGetDeviceCount(&devices);
AssertFatal(devices>0,"\nNo cuda GPU found\n\n");
const int kb = 1024;
const int mb = kb * kb;
wcout << "NBody.GPU" << endl << "=========" << endl << endl;
wcout << "CUDA version: v" << CUDART_VERSION << endl;
wcout << "CUDA Devices: " << endl << endl;
for(int i = 0; i < devices; ++i)
{
cudaDeviceProp props;
cudaGetDeviceProperties(&props, i);
wcout << i << ": " << props.name << ": " << props.major << "." << props.minor << endl;
wcout << " Global memory: " << props.totalGlobalMem / mb << "mb" << endl;
wcout << " Shared memory: " << props.sharedMemPerBlock / kb << "kb" << endl;
wcout << " Constant memory: " << props.totalConstMem / kb << "kb" << endl;
wcout << " Block registers: " << props.regsPerBlock << endl << endl;
wcout << " Warp size: " << props.warpSize << endl;
wcout << " Threads per block: " << props.maxThreadsPerBlock << endl;
wcout << " Max block dimensions: [ " << props.maxThreadsDim[0] << ", " << props.maxThreadsDim[1] << ", " << props.maxThreadsDim[2] << " ]" << endl;
wcout << " Max grid dimensions: [ " << props.maxGridSize[0] << ", " << props.maxGridSize[1] << ", " << props.maxGridSize[2] << " ]" << endl;
wcout << endl;
}
*/
warmup_for_GPU();
return 0;
}
extern "C"
void nrLDPC_initcall(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out) {
set_compact_BG(p_decParams->Z,p_decParams->BG);
init_LLR_DMA(p_decParams, p_llr, p_out);
}
extern "C"
int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out,t_nrLDPC_procBuf* p_procBuf, t_nrLDPC_time_stats *time_decoder)
{
uint16_t Zc = p_decParams->Z;
uint8_t BG = p_decParams->BG;
uint8_t numMaxIter = p_decParams->numMaxIter;
int block_length = p_decParams->block_length;
e_nrLDPC_outMode outMode = p_decParams->outMode;
cudaError_t cudaStatus;
uint8_t row,col;
......
......@@ -45,6 +45,7 @@ typedef struct {
time_stats_t *toutput;
}encoder_implemparams_t;
#define INIT0_LDPCIMPLEMPARAMS {0,0,0,NULL,NULL,NULL,NULL}
typedef void(*nrLDPC_initcallfunc_t)(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out);
typedef int(*nrLDPC_encoderfunc_t)(unsigned char **,unsigned char **,int,int,short, short, encoder_implemparams_t*);
//============================================================================================================================
// decoder interface
......
......@@ -23,11 +23,14 @@
#ifdef LDPC_LOADER
nrLDPC_decoderfunc_t nrLDPC_decoder;
nrLDPC_encoderfunc_t nrLDPC_encoder;
nrLDPC_initcallfunc_t nrLDPC_initcall;
#else
/* functions to load the LDPC shared lib, implemented in openair1/PHY/CODING/nrLDPC_load.c */
extern int load_nrLDPClib(void) ;
extern int load_nrLDPClib(char *version) ;
extern int load_nrLDPClib_ref(char *libversion, nrLDPC_encoderfunc_t * nrLDPC_encoder_ptr); // for ldpctest
/* ldpc coder/decoder functions, as loaded by load_nrLDPClib(). */
extern nrLDPC_initcallfunc_t nrLDPC_initcall;
extern nrLDPC_decoderfunc_t nrLDPC_decoder;
extern nrLDPC_encoderfunc_t nrLDPC_encoder;
// inline functions:
......
......@@ -42,22 +42,32 @@
/* function description array, to be used when loading the encoding/decoding shared lib */
static loader_shlibfunc_t shlib_fdesc[2];
static loader_shlibfunc_t shlib_fdesc[3];
char *arg[64]={"ldpctest","-O","cmdlineonly::dbgl0"};
/* arguments used when called from phy simulators exec's which do not use the config module */
/* arg is used to initialize the config module so that the loader works as expected */
char *arg[64]={"ldpctest","-O","cmdlineonly::dbgl0",NULL,NULL};
int load_nrLDPClib(void) {
int load_nrLDPClib(char *version) {
char *ptr = (char*)config_get_if();
char libname[64]="ldpc";
if ( ptr==NULL ) {// phy simulators, config module possibly not loaded
load_configmodule(3,(char **)arg,CONFIG_ENABLECMDLINEONLY) ;
load_configmodule(0,(char **)NULL,CONFIG_ENABLECMDLINEONLY) ;
logInit();
}
shlib_fdesc[0].fname = "nrLDPC_decod";
shlib_fdesc[1].fname = "nrLDPC_encod";
int ret=load_module_shlib("ldpc",shlib_fdesc,sizeof(shlib_fdesc)/sizeof(loader_shlibfunc_t),NULL);
shlib_fdesc[2].fname = "nrLDPC_initcall";
int ret;
if (version)
ret=load_module_version_shlib(libname,version,shlib_fdesc,sizeof(shlib_fdesc)/sizeof(loader_shlibfunc_t),NULL);
else
ret=load_module_shlib(libname,shlib_fdesc,sizeof(shlib_fdesc)/sizeof(loader_shlibfunc_t),NULL);
AssertFatal( (ret >= 0),"Error loading ldpc decoder");
nrLDPC_decoder = (nrLDPC_decoderfunc_t)shlib_fdesc[0].fptr;
nrLDPC_encoder = (nrLDPC_encoderfunc_t)shlib_fdesc[1].fptr;
nrLDPC_initcall = (nrLDPC_initcallfunc_t)shlib_fdesc[2].fptr;
return 0;
}
......@@ -65,10 +75,8 @@ int load_nrLDPClib_ref(char *libversion, nrLDPC_encoderfunc_t * nrLDPC_encoder_p
loader_shlibfunc_t shlib_encoder_fdesc;
shlib_encoder_fdesc.fname = "nrLDPC_encod";
char libpath[64];
sprintf(libpath,"ldpc%s",libversion);
int ret=load_module_shlib(libpath,&shlib_encoder_fdesc,1,NULL);
AssertFatal( (ret >= 0),"Error loading ldpc encoder %s\n",libpath);
int ret=load_module_version_shlib("ldpc",libversion,&shlib_encoder_fdesc,1,NULL);
AssertFatal( (ret >= 0),"Error loading ldpc encoder %s\n",(libversion==NULL)?"":libversion);
*nrLDPC_encoder_ptr = (nrLDPC_encoderfunc_t)shlib_encoder_fdesc.fptr;
return 0;
}
......
......@@ -108,7 +108,7 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB,
crcTableInit();
init_scrambling_luts();
init_pucch2_luts();
load_nrLDPClib();
load_nrLDPClib(NULL);
// PBCH DMRS gold sequences generation
nr_init_pbch_dmrs(gNB);
//PDCCH DMRS init
......
......@@ -497,6 +497,8 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
}
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_LDPC, VCD_FUNCTION_IN);
p_decParams->block_length=length_dec;
nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
no_iteration_ldpc = nrLDPC_decoder(p_decParams,
(int8_t *)&pl[0],
llrProcBuf,
......@@ -956,7 +958,8 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
}
p_decParams->block_length=length_dec;
nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
no_iteration_ldpc = nrLDPC_decoder(p_decParams,
(int8_t *)&pl[0],
llrProcBuf,
......@@ -1340,7 +1343,8 @@ void nr_dlsch_decoding_process(void *arg) {
for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
}
p_decParams->block_length=length_dec;
nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
no_iteration_ldpc = nrLDPC_decoder(p_decParams,
(int8_t *)&pl[0],
llrProcBuf,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment