Commit 6a5d8b86 authored by hardy's avatar hardy

Merge remote-tracking branch 'origin/NR_ldpc_cuda_softmodem' into integration_2021_wk44

parents af7d9cd9 8dd5346e
...@@ -309,6 +309,7 @@ endif() ...@@ -309,6 +309,7 @@ endif()
# #
# add autotools definitions that were maybe used! # add autotools definitions that were maybe used!
add_definitions("-DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_FCNTL_H=1 -DHAVE_ARPA_INET_H=1 -DHAVE_SYS_TIME_H=1 -DHAVE_SYS_SOCKET_H=1 -DHAVE_STRERROR=1 -DHAVE_SOCKET=1 -DHAVE_MEMSET=1 -DHAVE_GETTIMEOFDAY=1 -DHAVE_STDLIB_H=1 -DHAVE_MALLOC=1 -DHAVE_LIBSCTP") add_definitions("-DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_FCNTL_H=1 -DHAVE_ARPA_INET_H=1 -DHAVE_SYS_TIME_H=1 -DHAVE_SYS_SOCKET_H=1 -DHAVE_STRERROR=1 -DHAVE_SOCKET=1 -DHAVE_MEMSET=1 -DHAVE_GETTIMEOFDAY=1 -DHAVE_STDLIB_H=1 -DHAVE_MALLOC=1 -DHAVE_LIBSCTP")
set(commonOpts "-pipe -Wno-packed-bitfield-compat -fPIC -Wall -fno-strict-aliasing -rdynamic") set(commonOpts "-pipe -Wno-packed-bitfield-compat -fPIC -Wall -fno-strict-aliasing -rdynamic")
...@@ -318,11 +319,7 @@ set(CMAKE_C_FLAGS ...@@ -318,11 +319,7 @@ set(CMAKE_C_FLAGS
set(CMAKE_CXX_FLAGS set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} ${C_FLAGS_PROCESSOR} ${commonOpts} -std=c++11") "${CMAKE_CXX_FLAGS} ${C_FLAGS_PROCESSOR} ${commonOpts} -std=c++11")
# cuda compiler bug (limitation) on complex macro definition
if (CUDA_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCUDA_FLAG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDA_FLAG")
endif()
if (SANITIZE_ADDRESS) if (SANITIZE_ADDRESS)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-common") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-common")
...@@ -1590,6 +1587,16 @@ set(PHY_LDPC_OPTIM8SEGMULTI_SRC ...@@ -1590,6 +1587,16 @@ set(PHY_LDPC_OPTIM8SEGMULTI_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
) )
set(PHY_LDPC_CUDA_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
)
set(PHY_LDPC_CL_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
)
set(PHY_NR_CODINGIF set(PHY_NR_CODINGIF
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_load.c; ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_load.c;
) )
...@@ -1597,8 +1604,18 @@ set(PHY_NR_CODINGIF ...@@ -1597,8 +1604,18 @@ set(PHY_NR_CODINGIF
add_library(ldpc_orig MODULE ${PHY_LDPC_ORIG_SRC} ) add_library(ldpc_orig MODULE ${PHY_LDPC_ORIG_SRC} )
add_library(ldpc_optim MODULE ${PHY_LDPC_OPTIM_SRC} ) add_library(ldpc_optim MODULE ${PHY_LDPC_OPTIM_SRC} )
add_library(ldpc_optim8seg MODULE ${PHY_LDPC_OPTIM8SEG_SRC} ) add_library(ldpc_optim8seg MODULE ${PHY_LDPC_OPTIM8SEG_SRC} )
add_library(ldpc_cl MODULE ${PHY_LDPC_CL_SRC} )
target_link_libraries(ldpc_cl OpenCL)
if (CUDA_FOUND)
cuda_add_library(ldpc_cuda MODULE ${PHY_LDPC_CUDA_SRC} )
set_target_properties(ldpc_cuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
# CUDA_ADD_CUFFT_TO_TARGET(ldpc_cuda)
endif (CUDA_FOUND)
add_library(ldpc MODULE ${PHY_LDPC_OPTIM8SEGMULTI_SRC} ) add_library(ldpc MODULE ${PHY_LDPC_OPTIM8SEGMULTI_SRC} )
add_library(coding MODULE ${PHY_TURBOSRC} ) add_library(coding MODULE ${PHY_TURBOSRC} )
add_library(dfts MODULE ${OPENAIR1_DIR}/PHY/TOOLS/oai_dfts.c ) add_library(dfts MODULE ${OPENAIR1_DIR}/PHY/TOOLS/oai_dfts.c )
...@@ -3119,7 +3136,11 @@ target_link_libraries (nr-uesoftmodem ${LIB_LMS_LIBRARIES}) ...@@ -3119,7 +3136,11 @@ target_link_libraries (nr-uesoftmodem ${LIB_LMS_LIBRARIES})
target_link_libraries (nr-uesoftmodem ${T_LIB}) target_link_libraries (nr-uesoftmodem ${T_LIB})
add_dependencies( nr-uesoftmodem ldpc_orig ldpc_optim ldpc_optim8seg ldpc ) add_dependencies( nr-uesoftmodem ldpc_orig ldpc_optim ldpc_optim8seg ldpc )
if (CUDA_FOUND)
add_dependencies( nr-uesoftmodem ldpc_cuda)
add_dependencies( nr-softmodem ldpc_cuda)
add_dependencies( ocp-gnb ldpc_cuda)
endif (CUDA_FOUND)
###################################" ###################################"
# Addexecutables for tests # Addexecutables for tests
#################################### ####################################
...@@ -3179,46 +3200,18 @@ target_link_libraries(smallblocktest ...@@ -3179,46 +3200,18 @@ target_link_libraries(smallblocktest
m pthread ${ATLAS_LIBRARIES} dl m pthread ${ATLAS_LIBRARIES} dl
) )
if (CUDA_FOUND)
###################################################
# For CUDA library
###################################################
CUDA_ADD_LIBRARY(LDPC_CU add_executable(ldpctest
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu ${PHY_NR_CODINGIF}
) ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
CUDA_ADD_CUFFT_TO_TARGET(LDPC_CU) ${T_SOURCE}
cuda_add_executable(ldpctest ${SHLIB_LOADER_SOURCES}
${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c )
${T_SOURCE}
${SHLIB_LOADER_SOURCES}
)
target_link_libraries(ldpctest -ldl
-Wl,--start-group
LDPC_CU UTIL SIMU PHY_NR CONFIG_LIB
-Wl,--end-group
m pthread ${ATLAS_LIBRARIES} dl
)
else (CUDA_FOUND)
add_executable(ldpctest
${PHY_NR_CODINGIF}
${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
${T_SOURCE}
${SHLIB_LOADER_SOURCES}
)
endif ()
# add_executable(ldpctest
# ${PHY_NR_CODINGIF}
# ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
# ${T_SOURCE}
# ${SHLIB_LOADER_SOURCES}
# )
add_dependencies( ldpctest ldpc_orig ldpc_optim ldpc_optim8seg ldpc ) add_dependencies( ldpctest ldpc_orig ldpc_optim ldpc_optim8seg ldpc )
if (CUDA_FOUND)
add_dependencies( ldpctest ldpc_cuda)
endif (CUDA_FOUND)
target_link_libraries(ldpctest target_link_libraries(ldpctest
-Wl,--start-group UTIL SIMU_COMMON SIMU PHY_NR PHY_COMMON PHY_NR_COMMON CONFIG_LIB -Wl,--end-group -Wl,--start-group UTIL SIMU_COMMON SIMU PHY_NR PHY_COMMON PHY_NR_COMMON CONFIG_LIB -Wl,--end-group
m pthread ${ATLAS_LIBRARIES} dl m pthread ${ATLAS_LIBRARIES} dl
......
...@@ -10,6 +10,10 @@ int load_module_shlib(char *modname,loader_shlibfunc_t *farray, int numf) ...@@ -10,6 +10,10 @@ int load_module_shlib(char *modname,loader_shlibfunc_t *farray, int numf)
* If the farray pointer is null, looks for `< modname >_getfarray` symbol, calls the corresponding function when the symbol is found. `< modname >_getfarray` takes one argument, a pointer to a `loader_shlibfunc_t` array, and returns the number of items in this array, as defined by the `getfarrayfunc_t` type. The `loader_shlibfunc_t` array returned by the shared library must be fully filled (both `fname` and `fptr` fields). * If the farray pointer is null, looks for `< modname >_getfarray` symbol, calls the corresponding function when the symbol is found. `< modname >_getfarray` takes one argument, a pointer to a `loader_shlibfunc_t` array, and returns the number of items in this array, as defined by the `getfarrayfunc_t` type. The `loader_shlibfunc_t` array returned by the shared library must be fully filled (both `fname` and `fptr` fields).
* looks for the `numf` function symbols listed in the `farray[i].fname` arguments and set the corresponding `farray[i].fptr`function pointers * looks for the `numf` function symbols listed in the `farray[i].fname` arguments and set the corresponding `farray[i].fptr`function pointers
```c
int load_module_version_shlib(char *modname, char *version, loader_shlibfunc_t *farray, int numf)
```
Allows loading a specific library version, as specified by the `version` parameter. When version is not NULL the version that is possibly specified as a config module parameter is ignored. This call has been introduced for phy simulators executables which do not use the config module. It is used, for example, by the ldcp initialization (`load_nrLDPClib` function in [nrLDPC_load.c](../../../../../openair1/PHY/CODING/nrLDPC_load.c) to allow the `ldpctest` simulator to select the cuda accelerated ldcp implementation. `load_module_shlib` is just a define macro to switch to a `load_module_version_shlib` call, adding a NULL pointer for the version parameter.
```c ```c
void * get_shlibmodule_fptr(char *modname, char *fname) void * get_shlibmodule_fptr(char *modname, char *fname)
......
...@@ -63,7 +63,7 @@ void loader_init(void) { ...@@ -63,7 +63,7 @@ void loader_init(void) {
} }
/* build the full shared lib name from the module name */ /* build the full shared lib name from the module name */
char *loader_format_shlibpath(char *modname) char *loader_format_shlibpath(char *modname, char *version)
{ {
char *tmpstr; char *tmpstr;
...@@ -97,7 +97,10 @@ int ret; ...@@ -97,7 +97,10 @@ int ret;
shlibpath = loader_data.shlibpath ; shlibpath = loader_data.shlibpath ;
} }
/* no specific shared lib version */ /* no specific shared lib version */
if (shlibversion == NULL) { if (version != NULL) { // version specified as a function parameter
shlibversion=version;
}
if (shlibversion == NULL) { // no specific version specified, neither as a config param or as a function param
shlibversion = "" ; shlibversion = "" ;
} }
/* alloc memory for full module shared lib file name */ /* alloc memory for full module shared lib file name */
...@@ -118,7 +121,7 @@ int ret; ...@@ -118,7 +121,7 @@ int ret;
return tmpstr; return tmpstr;
} }
int load_module_shlib(char *modname,loader_shlibfunc_t *farray, int numf, void *autoinit_arg) int load_module_version_shlib(char *modname, char *version, loader_shlibfunc_t *farray, int numf, void *autoinit_arg)
{ {
void *lib_handle = NULL; void *lib_handle = NULL;
initfunc_t fpi; initfunc_t fpi;
...@@ -138,7 +141,7 @@ int load_module_shlib(char *modname,loader_shlibfunc_t *farray, int numf, void * ...@@ -138,7 +141,7 @@ int load_module_shlib(char *modname,loader_shlibfunc_t *farray, int numf, void *
loader_init(); loader_init();
} }
shlib_path = loader_format_shlibpath(modname); shlib_path = loader_format_shlibpath(modname, version);
for (int i = 0; i < loader_data.numshlibs; i++) { for (int i = 0; i < loader_data.numshlibs; i++) {
if (strcmp(loader_data.shlibs[i].name, modname) == 0) { if (strcmp(loader_data.shlibs[i].name, modname) == 0) {
......
...@@ -84,10 +84,11 @@ loader_data_t loader_data; ...@@ -84,10 +84,11 @@ loader_data_t loader_data;
/*-------------------------------------------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------------------------------------------*/
#else /* LOAD_MODULE_SHLIB_MAIN */ #else /* LOAD_MODULE_SHLIB_MAIN */
extern int load_module_shlib(char *modname, loader_shlibfunc_t *farray, int numf, void *initfunc_arg);
extern int load_module_version_shlib(char *modname, char *version, loader_shlibfunc_t *farray, int numf, void *initfunc_arg);
extern void * get_shlibmodule_fptr(char *modname, char *fname); extern void * get_shlibmodule_fptr(char *modname, char *fname);
extern loader_data_t loader_data; extern loader_data_t loader_data;
#endif /* LOAD_MODULE_SHLIB_MAIN */ #endif /* LOAD_MODULE_SHLIB_MAIN */
#define load_module_shlib(M, F, N, I) load_module_version_shlib(M, NULL, F, N, I)
#endif #endif
...@@ -203,10 +203,7 @@ UE on machine 2: ...@@ -203,10 +203,7 @@ UE on machine 2:
[Selecting an alternative ldpc implementation at run time](../openair1/PHY/CODING/DOC/LDPCImplementation.md)
[oai wiki home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home) [oai wiki home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home)
......
...@@ -436,7 +436,7 @@ int main( int argc, char **argv ) { ...@@ -436,7 +436,7 @@ int main( int argc, char **argv ) {
itti_init(TASK_MAX, tasks_info); itti_init(TASK_MAX, tasks_info);
init_opt() ; init_opt() ;
load_nrLDPClib(); load_nrLDPClib(NULL);
if (ouput_vcd) { if (ouput_vcd) {
vcd_signal_dumper_init("/tmp/openair_dump_nrUE.vcd"); vcd_signal_dumper_init("/tmp/openair_dump_nrUE.vcd");
......
#LDPC coder/decoder implementation # LDPC coder/decoder implementation
The LDPC coder and decoder are implemented in a shared library, dynamically loaded at run-time using the [oai shared library loader](file://../../../../common/utils/DOC/loader.md). The code loading the LDPC library is in [nrLDPC_load.c](file://../nrLDPC_load.c), in function `load_nrLDPClib`, which must be called at init time. The LDPC coder and decoder are implemented in a shared library, dynamically loaded at run-time using the [oai shared library loader](file://../../../../common/utils/DOC/loader.md). The code loading the LDPC library is in [nrLDPC_load.c](file://../nrLDPC_load.c), in function `load_nrLDPClib`, which must be called at init time.
## Selecting the LDPC library at run time ## Selecting the LDPC library at run time
By default the function `int load_nrLDPClib(void)` looks for `libldpc.so`, this default behavior can be changed using the oai loader configuration options in the configuration file or from the command line as shown below: By default the function `int load_nrLDPClib(void)` looks for `libldpc.so`, this default behavior can be changed using the oai loader configuration options in the configuration file or from the command line as shown below:
>loading `libldpc_optim8seg.so` instead of `libldpc.so` #### Examples of ldpc shared lib selection when running nr softmodem's:
loading `libldpc_optim8seg.so` instead of `libldpc.so`:
``` ```
./nr-softmodem -O libconfig:gnb.band78.tm1.106PRB.usrpx300.conf:dbgl5 --loader.ldpc.shlibversion _optim8seg ./nr-softmodem -O libconfig:gnb.band78.tm1.106PRB.usrpx300.conf:dbgl5 --loader.ldpc.shlibversion _optim8seg
...@@ -18,9 +20,160 @@ By default the function `int load_nrLDPClib(void)` looks for `libldpc.so`, this ...@@ -18,9 +20,160 @@ By default the function `int load_nrLDPClib(void)` looks for `libldpc.so`, this
........................ ........................
``` ```
Today, this mechanism is not available in the `ldpctest` phy simulator which doesn't initialize the [configuration module](file://../../../../common/config/DOC/config.md). loads `libldpc.so` and `libldpc_orig.so` to compare the performance of the two implementations. loading `libldpc_cl.so` instead of `libldpc.so`:
`make ldpc_cl`
`cp ../../../openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.cl`
`./nr-softmodem -O libconfig:gnb.band78.sa.fr1.106PRB.usrpb210.conf:dbgl5 --rfsim --rfsimulator.serveraddr server --sa --log_config.gtpu_log_level info --loader.ldpc.shlibversion _cl`
``` [LOADER] library libldpc_cl.so successfully loaded
------------------------------------------------
[HW] Platform 0, OpenCL profile FULL_PROFILE
[HW] Platform 0, OpenCL version OpenCL 2.1 LINUX
[HW] Device 0 is available
[HW] Device 0, type 2 = 0x00000002: cpu
[HW] Device 0, number of Compute Units: 8
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 8192
[HW] Device 0, max Work Items size for dimension: 1 8192
[HW] Device 0, max Work Items size for dimension: 2 8192
[New Thread 0x7fffcc258700 (LWP 3945123)]
[New Thread 0x7fffc3e57700 (LWP 3945124)]
[New Thread 0x7fffcbe57700 (LWP 3945125)]
[New Thread 0x7fffcba56700 (LWP 3945126)]
[New Thread 0x7fffcb254700 (LWP 3945128)]
[New Thread 0x7fffcb655700 (LWP 3945127)]
[New Thread 0x7fffcae53700 (LWP 3945129)]
[HW] Platform 1, OpenCL profile FULL_PROFILE
[HW] Platform 1, OpenCL version OpenCL 2.0 beignet 1.3
[New Thread 0x7fffc965a700 (LWP 3945130)]
[Thread 0x7fffc965a700 (LWP 3945130) exited]
[HW] Device 0 is available
[HW] Device 0, type 4 = 0x00000004: gpu
[HW] Device 0, number of Compute Units: 20
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 512
[HW] Device 0, max Work Items size for dimension: 1 512
[HW] Device 0, max Work Items size for dimension: 2 512
-----------------------------------------------------------------
```
`./nr-uesoftmodem -r 106 --numerology 1 --band 78 -C 3619200000 --rfsim --sa -O libconfig:/usr/local/oai/conf/nrue_sim.conf:dbgl5 --nokrnmod --loader.ldpc.shlibversion _cl --log_config.hw_log_level info`
```[CONFIG] shlibversion set to _cl from command line
............................................................
[CONFIG] loader.ldpc 1 options set from command line
[LOADER] library libldpc_cl.so successfully loaded
[HW] Platform 0, OpenCL profile FULL_PROFILE
[HW] Platform 0, OpenCL version OpenCL 2.1 LINUX
[HW] Device 0 is available
[HW] Device 0, type 2 = 0x00000002: cpu
[HW] Device 0, number of Compute Units: 8
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 8192
[HW] Device 0, max Work Items size for dimension: 1 8192
[HW] Device 0, max Work Items size for dimension: 2 8192
[New Thread 0x7fffecccc700 (LWP 3945413)]
[New Thread 0x7fffec8cb700 (LWP 3945415)]
[New Thread 0x7fffec4ca700 (LWP 3945414)]
[New Thread 0x7fffdf7fd700 (LWP 3945417)]
[New Thread 0x7fffdfbfe700 (LWP 3945418)]
[New Thread 0x7fffdffff700 (LWP 3945416)]
[New Thread 0x7fffd73fc700 (LWP 3945419)]
[HW] Platform 1, OpenCL profile FULL_PROFILE
[HW] Platform 1, OpenCL version OpenCL 2.0 beignet 1.3
[New Thread 0x7fffde105700 (LWP 3945420)]
[Thread 0x7fffde105700 (LWP 3945420) exited]
[HW] Device 0 is available
[HW] Device 0, type 4 = 0x00000004: gpu
[HW] Device 0, number of Compute Units: 20
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 512
[HW] Device 0, max Work Items size for dimension: 1 512
[HW] Device 0, max Work Items size for dimension: 2 512
------------------------------------------------------------
```
```
A mechanism to select ldpc implementation is also available in the `ldpctest` phy simulator via the `-v`option, which can be used to specify the version of the ldpc shared library to be used.
#### Examples of ldpc shared lib selection when running ldpctest:
Loading libldpc_cuda.so, the cuda implementation of the ldpc decoder:
```$ ./ldpctest -v _cuda
Initializing random number generator, seed 0
block length 8448:
n_trials 1:
SNR0 -2.000000:
[CONFIG] get parameters from cmdline , debug flags: 0x00400000
[CONFIG] log_config: 2/3 parameters successfully set
[CONFIG] log_config: 53/53 parameters successfully set
[CONFIG] log_config: 53/53 parameters successfully set
[CONFIG] log_config: 16/16 parameters successfully set
[CONFIG] log_config: 16/16 parameters successfully set
log init done
[CONFIG] loader: 2/2 parameters successfully set
[CONFIG] loader.ldpc: 1/2 parameters successfully set
[LOADER] library libldpc_cuda.so successfully loaded
...................................
​```
```
Loading libldpc_cl.so, the opencl implementation of the ldpc decoder:
`make ldpc_cl`
`cp ../../../openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.cl`
`./ldpctest -v _cl`
```$ ./ldpctest -v _cl
Initializing random number generator, seed 0
block length 8448:
n_trials 1:
SNR0 -2.000000:
[CONFIG] get parameters from cmdline , debug flags: 0x00400000
[CONFIG] log_config: 2/3 parameters successfully set
[CONFIG] log_config: 53/53 parameters successfully set
[CONFIG] log_config: 53/53 parameters successfully set
[CONFIG] log_config: 16/16 parameters successfully set
[CONFIG] log_config: 16/16 parameters successfully set
log init done
[CONFIG] loader: 2/2 parameters successfully set
[CONFIG] loader.ldpc: 1/2 parameters successfully set
[LOADER] library libldpc_cl.so successfully loaded
[HW] Platform 0, OpenCL profile FULL_PROFILE
[HW] Platform 0, OpenCL version OpenCL 2.1 LINUX
[HW] Device 0 is available
[HW] Device 0, type 2 = 0x00000002: cpu
[HW] Device 0, number of Compute Units: 8
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 8192
[HW] Device 0, max Work Items size for dimension: 1 8192
[HW] Device 0, max Work Items size for dimension: 2 8192
[HW] Platform 1, OpenCL profile FULL_PROFILE
[HW] Platform 1, OpenCL version OpenCL 2.0 beignet 1.3
[HW] Device 0 is available
[HW] Device 0, type 4 = 0x00000004: gpu
[HW] Device 0, number of Compute Units: 20
[HW] Device 0, max Work Items dimension: 3
[HW] Device 0, max Work Items size for dimension: 0 512
[HW] Device 0, max Work Items size for dimension: 1 512
[HW] Device 0, max Work Items size for dimension: 2 512
................................
​```
```
### LDPC libraries
Libraries implementing the LDPC algorithms must be named `libldpc<_version>.so`, they must implement three functions: `nrLDPC_initcall` `nrLDPC_decod` and `nrLDPC_encod`. The prototypes for these functions is defined in [nrLDPC_defs.h](file://nrLDPC_defs.h).
`libldpc_cuda.so`has been tested with the `ldpctest` executable, usage from the softmodem's has to be tested.
###LDPC libraries `libldpc_cl`is under development.
Libraries implementing the LDPC algorithms must be named `libldpc<_version>.so`, they must implement two functions: `nrLDPC_decod` and `nrLDPC_encod`. The prototypes for these functions is defined in [nrLDPC_defs.h](file://nrLDPC_defs.h).
[oai Wikis home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home) [oai Wikis home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home)
...@@ -101,8 +101,8 @@ int test_ldpc(short No_iteration, ...@@ -101,8 +101,8 @@ int test_ldpc(short No_iteration,
unsigned int *crc_misses, unsigned int *crc_misses,
time_stats_t *time_optim, time_stats_t *time_optim,
time_stats_t *time_decoder, time_stats_t *time_decoder,
n_iter_stats_t *dec_iter, n_iter_stats_t *dec_iter
short run_cuda) )
{ {
//clock initiate //clock initiate
//time_stats_t time,time_optim,tinput,tprep,tparity,toutput, time_decoder; //time_stats_t time,time_optim,tinput,tprep,tparity,toutput, time_decoder;
...@@ -393,28 +393,13 @@ int test_ldpc(short No_iteration, ...@@ -393,28 +393,13 @@ int test_ldpc(short No_iteration,
decParams.R=code_rate_vec[R_ind];//13; decParams.R=code_rate_vec[R_ind];//13;
decParams.numMaxIter=No_iteration; decParams.numMaxIter=No_iteration;
decParams.outMode = nrLDPC_outMode_BIT; decParams.outMode = nrLDPC_outMode_BIT;
decParams.block_length=block_length;
//decParams.outMode =nrLDPC_outMode_LLRINT8; //decParams.outMode =nrLDPC_outMode_LLRINT8;
#ifdef CUDA_FLAG nrLDPC_initcall(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j]);
set_compact_BG(Zc,BG);
init_LLR_DMA_for_CUDA(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], block_length);
#endif
for(j=0;j<n_segments;j++) { for(j=0;j<n_segments;j++) {
start_meas(time_decoder); start_meas(time_decoder);
#ifdef CUDA_FLAG
if(run_cuda){
n_iter = nrLDPC_decoder_LYC(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], block_length, time_decoder);
}
else{
// decode the sequence
// decoder supports BG2, Z=128 & 256
//esimated_output=ldpc_decoder(channel_output_fixed, block_length, No_iteration, (double)((float)nom_rate/(float)denom_rate));
///nrLDPC_decoder(&decParams, channel_output_fixed, estimated_output, NULL);
n_iter = nrLDPC_decoder(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], p_nrLDPC_procBuf, p_decoder_profiler); n_iter = nrLDPC_decoder(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], p_nrLDPC_procBuf, p_decoder_profiler);
} stop_meas(time_decoder);
#else
n_iter = nrLDPC_decoder(&decParams, (int8_t*)channel_output_fixed[j], (int8_t*)estimated_output[j], p_nrLDPC_procBuf, p_decoder_profiler);
#endif
stop_meas(time_decoder);
} }
//for (i=(Kb+nrows) * Zc-5;i<(Kb+nrows) * Zc;i++) //for (i=(Kb+nrows) * Zc-5;i<(Kb+nrows) * Zc;i++)
...@@ -514,17 +499,14 @@ int test_ldpc(short No_iteration, ...@@ -514,17 +499,14 @@ int test_ldpc(short No_iteration,
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
#ifdef CUDA_FLAG
warmup_for_GPU();
#endif
unsigned int errors, errors_bit, crc_misses; unsigned int errors, errors_bit, crc_misses;
double errors_bit_uncoded; double errors_bit_uncoded;
short block_length=8448; // decoder supports length: 1201 -> 1280, 2401 -> 2560 short block_length=8448; // decoder supports length: 1201 -> 1280, 2401 -> 2560
char *ldpc_version=NULL; /* version of the ldpc decoder library to use (XXX suffix to use when loading libldpc_XXX.so */
short No_iteration=5; short No_iteration=5;
int n_segments=1; int n_segments=1;
//double rate=0.333; //double rate=0.333;
short run_cuda = 0;
int nom_rate=1; int nom_rate=1;
int denom_rate=3; int denom_rate=3;
...@@ -544,7 +526,7 @@ int main(int argc, char *argv[]) ...@@ -544,7 +526,7 @@ int main(int argc, char *argv[])
short BG=0,Zc,Kb=0; short BG=0,Zc,Kb=0;
while ((c = getopt (argc, argv, "q:r:s:S:l:G:n:d:i:t:u:h")) != -1) while ((c = getopt (argc, argv, "q:r:s:S:l:G:n:d:i:t:u:hv:")) != -1)
switch (c) switch (c)
{ {
case 'q': case 'q':
...@@ -564,7 +546,7 @@ int main(int argc, char *argv[]) ...@@ -564,7 +546,7 @@ int main(int argc, char *argv[])
break; break;
case 'G': case 'G':
run_cuda = atoi(optarg); ldpc_version="_cuda";
break; break;
case 'n': case 'n':
...@@ -590,9 +572,11 @@ int main(int argc, char *argv[]) ...@@ -590,9 +572,11 @@ int main(int argc, char *argv[])
case 'u': case 'u':
test_uncoded = atoi(optarg); test_uncoded = atoi(optarg);
break; break;
case 'v':
ldpc_version=strdup(optarg);
break;
case 'h': case 'h':
default: default:
printf("CURRENTLY SUPPORTED CODE RATES: \n"); printf("CURRENTLY SUPPORTED CODE RATES: \n");
printf("BG1 (blocklength > 3840): 1/3, 2/3, 22/25 (8/9) \n"); printf("BG1 (blocklength > 3840): 1/3, 2/3, 22/25 (8/9) \n");
printf("BG2 (blocklength <= 3840): 1/5, 1/3, 2/3 \n\n"); printf("BG2 (blocklength <= 3840): 1/5, 1/3, 2/3 \n\n");
...@@ -609,6 +593,7 @@ int main(int argc, char *argv[]) ...@@ -609,6 +593,7 @@ int main(int argc, char *argv[])
printf("-t SNR simulation step, Default: 0.1\n"); printf("-t SNR simulation step, Default: 0.1\n");
printf("-i Max decoder iterations, Default: 5\n"); printf("-i Max decoder iterations, Default: 5\n");
printf("-u Set SNR per coded bit, Default: 0\n"); printf("-u Set SNR per coded bit, Default: 0\n");
printf("-v XXX Set ldpc shared library version. libldpc_XXX.so will be used \n");
exit(1); exit(1);
break; break;
} }
...@@ -619,7 +604,10 @@ int main(int argc, char *argv[]) ...@@ -619,7 +604,10 @@ int main(int argc, char *argv[])
printf("SNR0 %f: \n", SNR0); printf("SNR0 %f: \n", SNR0);
load_nrLDPClib(); if (ldpc_version != NULL)
load_nrLDPClib(ldpc_version);
else
load_nrLDPClib(NULL);
load_nrLDPClib_ref("_orig", &encoder_orig); load_nrLDPClib_ref("_orig", &encoder_orig);
//for (block_length=8;block_length<=MAX_BLOCK_LENGTH;block_length+=8) //for (block_length=8;block_length<=MAX_BLOCK_LENGTH;block_length+=8)
...@@ -691,8 +679,7 @@ int main(int argc, char *argv[]) ...@@ -691,8 +679,7 @@ int main(int argc, char *argv[])
&crc_misses, &crc_misses,
time_optim, time_optim,
time_decoder, time_decoder,
dec_iter, dec_iter);
run_cuda);
printf("SNR %f, BLER %f (%u/%d)\n", SNR, (float)decoded_errors[i]/(float)n_trials, decoded_errors[i], n_trials); printf("SNR %f, BLER %f (%u/%d)\n", SNR, (float)decoded_errors[i]/(float)n_trials, decoded_errors[i], n_trials);
printf("SNR %f, BER %f (%u/%d)\n", SNR, (float)errors_bit/(float)n_trials/(float)block_length/(double)n_segments, decoded_errors[i], n_trials); printf("SNR %f, BER %f (%u/%d)\n", SNR, (float)errors_bit/(float)n_trials/(float)block_length/(double)n_segments, decoded_errors[i], n_trials);
......
...@@ -46,7 +46,8 @@ ...@@ -46,7 +46,8 @@
#endif #endif
static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDPC_procBuf* p_procBuf, uint32_t numLLR, t_nrLDPC_lut* p_lut, t_nrLDPC_dec_params* p_decParams, t_nrLDPC_time_stats* p_profiler); static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDPC_procBuf* p_procBuf, uint32_t numLLR, t_nrLDPC_lut* p_lut, t_nrLDPC_dec_params* p_decParams, t_nrLDPC_time_stats* p_profiler);
void nrLDPC_initcall(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out) {
}
int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out, t_nrLDPC_procBuf* p_procBuf, t_nrLDPC_time_stats* p_profiler) int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out, t_nrLDPC_procBuf* p_procBuf, t_nrLDPC_time_stats* p_profiler)
{ {
uint32_t numLLR; uint32_t numLLR;
......
This diff is collapsed.
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.0 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file PHY/CODING/nrLDPC_decoder_kernels_CL.cl
* \brief kernel functions for ldpc decoder accelerated via openCL
* \author Francois TABURET
* \date 2021
* \version 1.0
* \company Nokia BellLabs France
* \email: francois.taburet@nokia-bell-labs.com
* \note initial implem - translation of cuda version
* \warning
*/
#define define MAX_ITERATION 2
#define MC 1
#define INT32_MAX 2147483647
typedef struct{
char x;
char y;
short value;
} h_element;
//__global char dev_dt [46*68*384];
//__local char *dev_t;
//__global char dev_llr[68*384];
//__global unsigned char dev_tmp[68*384];
//__constant h_element dev_h_compact1[46*19] = {}; // used in kernel 1
//__constant h_element dev_h_compact2[68*30] = {}; // used in kernel 2
// __device__ __constantant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constantant__ h_element dev_h_compact2[68*30]; // used in kernel 2
// row and col element count
__constant char h_ele_row_bg1_count[46] = {
19, 19, 19, 19, 3, 8, 9, 7, 10, 9,
7, 8, 7, 6, 7, 7, 6, 6, 6, 6,
6, 6, 5, 5, 6, 5, 5, 4, 5, 5,
5, 5, 5, 5, 5, 5, 5, 4, 5, 5,
4, 5, 4, 5, 5, 4};
__constant char h_ele_col_bg1_count[68] = {
30, 28, 7, 11, 9, 4, 8, 12, 8, 7,
12, 10, 12, 11, 10, 7, 10, 10, 13, 7,
8, 11, 12, 5, 6, 6, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1};
__constant char h_ele_row_bg2_count[42] = {
8, 10, 8, 10, 4, 6, 6, 6, 4, 5,
5, 5, 4, 5, 5, 4, 5, 5, 4, 4,
4, 4, 3, 4, 4, 3, 5, 3, 4, 3,
5, 3, 4, 4, 4, 4, 4, 3, 4, 4,
4, 4};
__constant char h_ele_col_bg2_count[52] = {
22, 23, 10, 5, 5, 14, 7, 13, 6, 8,
9, 16, 9, 12, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1};
// Kernel 1
__kernel void ldpc_cnp_kernel_1st_iter( __global char * dev_llr, __global char * dev_dt, __local h_element *dev_h_compact1, int BG, int row, int col, int Zc)
{
// int iMCW = blockIdx.y; // codeword id
// int iBlkRow = blockIdx.x; // block row in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
int iMCW = get_group_id(1); // codeword id
int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base
int iSubRow = get_local_id(0);; // row index in sub_block of h_base
int iCol; // overall col index in h_base
int offsetR;
int shift_t;
// For 2-min algorithm.
int Q_sign = 0;
int sq;
int Q, Q_abs;
int R_temp;
int sign = 1;
int rmin1 = INT32_MAX;
int rmin2 = INT32_MAX;
char idx_min = 0;
h_element h_element_t;
int s = (BG==1)? h_ele_row_bg1_count[iBlkRow]:h_ele_row_bg2_count[iBlkRow];
offsetR = (iMCW * row*col*Zc) + iBlkRow * Zc + iSubRow; // row*col*Zc = size of dev_dt
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("s: %d, offset %d\n", s, offsetR);
// The 1st recursion
for(int i = 0; i < s; i++) // loop through all the ZxZ sub-blocks in a row
{
h_element_t = dev_h_compact1[i*row+iBlkRow]; // compact_col == row
iBlkCol = h_element_t.y;
shift_t = h_element_t.value;
shift_t = (iSubRow + shift_t) % Zc;
iCol = (iMCW * col*Zc) + iBlkCol * Zc + shift_t; // col*Zc = size of llr
Q = dev_llr[iCol];
Q_abs = (Q>0)? Q : -Q;
sq = Q < 0;
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("i %d, icol %d, Q: %d\n", i, iCol, Q);
// quick version
sign = sign * (1 - sq * 2);
Q_sign |= sq << i;
if (Q_abs < rmin1){
rmin2 = rmin1;
rmin1 = Q_abs;
idx_min = i;
} else if (Q_abs < rmin2){
rmin2 = Q_abs;
}
}
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("min1 %d, min2 %d, min1_idx %d\n", rmin1, rmin2, idx_min);
// The 2nd recursion
for(int i = 0; i < s; i++){
// v0: Best performance so far. 0.75f is the value of alpha.
sq = 1 - 2 * ((Q_sign >> i) & 0x01);
R_temp = 0.75f * sign * sq * (i != idx_min ? rmin1 : rmin2);
// write results to global memory
h_element_t = dev_h_compact1[i*row+iBlkRow];
int addr_temp = offsetR + h_element_t.y * row * Zc;
dev_dt[addr_temp] = R_temp;
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("R_temp %d, temp_addr %d\n", R_temp, addr_temp);
}
}
// Kernel_1
__kernel void ldpc_cnp_kernel( __global char * dev_llr, __global char * dev_dt, __local h_element *dev_h_compact1, int BG, int row, int col, int Zc)
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
// int iMCW = blockIdx.y;
// int iBlkRow = blockIdx.x; // block row in h_base // block col in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
int iMCW = get_group_id(1);
int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base
int iSubRow = get_local_id(0);; // row index in sub_block of h_base
int iCol; // overall col index in h_base
int offsetR;
int shift_t;
// For 2-min algorithm.
int Q_sign = 0;
int sq;
int Q, Q_abs;
int R_temp;
int sign = 1;
int rmin1 = INT32_MAX;
int rmin2 = INT32_MAX;
char idx_min = 0;
h_element h_element_t;
int s = (BG==1)? h_ele_row_bg1_count[iBlkRow]: h_ele_row_bg2_count[iBlkRow];
offsetR = (iMCW *row*col*Zc) + iBlkRow * Zc + iSubRow; // row * col * Zc = size of dev_dt
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("s: %d, offset %d\n", s, offsetR);
// The 1st recursion
for(int i = 0; i < s; i++) // loop through all the ZxZ sub-blocks in a row
{
h_element_t = dev_h_compact1[i*row+iBlkRow];
iBlkCol = h_element_t.y;
shift_t = h_element_t.value;
shift_t = (iSubRow + shift_t) % Zc;
iCol = iBlkCol * Zc + shift_t;
R_temp = dev_dt[offsetR + iBlkCol * row * Zc];
Q = dev_llr[iMCW * (col*Zc) + iCol] - R_temp;
Q_abs = (Q>0)? Q : -Q;
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("i %d, icol %d, Q: %d\n", i, iCol, Q);
sq = Q < 0;
sign = sign * (1 - sq * 2);
Q_sign |= sq << i;
if (Q_abs < rmin1){
rmin2 = rmin1;
rmin1 = Q_abs;
idx_min = i;
} else if (Q_abs < rmin2){
rmin2 = Q_abs;
}
}
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("min1 %d, min2 %d, min1_idx %d\n", rmin1, rmin2, idx_min);
// The 2nd recursion
for(int i = 0; i < s; i ++){
sq = 1 - 2 * ((Q_sign >> i) & 0x01);
R_temp = 0.75f * sign * sq * (i != idx_min ? rmin1 : rmin2);
// write results to global memory
h_element_t = dev_h_compact1[i*row+iBlkRow];
int addr_temp = h_element_t.y * row * Zc + offsetR;
dev_dt[addr_temp] = R_temp;
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("R_temp %d, temp_addr %d\n", R_temp, addr_temp);
}
}
// Kernel 2: VNP processing
__kernel void
ldpc_vnp_kernel_normal(__global char * dev_llr, __global char * dev_dt, __global char * dev_const_llr, __local h_element *dev_h_compact2, int BG, int row, int col, int Zc)
{
// int iMCW = blockIdx.y;
// int iBlkCol = blockIdx.x;
// int iSubCol = threadIdx.x;
int iMCW = get_group_id(1);
int iBlkCol = get_group_id(0);
int iBlkRow;
int iSubCol = get_local_id(0);
int iRow;
int iCol;
int shift_t, sf;
int APP;
h_element h_element_t;
// update all the llr values
iCol = iBlkCol * Zc + iSubCol;
APP = dev_const_llr[iMCW *col*Zc + iCol];
int offsetDt = iMCW *row*col*Zc + iBlkCol * row * Zc;
int s = (BG==1)? h_ele_col_bg1_count[iBlkCol]:h_ele_col_bg2_count[iBlkCol];
for(int i = 0; i < s; i++)
{
h_element_t = dev_h_compact2[i*col+iBlkCol];
shift_t = h_element_t.value%Zc;
iBlkRow = h_element_t.x;
sf = iSubCol - shift_t;
sf = (sf + Zc) % Zc;
iRow = iBlkRow * Zc + sf;
APP = APP + dev_dt[offsetDt + iRow];
}
if(APP > SCHAR_MAX) APP = SCHAR_MAX;
if(APP < SCHAR_MIN) APP = SCHAR_MIN;
// write back to device global memory
dev_llr[iMCW *col*Zc + iCol] = APP;
}
__kernel void pack_decoded_bit(__global unsigned char * dev_llr, __global unsigned char * dev_tmp, int col, int Zc)
{
// int iMCW = blockIdx.y;
// int btid = threadIdx.x;
unsigned char tmp[128];
int iMCW = get_group_id(1);
int btid = get_local_id(0);
int tid = iMCW * col*Zc + get_group_id(0)*128 + btid;
tmp[btid] = 0;
if(dev_llr[tid] < 0){
tmp[btid] = 1 << (7-(btid&7));
}
// __syncthreads();
if(btid < 16){
dev_tmp[iMCW * col*Zc + get_group_id(0)*16+btid] = 0;
for(int i = 0; i < 8; i++){
dev_tmp[iMCW * col*Zc + get_group_id(0)*16+btid] += tmp[btid*8+i];
}
}
}
...@@ -70,6 +70,7 @@ typedef struct nrLDPC_dec_params { ...@@ -70,6 +70,7 @@ typedef struct nrLDPC_dec_params {
uint16_t Z; /**< Lifting size */ uint16_t Z; /**< Lifting size */
uint8_t R; /**< Decoding rate: Format 15,13,... for code rates 1/5, 1/3,... */ uint8_t R; /**< Decoding rate: Format 15,13,... for code rates 1/5, 1/3,... */
uint8_t numMaxIter; /**< Maximum number of iterations */ uint8_t numMaxIter; /**< Maximum number of iterations */
int block_length;
e_nrLDPC_outMode outMode; /**< Output format */ e_nrLDPC_outMode outMode; /**< Output format */
} t_nrLDPC_dec_params; } t_nrLDPC_dec_params;
......
...@@ -7,14 +7,14 @@ ...@@ -7,14 +7,14 @@
* \note * \note
* \warning * \warning
*/ */
#include <iostream>
#include <stdio.h> #include <stdio.h>
#include <unistd.h> #include <unistd.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cuda.h> #include <cuda.h>
#include "PHY/CODING/nrLDPC_decoder/nrLDPC_types.h" #include "PHY/CODING/nrLDPC_decoder/nrLDPC_types.h"
#include "PHY/CODING/nrLDPC_decoder/nrLDPCdecoder_defs.h" #include "PHY/CODING/nrLDPC_decoder/nrLDPCdecoder_defs.h"
#include "assertions.h"
#include "bgs/BG1_I0" #include "bgs/BG1_I0"
#include "bgs/BG1_I1" #include "bgs/BG1_I1"
#include "bgs/BG1_I2" #include "bgs/BG1_I2"
...@@ -462,10 +462,11 @@ void read_BG(int BG, int *h, int row, int col) ...@@ -462,10 +462,11 @@ void read_BG(int BG, int *h, int row, int col)
} }
extern "C" extern "C"
void init_LLR_DMA_for_CUDA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out, int block_length){ void init_LLR_DMA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out){
uint16_t Zc = p_decParams->Z; uint16_t Zc = p_decParams->Z;
uint8_t BG = p_decParams->BG; uint8_t BG = p_decParams->BG;
int block_length = p_decParams->block_length;
uint8_t row,col; uint8_t row,col;
if(BG == 1){ if(BG == 1){
row = 46; row = 46;
...@@ -483,14 +484,60 @@ void init_LLR_DMA_for_CUDA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8 ...@@ -483,14 +484,60 @@ void init_LLR_DMA_for_CUDA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
} }
using namespace std ;
/* from here: entry points in decoder shared lib */
extern "C" extern "C"
int32_t nrLDPC_decoder_LYC(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out, int block_length, time_stats_t *time_decoder) int ldpc_autoinit(void) { // called by the library loader
{ /*int devices = 0;
cudaError_t err = cudaGetDeviceCount(&devices);
AssertFatal(devices>0,"\nNo cuda GPU found\n\n");
const int kb = 1024;
const int mb = kb * kb;
wcout << "NBody.GPU" << endl << "=========" << endl << endl;
wcout << "CUDA version: v" << CUDART_VERSION << endl;
wcout << "CUDA Devices: " << endl << endl;
for(int i = 0; i < devices; ++i)
{
cudaDeviceProp props;
cudaGetDeviceProperties(&props, i);
wcout << i << ": " << props.name << ": " << props.major << "." << props.minor << endl;
wcout << " Global memory: " << props.totalGlobalMem / mb << "mb" << endl;
wcout << " Shared memory: " << props.sharedMemPerBlock / kb << "kb" << endl;
wcout << " Constant memory: " << props.totalConstMem / kb << "kb" << endl;
wcout << " Block registers: " << props.regsPerBlock << endl << endl;
wcout << " Warp size: " << props.warpSize << endl;
wcout << " Threads per block: " << props.maxThreadsPerBlock << endl;
wcout << " Max block dimensions: [ " << props.maxThreadsDim[0] << ", " << props.maxThreadsDim[1] << ", " << props.maxThreadsDim[2] << " ]" << endl;
wcout << " Max grid dimensions: [ " << props.maxGridSize[0] << ", " << props.maxGridSize[1] << ", " << props.maxGridSize[2] << " ]" << endl;
wcout << endl;
}
*/
warmup_for_GPU();
return 0;
}
extern "C"
void nrLDPC_initcall(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out) {
set_compact_BG(p_decParams->Z,p_decParams->BG);
init_LLR_DMA(p_decParams, p_llr, p_out);
}
extern "C"
int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out,t_nrLDPC_procBuf* p_procBuf, t_nrLDPC_time_stats *time_decoder)
{
uint16_t Zc = p_decParams->Z; uint16_t Zc = p_decParams->Z;
uint8_t BG = p_decParams->BG; uint8_t BG = p_decParams->BG;
uint8_t numMaxIter = p_decParams->numMaxIter; uint8_t numMaxIter = p_decParams->numMaxIter;
int block_length = p_decParams->block_length;
e_nrLDPC_outMode outMode = p_decParams->outMode; e_nrLDPC_outMode outMode = p_decParams->outMode;
cudaError_t cudaStatus; cudaError_t cudaStatus;
uint8_t row,col; uint8_t row,col;
......
...@@ -45,6 +45,7 @@ typedef struct { ...@@ -45,6 +45,7 @@ typedef struct {
time_stats_t *toutput; time_stats_t *toutput;
}encoder_implemparams_t; }encoder_implemparams_t;
#define INIT0_LDPCIMPLEMPARAMS {0,0,0,NULL,NULL,NULL,NULL} #define INIT0_LDPCIMPLEMPARAMS {0,0,0,NULL,NULL,NULL,NULL}
typedef void(*nrLDPC_initcallfunc_t)(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out);
typedef int(*nrLDPC_encoderfunc_t)(unsigned char **,unsigned char **,int,int,short, short, encoder_implemparams_t*); typedef int(*nrLDPC_encoderfunc_t)(unsigned char **,unsigned char **,int,int,short, short, encoder_implemparams_t*);
//============================================================================================================================ //============================================================================================================================
// decoder interface // decoder interface
...@@ -56,4 +57,4 @@ typedef int(*nrLDPC_encoderfunc_t)(unsigned char **,unsigned char **,int,int,sho ...@@ -56,4 +57,4 @@ typedef int(*nrLDPC_encoderfunc_t)(unsigned char **,unsigned char **,int,int,sho
\param p_profiler LDPC profiler statistics \param p_profiler LDPC profiler statistics
*/ */
typedef int32_t(*nrLDPC_decoderfunc_t)(t_nrLDPC_dec_params* , int8_t*, int8_t* , t_nrLDPC_procBuf* , t_nrLDPC_time_stats* ); typedef int32_t(*nrLDPC_decoderfunc_t)(t_nrLDPC_dec_params* , int8_t*, int8_t* , t_nrLDPC_procBuf* , t_nrLDPC_time_stats* );
#endif #endif
\ No newline at end of file
...@@ -23,13 +23,16 @@ ...@@ -23,13 +23,16 @@
#ifdef LDPC_LOADER #ifdef LDPC_LOADER
nrLDPC_decoderfunc_t nrLDPC_decoder; nrLDPC_decoderfunc_t nrLDPC_decoder;
nrLDPC_encoderfunc_t nrLDPC_encoder; nrLDPC_encoderfunc_t nrLDPC_encoder;
nrLDPC_initcallfunc_t nrLDPC_initcall;
#else #else
/* functions to load the LDPC shared lib, implemented in openair1/PHY/CODING/nrLDPC_load.c */ /* functions to load the LDPC shared lib, implemented in openair1/PHY/CODING/nrLDPC_load.c */
extern int load_nrLDPClib(void) ; extern int load_nrLDPClib(char *version) ;
extern int load_nrLDPClib_ref(char *libversion, nrLDPC_encoderfunc_t * nrLDPC_encoder_ptr); // for ldpctest extern int load_nrLDPClib_ref(char *libversion, nrLDPC_encoderfunc_t * nrLDPC_encoder_ptr); // for ldpctest
/* ldpc coder/decoder functions, as loaded by load_nrLDPClib(). */ /* ldpc coder/decoder functions, as loaded by load_nrLDPClib(). */
extern nrLDPC_initcallfunc_t nrLDPC_initcall;
extern nrLDPC_decoderfunc_t nrLDPC_decoder; extern nrLDPC_decoderfunc_t nrLDPC_decoder;
extern nrLDPC_encoderfunc_t nrLDPC_encoder; extern nrLDPC_encoderfunc_t nrLDPC_encoder;
// inline functions: // inline functions:
#include "openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_init_mem.h" #include "openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_init_mem.h"
#endif #endif
\ No newline at end of file
...@@ -42,22 +42,32 @@ ...@@ -42,22 +42,32 @@
/* function description array, to be used when loading the encoding/decoding shared lib */ /* function description array, to be used when loading the encoding/decoding shared lib */
static loader_shlibfunc_t shlib_fdesc[2]; static loader_shlibfunc_t shlib_fdesc[3];
char *arg[64]={"ldpctest","-O","cmdlineonly::dbgl0"}; /* arguments used when called from phy simulators exec's which do not use the config module */
/* arg is used to initialize the config module so that the loader works as expected */
char *arg[64]={"ldpctest","-O","cmdlineonly::dbgl0",NULL,NULL};
int load_nrLDPClib(void) { int load_nrLDPClib(char *version) {
char *ptr = (char*)config_get_if(); char *ptr = (char*)config_get_if();
char libname[64]="ldpc";
if ( ptr==NULL ) {// phy simulators, config module possibly not loaded if ( ptr==NULL ) {// phy simulators, config module possibly not loaded
load_configmodule(3,(char **)arg,CONFIG_ENABLECMDLINEONLY) ; load_configmodule(0,(char **)NULL,CONFIG_ENABLECMDLINEONLY) ;
logInit(); logInit();
} }
shlib_fdesc[0].fname = "nrLDPC_decod"; shlib_fdesc[0].fname = "nrLDPC_decod";
shlib_fdesc[1].fname = "nrLDPC_encod"; shlib_fdesc[1].fname = "nrLDPC_encod";
int ret=load_module_shlib("ldpc",shlib_fdesc,sizeof(shlib_fdesc)/sizeof(loader_shlibfunc_t),NULL); shlib_fdesc[2].fname = "nrLDPC_initcall";
int ret;
if (version)
ret=load_module_version_shlib(libname,version,shlib_fdesc,sizeof(shlib_fdesc)/sizeof(loader_shlibfunc_t),NULL);
else
ret=load_module_shlib(libname,shlib_fdesc,sizeof(shlib_fdesc)/sizeof(loader_shlibfunc_t),NULL);
AssertFatal( (ret >= 0),"Error loading ldpc decoder"); AssertFatal( (ret >= 0),"Error loading ldpc decoder");
nrLDPC_decoder = (nrLDPC_decoderfunc_t)shlib_fdesc[0].fptr; nrLDPC_decoder = (nrLDPC_decoderfunc_t)shlib_fdesc[0].fptr;
nrLDPC_encoder = (nrLDPC_encoderfunc_t)shlib_fdesc[1].fptr; nrLDPC_encoder = (nrLDPC_encoderfunc_t)shlib_fdesc[1].fptr;
nrLDPC_initcall = (nrLDPC_initcallfunc_t)shlib_fdesc[2].fptr;
return 0; return 0;
} }
...@@ -65,10 +75,8 @@ int load_nrLDPClib_ref(char *libversion, nrLDPC_encoderfunc_t * nrLDPC_encoder_p ...@@ -65,10 +75,8 @@ int load_nrLDPClib_ref(char *libversion, nrLDPC_encoderfunc_t * nrLDPC_encoder_p
loader_shlibfunc_t shlib_encoder_fdesc; loader_shlibfunc_t shlib_encoder_fdesc;
shlib_encoder_fdesc.fname = "nrLDPC_encod"; shlib_encoder_fdesc.fname = "nrLDPC_encod";
char libpath[64]; int ret=load_module_version_shlib("ldpc",libversion,&shlib_encoder_fdesc,1,NULL);
sprintf(libpath,"ldpc%s",libversion); AssertFatal( (ret >= 0),"Error loading ldpc encoder %s\n",(libversion==NULL)?"":libversion);
int ret=load_module_shlib(libpath,&shlib_encoder_fdesc,1,NULL);
AssertFatal( (ret >= 0),"Error loading ldpc encoder %s\n",libpath);
*nrLDPC_encoder_ptr = (nrLDPC_encoderfunc_t)shlib_encoder_fdesc.fptr; *nrLDPC_encoder_ptr = (nrLDPC_encoderfunc_t)shlib_encoder_fdesc.fptr;
return 0; return 0;
} }
......
...@@ -108,7 +108,7 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB, ...@@ -108,7 +108,7 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB,
crcTableInit(); crcTableInit();
init_scrambling_luts(); init_scrambling_luts();
init_pucch2_luts(); init_pucch2_luts();
load_nrLDPClib(); load_nrLDPClib(NULL);
// PBCH DMRS gold sequences generation // PBCH DMRS gold sequences generation
nr_init_pbch_dmrs(gNB); nr_init_pbch_dmrs(gNB);
//PDCCH DMRS init //PDCCH DMRS init
......
...@@ -497,6 +497,8 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -497,6 +497,8 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
} }
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_LDPC, VCD_FUNCTION_IN); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_LDPC, VCD_FUNCTION_IN);
p_decParams->block_length=length_dec;
nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
no_iteration_ldpc = nrLDPC_decoder(p_decParams, no_iteration_ldpc = nrLDPC_decoder(p_decParams,
(int8_t *)&pl[0], (int8_t *)&pl[0],
llrProcBuf, llrProcBuf,
...@@ -956,7 +958,8 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue, ...@@ -956,7 +958,8 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) { for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
pl[j] = _mm_packs_epi16(pv[i],pv[i+1]); pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
} }
p_decParams->block_length=length_dec;
nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
no_iteration_ldpc = nrLDPC_decoder(p_decParams, no_iteration_ldpc = nrLDPC_decoder(p_decParams,
(int8_t *)&pl[0], (int8_t *)&pl[0],
llrProcBuf, llrProcBuf,
...@@ -1340,7 +1343,8 @@ void nr_dlsch_decoding_process(void *arg) { ...@@ -1340,7 +1343,8 @@ void nr_dlsch_decoding_process(void *arg) {
for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) { for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
pl[j] = _mm_packs_epi16(pv[i],pv[i+1]); pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
} }
p_decParams->block_length=length_dec;
nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
no_iteration_ldpc = nrLDPC_decoder(p_decParams, no_iteration_ldpc = nrLDPC_decoder(p_decParams,
(int8_t *)&pl[0], (int8_t *)&pl[0],
llrProcBuf, llrProcBuf,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment