diff --git a/cmake_targets/CMakeLists.txt b/cmake_targets/CMakeLists.txt
index d154910828bc37671aa8d0136c39e45fa217b1ac..cf7b1543d39958984016bf528f816c0c8d197f4f 100644
--- a/cmake_targets/CMakeLists.txt
+++ b/cmake_targets/CMakeLists.txt
@@ -309,6 +309,7 @@ endif()
 
 #
 # add autotools definitions that were maybe used!
+
 add_definitions("-DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_FCNTL_H=1 -DHAVE_ARPA_INET_H=1 -DHAVE_SYS_TIME_H=1 -DHAVE_SYS_SOCKET_H=1 -DHAVE_STRERROR=1 -DHAVE_SOCKET=1 -DHAVE_MEMSET=1 -DHAVE_GETTIMEOFDAY=1 -DHAVE_STDLIB_H=1 -DHAVE_MALLOC=1 -DHAVE_LIBSCTP")
 
 set(commonOpts "-pipe -Wno-packed-bitfield-compat -fPIC -Wall -fno-strict-aliasing -rdynamic")
@@ -318,11 +319,7 @@ set(CMAKE_C_FLAGS
 set(CMAKE_CXX_FLAGS
   "${CMAKE_CXX_FLAGS} ${C_FLAGS_PROCESSOR} ${commonOpts} -std=c++11")
 
-# cuda compiler bug (limitation) on complex macro definition
-if (CUDA_FOUND)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCUDA_FLAG")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDA_FLAG")
-endif()
+
 
 if (SANITIZE_ADDRESS)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-common")
@@ -3123,7 +3120,11 @@ target_link_libraries (nr-uesoftmodem ${LIB_LMS_LIBRARIES})
 target_link_libraries (nr-uesoftmodem ${T_LIB})
 
 add_dependencies( nr-uesoftmodem ldpc_orig ldpc_optim ldpc_optim8seg ldpc )
-
+if (CUDA_FOUND)
+  add_dependencies( nr-uesoftmodem ldpc_cuda)
+  add_dependencies( nr-softmodem ldpc_cuda)
+  add_dependencies( ocp-gnb ldpc_cuda)
+endif (CUDA_FOUND)
 ###################################"
 # Addexecutables for tests
 ####################################
@@ -3183,41 +3184,14 @@ target_link_libraries(smallblocktest
   m pthread ${ATLAS_LIBRARIES} dl
   )
 
-if (CUDA_FOUND)
-###################################################
-# For CUDA library
-###################################################
-
-
-  cuda_add_executable(ldpctest
-  ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
-  ${T_SOURCE}
-  ${SHLIB_LOADER_SOURCES}
-  )
-  target_link_libraries(ldpctest -ldl
-    -Wl,--start-group
-    UTIL SIMU PHY_NR CONFIG_LIB
-    -Wl,--end-group
-    m pthread ${ATLAS_LIBRARIES} dl
-    )
-
-else (CUDA_FOUND)
-  add_executable(ldpctest
-  ${PHY_NR_CODINGIF}
-  ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
-  ${T_SOURCE}
-  ${SHLIB_LOADER_SOURCES}
-  )
-
-endif ()
+add_executable(ldpctest
+  ${PHY_NR_CODINGIF}
+  ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
+  ${T_SOURCE}
+  ${SHLIB_LOADER_SOURCES}
+  )
 
 
-# add_executable(ldpctest
-  # ${PHY_NR_CODINGIF}
-  # ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
-  # ${T_SOURCE}
-  # ${SHLIB_LOADER_SOURCES}
-  # )
 add_dependencies( ldpctest ldpc_orig ldpc_optim ldpc_optim8seg ldpc )
 
 target_link_libraries(ldpctest
diff --git a/openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu b/openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
index 931d5003385af8b4144fdbef244ed2176272a90e..42faa1e27a628a276ece456995b2511435149814 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
+++ b/openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
@@ -483,6 +483,74 @@ void init_LLR_DMA_for_CUDA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
 
 }
 
+/* from here: entry points in decoder shared lib */
+
+/**
+ * Library auto-initialisation entry point, called by the library loader.
+ *
+ * Aborts through AssertFatal when the CUDA device count cannot be read or no
+ * device is present, prints the properties of every device and finally calls
+ * warmup_for_GPU().
+ *
+ * Output uses narrow std::cout instead of std::wcout: a wide write gives stdout
+ * a wide orientation, and byte-oriented stdio calls such as printf() must not
+ * be applied to that stream afterwards.
+ *
+ * @return always 0
+ */
+extern "C"
+int ldpc_autoinit(void) {
+  int devices = 0;
+
+  // The status is checked too: the count is only meaningful on success.
+  const cudaError_t err = cudaGetDeviceCount(&devices);
+  AssertFatal(err == cudaSuccess, "\ncudaGetDeviceCount failed: %s\n\n", cudaGetErrorString(err));
+  AssertFatal(devices>0,"\nNo cuda GPU found\n\n");
+
+  const int kb = 1024;
+  const int mb = kb * kb;
+  // NOTE(review): banner text looks inherited from another program - confirm.
+  std::cout << "NBody.GPU" << "\n" << "=========" << "\n\n";
+  std::cout << "CUDA version: v" << CUDART_VERSION << "\n";
+  std::cout << "CUDA Devices: " << "\n\n";
+
+  for (int i = 0; i < devices; ++i) {
+    cudaDeviceProp props;
+    const cudaError_t propErr = cudaGetDeviceProperties(&props, i);
+    if (propErr != cudaSuccess) {
+      // Do not print from props when the query failed; report and move on.
+      std::cout << i << ": cudaGetDeviceProperties failed: " << cudaGetErrorString(propErr) << "\n\n";
+      continue;
+    }
+    std::cout << i << ": " << props.name << ": " << props.major << "." << props.minor << "\n";
+    std::cout << " Global memory: " << props.totalGlobalMem / mb << "mb" << "\n";
+    std::cout << " Shared memory: " << props.sharedMemPerBlock / kb << "kb" << "\n";
+    std::cout << " Constant memory: " << props.totalConstMem / kb << "kb" << "\n";
+    std::cout << " Block registers: " << props.regsPerBlock << "\n\n";
+    std::cout << " Warp size: " << props.warpSize << "\n";
+    std::cout << " Threads per block: " << props.maxThreadsPerBlock << "\n";
+    std::cout << " Max block dimensions: [ " << props.maxThreadsDim[0] << ", " << props.maxThreadsDim[1] << ", " << props.maxThreadsDim[2] << " ]" << "\n";
+    std::cout << " Max grid dimensions: [ " << props.maxGridSize[0] << ", " << props.maxGridSize[1] << ", " << props.maxGridSize[2] << " ]" << "\n";
+    std::cout << "\n";
+  }
+  // Flush the device report before starting the GPU warm-up.
+  std::cout << std::flush;
+  warmup_for_GPU();
+  return 0;
+}
+
+/**
+ * Setup entry point: passes Z and BG from p_decParams to set_compact_BG(),
+ * then calls init_LLR_DMA() with the decoder parameters and the p_llr / p_out
+ * buffers.
+ */
+extern "C"
+void nrLDPC_initcall(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out) {
+  set_compact_BG(p_decParams->Z,p_decParams->BG);
+  init_LLR_DMA(p_decParams, p_llr, p_out);
+}
+
+
 extern "C"
 int32_t nrLDPC_decoder_LYC(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out, int block_length, time_stats_t *time_decoder)
 {
diff --git a/openair1/PHY/CODING/nrLDPC_load.c b/openair1/PHY/CODING/nrLDPC_load.c
index a74bdf7bd12a73c81ac6654f3e34495194e631a0..677c5d53b0b80f844bb1e677e2dff79fe627f9e2 100644
--- a/openair1/PHY/CODING/nrLDPC_load.c
+++ b/openair1/PHY/CODING/nrLDPC_load.c
@@ -42,14 +42,25 @@
 
 
 /* function description array, to be used when loading the encoding/decoding shared lib */
-static loader_shlibfunc_t shlib_fdesc[2];
-char *arg[64]={"ldpctest","-O","cmdlineonly::dbgl0"};
+static loader_shlibfunc_t shlib_fdesc[3];
+
+/* arguments used when called from phy simulators exec's which do not use the config module */
+/* arg is used to initialize the config module so that the loader works as expected */
+char *arg[64]={"ldpctest","-O","cmdlineonly::dbgl0",NULL,NULL};
 
 
 int load_nrLDPClib(void) {
   char *ptr = (char*)config_get_if();
+  char libname[64]="ldpc";
+  int argc=3;
+  if (run_cuda) {
+    arg[3]="--loader.ldpc.shlibversion";
+    argc++;
+    arg[4]="_cuda";
+    argc++;
+  }
   if ( ptr==NULL ) {// phy simulators, config module possibly not loaded
-    load_configmodule(3,(char **)arg,CONFIG_ENABLECMDLINEONLY) ;
+    load_configmodule(argc,(char **)arg,CONFIG_ENABLECMDLINEONLY) ;
     logInit();
   }
   shlib_fdesc[0].fname = "nrLDPC_decod";
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
index 509e42d663be9ee61815b16bb88b02fe5a0c04ca..816b025d34e8b3e220550b70b5b8213675ab19d1 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
@@ -497,6 +497,7 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
       }
 
       VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_LDPC, VCD_FUNCTION_IN);
+      nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
       no_iteration_ldpc = nrLDPC_decoder(p_decParams,
                                          (int8_t *)&pl[0],
                                          llrProcBuf,
@@ -956,7 +957,7 @@ uint32_t nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
       for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
         pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
       }
-
+      nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
       no_iteration_ldpc = nrLDPC_decoder(p_decParams,
                                          (int8_t *)&pl[0],
                                          llrProcBuf,
@@ -1340,7 +1341,7 @@ void nr_dlsch_decoding_process(void *arg) {
       for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1; i+=2, j++) {
         pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
       }
-
+      nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
       no_iteration_ldpc = nrLDPC_decoder(p_decParams,
                                          (int8_t *)&pl[0],
                                          llrProcBuf,