Commit 5440d667 authored by Robert Schmidt's avatar Robert Schmidt

Merge remote-tracking branch 'origin/t2-offload-mr' into integration_2023_w48

parents 39468ca9 7d7dced7
......@@ -494,20 +494,21 @@ target_link_libraries(shlib_loader PRIVATE CONFIG_LIB)
##########################################################
# LDPC offload library
# LDPC offload library - AMD T2 Accelerator Card
##########################################################
add_boolean_option(ENABLE_LDPC_T1 OFF "Build support for LDPC Offload to T1 library" OFF)
if (ENABLE_LDPC_T1)
pkg_check_modules(LIBDPDK_T1 REQUIRED libdpdk=20.05.0)
find_library(T1 NAMES rte_pmd_hpac_sdfec_pmd REQUIRED)
if (NOT T1)
message(FATAL_ERROR "Library rte_pmd_hpac_sdfec_pmd for T1 offload not found")
add_boolean_option(ENABLE_LDPC_T2 OFF "Build support for LDPC Offload to T2 library" OFF)
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/x86_64-linux-gnu/pkgconfig")
if (ENABLE_LDPC_T2)
pkg_check_modules(LIBDPDK_T2 REQUIRED libdpdk=20.11.7)
find_library(PMD_T2 NAMES rte_baseband_accl_ldpc HINTS "/opt/dpdk-t2/lib/x86_64-linux-gnu/")
if (NOT PMD_T2)
message(FATAL_ERROR "could not find PMD T2")
endif()
add_library(ldpc_t1 MODULE ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload.c)
set_target_properties(ldpc_t1 PROPERTIES COMPILE_FLAGS "-DALLOW_EXPERIMENTAL_API")
target_compile_options(ldpc_t1 PRIVATE ${LIBDPDK_T1_CFLAGS})
target_link_libraries(ldpc_t1 ${LIBDPDK_T1_LDFLAGS} ${T1})
message(STATUS "T2 build: use ${PMD_T2}")
add_library(ldpc_t2 MODULE ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload.c)
set_target_properties(ldpc_t2 PROPERTIES COMPILE_FLAGS "-DALLOW_EXPERIMENTAL_API")
target_link_libraries(ldpc_t2 ${LIBDPDK_T2_LDFLAGS} ${PMD_T2})
endif()
##########################################################
......@@ -819,19 +820,30 @@ set(PHY_LDPC_ORIG_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder.c
)
add_library(ldpc_orig MODULE ${PHY_LDPC_ORIG_SRC} )
target_link_libraries(ldpc_orig PRIVATE ldpc_gen_HEADERS)
set(PHY_LDPC_OPTIM_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim.c
)
)
add_library(ldpc_optim MODULE ${PHY_LDPC_OPTIM_SRC} )
target_link_libraries(ldpc_optim PRIVATE ldpc_gen_HEADERS)
set(PHY_LDPC_OPTIM8SEG_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8seg.c
)
add_library(ldpc_optim8seg MODULE ${PHY_LDPC_OPTIM8SEG_SRC} )
target_link_libraries(ldpc_optim8seg PRIVATE ldpc_gen_HEADERS)
set(PHY_LDPC_OPTIM8SEGMULTI_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
)
)
add_library(ldpc MODULE ${PHY_LDPC_OPTIM8SEGMULTI_SRC} )
target_link_libraries(ldpc PRIVATE ldpc_gen_HEADERS)
set(PHY_LDPC_CUDA_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
......@@ -845,21 +857,13 @@ add_custom_target( nrLDPC_decoder_kernels_CL
COMMAND gcc ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c -dD -DNRLDPC_KERNEL_SOURCE -E -o ${CMAKE_CURRENT_BINARY_DIR}/nrLDPC_decoder_kernels_CL.clc
SOURCES ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c
)
set(PHY_NR_CODINGIF
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_load.c;
)
add_library(ldpc_orig MODULE ${PHY_LDPC_ORIG_SRC} )
target_link_libraries(ldpc_orig PRIVATE ldpc_gen_HEADERS)
add_library(ldpc_optim MODULE ${PHY_LDPC_OPTIM_SRC} )
target_link_libraries(ldpc_optim PRIVATE ldpc_gen_HEADERS)
add_library(ldpc_optim8seg MODULE ${PHY_LDPC_OPTIM8SEG_SRC} )
target_link_libraries(ldpc_optim8seg PRIVATE ldpc_gen_HEADERS)
add_library(ldpc_cl MODULE ${PHY_LDPC_CL_SRC} )
target_link_libraries(ldpc_cl OpenCL)
add_dependencies(ldpc_cl nrLDPC_decoder_kernels_CL)
set(PHY_NR_CODINGIF
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_load.c
)
##############################################
# Base CUDA setting
......@@ -877,13 +881,6 @@ if (ENABLE_LDPC_CUDA)
endif()
endif()
add_library(ldpc MODULE ${PHY_LDPC_OPTIM8SEGMULTI_SRC} )
target_link_libraries(ldpc PRIVATE ldpc_gen_HEADERS)
add_library(ldpc_parityCheck MODULE ${PHY_LDPC_OPTIM8SEGMULTI_SRC} )
target_compile_definitions(ldpc_parityCheck PUBLIC NR_LDPC_ENABLE_PARITY_CHECK)
target_link_libraries(ldpc_parityCheck PRIVATE ldpc_gen_HEADERS)
add_library(coding MODULE ${PHY_TURBOSRC} )
add_library(dfts MODULE ${OPENAIR1_DIR}/PHY/TOOLS/oai_dfts.c ${OPENAIR1_DIR}/PHY/TOOLS/oai_dfts_neon.c)
......@@ -2090,8 +2087,8 @@ endif()
add_dependencies(nr-softmodem ldpc_orig ldpc_optim ldpc_optim8seg ldpc)
if (ENABLE_LDPC_T1)
add_dependencies(nr-softmodem ldpc_t1)
if (ENABLE_LDPC_T2)
add_dependencies(nr-softmodem ldpc_t2)
endif()
# force the generation of ASN.1 so that we don't need to wait during the build
......@@ -2212,12 +2209,12 @@ add_executable(ldpctest
${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
)
add_dependencies( ldpctest ldpc_orig ldpc_optim ldpc_optim8seg ldpc ldpc_parityCheck)
add_dependencies(ldpctest ldpc_orig ldpc_optim ldpc_optim8seg ldpc)
if (ENABLE_LDPC_CUDA)
add_dependencies(ldpctest ldpc_cuda)
endif()
target_link_libraries(ldpctest PRIVATE
-Wl,--start-group UTIL SIMU PHY_NR PHY_COMMON PHY_NR_COMMON -Wl,--end-group
-Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON -Wl,--end-group
m pthread dl shlib_loader ${T_LIB}
)
......@@ -2307,8 +2304,8 @@ add_executable(nr_ulsim
${PHY_INTERFACE_DIR}/queue_t.c
)
if (ENABLE_LDPC_T1)
add_dependencies(nr_ulsim ldpc_t1)
if (ENABLE_LDPC_T2)
add_dependencies(nr_ulsim ldpc_t2)
endif()
target_link_libraries(nr_ulsim PRIVATE
......@@ -2376,7 +2373,7 @@ if (${T_TRACER})
PHY_COMMON PHY PHY_UE PHY_NR PHY_NR_COMMON PHY_NR_UE PHY_RU PHY_MEX
L2 L2_LTE L2_NR L2_LTE_NR L2_UE NR_L2_UE L2_UE_LTE_NR MAC_NR_COMMON MAC_UE_NR ngap
CN_UTILS GTPV1U SCTP_CLIENT MME_APP LIB_NAS_UE NB_IoT SIMU SIMU_ETH OPENAIR0_LIB
ldpc_orig ldpc_optim ldpc_optim8seg ldpc dfts config_internals)
ldpc_orig ldpc_optim ldpc_optim8seg ldpc_t2 ldpc_cl ldpc_cuda ldpc dfts config_internals)
if (TARGET ${i})
add_dependencies(${i} generate_T)
endif()
......
......@@ -35,7 +35,7 @@
<mode>TesteNB</mode>
<class>Build_eNB</class>
<desc>Build gNB (USRP)</desc>
<Build_eNB_args>--gNB -w USRP --ninja -c -P --build-lib "ldpc_cuda ldpc_t1" --cmake-opt -DASN1C_EXEC=/opt/asn1c/bin/asn1c</Build_eNB_args>
<Build_eNB_args>--gNB -w USRP --ninja -c -P --build-lib "ldpc_cuda" --cmake-opt -DASN1C_EXEC=/opt/asn1c/bin/asn1c</Build_eNB_args>
<forced_workspace_cleanup>True</forced_workspace_cleanup>
</testCase>
......
<!--
Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The OpenAirInterface Software Alliance licenses this file to You under
the OAI Public License, Version 1.1 (the "License"); you may not use this file
except in compliance with the License.
You may obtain a copy of the License at
http://www.openairinterface.org/?page_id=698
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
For more information about the OpenAirInterface (OAI) Software Alliance:
contact@openairinterface.org
-->
<testCaseList>
<htmlTabRef>test-t1-offload</htmlTabRef>
<htmlTabName>Test T1 Offload</htmlTabName>
<htmlTabIcon>tasks</htmlTabIcon>
<repeatCount>1</repeatCount>
<TestCaseRequestedList>000111 000112 000121 000122 000131 000132 000211 000212 000221 000222 000231 000232</TestCaseRequestedList>
<TestCaseExclusionList></TestCaseExclusionList>
<testCase id="000111">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with CPU</desc>
<physim_run_args>-n100 -s30 -m5 -r106 -R106 -C10 -P</physim_run_args>
</testCase>
<testCase id="000112">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with T1 LDPC offload</desc>
<physim_run_args>-n100 -s30 -m5 -r106 -R106 -o -P</physim_run_args>
</testCase>
<testCase id="000121">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with CPU</desc>
<physim_run_args>-n100 -s30 -m15 -r106 -R106 -C10 -P</physim_run_args>
</testCase>
<testCase id="000122">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with T1 LDPC offload</desc>
<physim_run_args>-n100 -s30 -m15 -r106 -R106 -o -P</physim_run_args>
</testCase>
<testCase id="000131">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with CPU</desc>
<physim_run_args>-n100 -s30 -m25 -r106 -R106 -C10 -P</physim_run_args>
</testCase>
<testCase id="000132">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with T1 LDPC offload</desc>
<physim_run_args>-n100 -s30 -m25 -r106 -R106 -o -P</physim_run_args>
</testCase>
<testCase id="000211">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with CPU</desc>
<physim_run_args>-n100 -s30 -m5 -r273 -R273 -C10 -P</physim_run_args>
</testCase>
<testCase id="000212">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with T1 LDPC offload</desc>
<physim_run_args>-n100 -s30 -m5 -r273 -R273 -o -P</physim_run_args>
</testCase>
<testCase id="000221">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with CPU</desc>
<physim_run_args>-n100 -s30 -m15 -r273 -R273 -C10 -P</physim_run_args>
</testCase>
<testCase id="000222">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with T1 LDPC offload</desc>
<physim_run_args>-n100 -s30 -m15 -r273 -R273 -o -P</physim_run_args>
</testCase>
<testCase id="000231">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with CPU</desc>
<physim_run_args>-n100 -s30 -m25 -r273 -R273 -C10 -P</physim_run_args>
</testCase>
<testCase id="000232">
<class>Run_LDPCt1Test</class>
<desc>Run nr_ulsim with T1 LDPC offload</desc>
<physim_run_args>-n100 -s30 -m25 -r273 -R273 -o -P</physim_run_args>
</testCase>
</testCaseList>
......@@ -46,7 +46,7 @@ BUILD_DOXYGEN=0
DISABLE_HARDWARE_DEPENDENCY="False"
CMAKE_BUILD_TYPE="RelWithDebInfo"
CMAKE_CMD="$CMAKE"
OPTIONAL_LIBRARIES="telnetsrv enbscope uescope nrscope nrqtscope ldpc_cuda ldpc_t1 websrv oai_iqplayer"
OPTIONAL_LIBRARIES="telnetsrv enbscope uescope nrscope nrqtscope ldpc_cuda ldpc_t2 websrv oai_iqplayer"
TARGET_LIST=""
function print_help() {
......
......@@ -183,7 +183,6 @@ Some libraries have further dependencies and might not build on every system:
- `enbscope`, `uescope`, `nrscope`: libforms/X
- `nrqtscope`: Qt5
- `ldpc_cuda`: CUDA
- `ldpc_t1`: DPDK and VVDN T1
- `websrv`: npm and others
# Running `cmake` directly
......
<table style="border-collapse: collapse; border: none;">
<tr style="border-collapse: collapse; border: none;">
<td style="border-collapse: collapse; border: none;">
<a href="http://www.openairinterface.org/">
<img src="./images/oai_final_logo.png" alt="" border=3 height=50 width=150>
</img>
</a>
</td>
<td style="border-collapse: collapse; border: none; vertical-align: center;">
<b><font size = "5">OAI T1/T2 LDPC offload</font></b>
</td>
</tr>
</table>
**Table of Contents**
[[_TOC_]]
This documentation aims to provide a tutorial for AMD Xilinx T2 Telco card integration into OAI and its usage.
# Requirements
- bitstream image and PMD driver for the T2 card provided by AccelerComm
- DPDK 20.11.7 with patch from Accelercomm (also tested with DPDK 20.11.3)
- tested on RHEL7.9, RHEL9.2, Ubuntu 20.04, Ubuntu 22.04
# DPDK setup
## DPDK installation
```
# Get DPDK source code
git clone https://github.com/DPDK/dpdk-stable.git ~/dpdk-stable
cd ~/dpdk-stable
git checkout v20.11.7
git apply ~/ACL_BBDEV_DPDK20.11.3_xxx.patch
```
Replace `~/ACL_BBDEV_DPDK20.11.3_xxx.patch` by patch file provided by
Accelercomm.
```
cd ~/dpdk-stable
meson setup build
# meson setup --prefix=/opt/dpdk-t2 build for installation with non-default installation prefix
cd build
ninja
sudo ninja install
sudo ldconfig
```
## DPDK configuration
- load required kernel module
```
sudo modprobe igb_uio
sudo insmod ~/dpdk-stable/kernel/linux/igb_uio/igb_uio.ko
```
- check presence of the card and its PCI addres on the host machine
```
lspci | grep "Xilinx"
```
- bind the card with igb_uio driver
```
sudo python3 ~/dpdk-stable/usertools/dpdk-devbind.py -b igb_uio 41:00.0
```
Replace PCI address of the card *41:00.0* by address detected by *lspci | grep "Xilinx"* command
- hugepages setup (10 x 1GB hugepages)
```
sudo python3 ~/dpdk-stable/usertools/dpdk-hugepages.py -p 1G --setup 10G`
```
*Note: device binding and hugepages setup has to be done after every reboot of
the host machine*
# Modifications in the OAI code
## PMD path specification
Path to the PMD for operating the card is specified in `CMakeLists.txt` file in
*LDPC OFFLOAD library* section. Modify following line based on the location of
the PMD on your system. By deafult, path to the PMD is set to `/opt/dpdk-t2/lib/x86_64-linux-gnu/`.
```
find_library(PMD_T2 NAMES rte_baseband_accl_ldpc HINTS "/opt/dpdk-t2/lib/x86_64-linux-gnu/")
```
## T2 card DPDK initialization
Following lines in `openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload.c` file has to be
modified based on your system requirements. By default, PCI address of the T2 card is set to 41:00.0 and cores 14 and 15 are assigned to the DPDK.
```
char *dpdk_dev = "41:00.0"; //PCI address of the card
char *argv_re[] = {"bbdev", "-a", dpdk_dev, "-l", "14-15", "--file-prefix=b6", "--"};
```
For the DPDK EAL initialization, device is specified by `-a` option and list
of cores to run the DPDK application on is selected by `-l` option. PCI adress of
the T2 card can be detected by `lspci | grep "Xilinx"` command.
# OAI Build
OTA deployment is precisely described in the following tutorial:
- [NR_SA_Tutorial_COTS_UE](https://gitlab.eurecom.fr/oai/openairinterface5g/-/blob/develop/doc/NR_SA_Tutorial_COTS_UE.md)
Instead of section *3.2 Build OAI gNB* from the tutorial, run following commands:
```
# Get openairinterface5g source code
git clone https://gitlab.eurecom.fr/oai/openairinterface5g.git ~/openairinterface5g
cd ~/openairinterface5g
git checkout develop
# Install OAI dependencies
cd ~/openairinterface5g/cmake_targets
./build_oai -I
# Build OAI gNB
cd ~/openairinterface5g
source oaienv
cd cmake_targets
./build_oai -w USRP --ninja --gNB -P -C --build-lib "ldpc_t2"
```
Shared object file *libldpc_t2.so* is created during the compilation. This object is conditionally compiled. Selection of the library to compile is done using *--build-lib ldpc_t2*.
*Required poll mode driver has to be present on the host machine and required DPDK version has to be installed on the host, prior to the build of OAI*
# 5G PHY simulators
## nr_ulsim test
Offload of the channel decoding to the T2 card is in nr_ulsim specified by *-o* option. Example command for running nr_ulsim with LDPC decoding offload to the T2 card:
```
cd ~/openairinterface5g
source oaienv
cd cmake_targets/ran_build/build
sudo ./nr_ulsim -n100 -s20 -m20 -r273 -R273 -o
```
## nr_dlsim test
Offload of the channel encoding to the AMD Xilinx T2 card is in nr_dlsim specified by *-c* option. Example command for running nr_dlsim with LDPC encoding offload to the T2 card:
```
cd ~/openairinterface5g
source oaienv
cd cmake_targets/ran_build/build
sudo ./nr_dlsim -n300 -s30 -R 106 -e 27 -c
```
# OTA test
Offload of the channel encoding and decoding to the AMD Xilinx T2 card is enabled by *--ldpc-offload-enable 1* option.
## Run OAI gNB with USRP B210
```
cd ~/openairinterface5g
source oaienv
cd cmake_targets/ran_build/build
sudo ./nr-softmodem --sa -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb.sa.band78.fr1.106PRB.usrpb210.conf --ldpc-offload-enable 1
```
# Limitations
## AMD Xilinx T2 card
- offload of the LDPC encoding implemented for MCS > 2, OAI CPU encoder is used for MCS =< 2
- functionality of the LDPC encoding and decoding offload verified in OTA SISO setup with USRP N310 and Quectel RM500Q, blocking of the card reported for MIMO setup (2 layers)
*Note: AMD Xilinx T1 Telco card is not supported anymore.*
......@@ -98,7 +98,7 @@
#define CONFIG_HLP_WORKER_CMD "two option for worker 'WORKER_DISABLE' or 'WORKER_ENABLE'\n"
#define CONFIG_HLP_USRP_THREAD "having extra thead for usrp tx\n"
#define CONFIG_HLP_DISABLNBIOT "disable nb-iot, even if defined in config\n"
#define CONFIG_HLP_LDPC_OFFLOAD "enable LDPC offload\n"
#define CONFIG_HLP_LDPC_OFFLOAD "enable LDPC offload to AMD Xilinx T2 card\n"
#define CONFIG_HLP_USRP_ARGS "set the arguments to identify USRP (same syntax as in UHD)\n"
#define CONFIG_HLP_TX_SUBDEV "set the arguments to select tx_subdev (same syntax as in UHD)\n"
#define CONFIG_HLP_RX_SUBDEV "set the arguments to select rx_subdev (same syntax as in UHD)\n"
......
......@@ -428,6 +428,9 @@ static void get_channel_model_mode(configmodule_interface_t *cfg)
int NB_UE_INST = 1;
configmodule_interface_t *uniqCfg = NULL;
// A global var to reduce the changes size
ldpc_interface_t ldpc_interface = {0}, ldpc_interface_offload = {0};
int main( int argc, char **argv ) {
int set_exe_prio = 1;
if (checkIfFedoraDistribution())
......@@ -465,8 +468,8 @@ int main( int argc, char **argv ) {
itti_init(TASK_MAX, tasks_info);
init_opt() ;
load_nrLDPClib(NULL);
load_LDPClib(NULL, &ldpc_interface);
if (ouput_vcd) {
vcd_signal_dumper_init("/tmp/openair_dump_nrUE.vcd");
}
......
This diff is collapsed.
......@@ -133,14 +133,24 @@
#include "bnProc128/nrLDPC_bnProc_BG2_R23_128.h"
#endif
//#define NR_LDPC_ENABLE_PARITY_CHECK
//#define NR_LDPC_PROFILER_DETAIL(a) a
#define NR_LDPC_PROFILER_DETAIL(a)
#include "openair1/PHY/CODING/nrLDPC_extern.h"
#ifdef NR_LDPC_DEBUG_MODE
#include "nrLDPC_tools/nrLDPC_debug.h"
#endif
// decoder interface
/**
\brief LDPC decoder API type definition
\param p_decParams LDPC decoder parameters
\param p_llr Input LLRs
\param p_llrOut Output vector
\param p_profiler LDPC profiler statistics
*/
static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr,
int8_t* p_out,
uint32_t numLLR,
......@@ -148,13 +158,25 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr,
t_nrLDPC_dec_params* p_decParams,
t_nrLDPC_time_stats* p_profiler,
decode_abort_t* ab);
void nrLDPC_initcall(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out) {
int32_t LDPCinit()
{
return 0;
}
int32_t LDPCshutdown()
{
return 0;
}
int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams,
int8_t* p_llr,
int8_t* p_out,
t_nrLDPC_time_stats* p_profiler,
decode_abort_t* ab)
int32_t LDPCdecoder(t_nrLDPC_dec_params* p_decParams,
uint8_t harq_pid,
uint8_t ulsch_id,
uint8_t C,
int8_t* p_llr,
int8_t* p_out,
t_nrLDPC_time_stats* p_profiler,
decode_abort_t* ab)
{
uint32_t numLLR;
t_nrLDPC_lut lut;
......@@ -817,31 +839,30 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr,
#endif
// Parity Check
#ifdef NR_LDPC_ENABLE_PARITY_CHECK
NR_LDPC_PROFILER_DETAIL(start_meas(&p_profiler->cnProcPc));
if (BG == 1)
pcRes = nrLDPC_cnProcPc_BG1(p_lut, cnProcBuf, cnProcBufRes, Z);
else
pcRes = nrLDPC_cnProcPc_BG2(p_lut, cnProcBuf, cnProcBufRes, Z);
NR_LDPC_PROFILER_DETAIL(stop_meas(&p_profiler->cnProcPc));
#else
if (numIter > 2) {
int8_t llrOut[NR_LDPC_MAX_NUM_LLR] __attribute__((aligned(64))) = {0};
int8_t* p_llrOut = outMode == nrLDPC_outMode_LLRINT8 ? p_out : llrOut;
nrLDPC_llrRes2llrOut(p_lut, p_llrOut, llrRes, Z, BG);
if (outMode == nrLDPC_outMode_BIT)
nrLDPC_llr2bitPacked(p_out, p_llrOut, numLLR);
else // if (outMode == nrLDPC_outMode_BITINT8)
nrLDPC_llr2bit(p_out, p_llrOut, numLLR);
if (check_crc((uint8_t*)p_out, p_decParams->block_length, p_decParams->crc_type)) {
LOG_D(PHY, "Segment CRC OK, exiting LDPC decoder\n");
break;
if (!p_decParams->check_crc) {
NR_LDPC_PROFILER_DETAIL(start_meas(&p_profiler->cnProcPc));
if (BG == 1)
pcRes = nrLDPC_cnProcPc_BG1(p_lut, cnProcBuf, cnProcBufRes, Z);
else
pcRes = nrLDPC_cnProcPc_BG2(p_lut, cnProcBuf, cnProcBufRes, Z);
NR_LDPC_PROFILER_DETAIL(stop_meas(&p_profiler->cnProcPc));
} else {
if (numIter > 2) {
int8_t llrOut[NR_LDPC_MAX_NUM_LLR] __attribute__((aligned(64))) = {0};
int8_t* p_llrOut = outMode == nrLDPC_outMode_LLRINT8 ? p_out : llrOut;
nrLDPC_llrRes2llrOut(p_lut, p_llrOut, llrRes, Z, BG);
if (outMode == nrLDPC_outMode_BIT)
nrLDPC_llr2bitPacked(p_out, p_llrOut, numLLR);
else // if (outMode == nrLDPC_outMode_BITINT8)
nrLDPC_llr2bit(p_out, p_llrOut, numLLR);
if (p_decParams->check_crc((uint8_t*)p_out, p_decParams->E, p_decParams->crc_type)) {
LOG_D(PHY, "Segment CRC OK, exiting LDPC decoder\n");
break;
}
}
}
#endif
}
#ifdef NR_LDPC_ENABLE_PARITY_CHECK
{
if (!p_decParams->check_crc) {
int8_t llrOut[NR_LDPC_MAX_NUM_LLR] __attribute__((aligned(64))) = {0};
int8_t* p_llrOut = outMode == nrLDPC_outMode_LLRINT8 ? p_out : llrOut;
// Assign results from processing buffer to output
......@@ -856,7 +877,6 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr,
nrLDPC_llr2bit(p_out, p_llrOut, numLLR);
NR_LDPC_PROFILER_DETAIL(stop_meas(&p_profiler->llr2bit));
}
#endif
return numIter;
}
......
......@@ -47,8 +47,7 @@ typedef struct{
#include <unistd.h>
#include <sys/stat.h>
#include <CL/opencl.h>
#include "PHY/CODING/nrLDPC_decoder/nrLDPC_types.h"
#include "PHY/CODING/nrLDPC_decoder/nrLDPCdecoder_defs.h"
#include "openair1/PHY/CODING/nrLDPC_extern.h"
#include "assertions.h"
#include "common/utils/LOG/log.h"
......@@ -135,9 +134,9 @@ void set_compact_BG(int Zc,short BG){
}
printf("\nZc = %d BG = %d\n",Zc,BG);
ocl.runtime[0].dev_h_compact1 = clCreateBuffer(ocl.runtime[0].context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY, memorySize_h_compact1, NULL, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_h_compact1 for platform %i \n" , (int)rt, 0);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_h_compact1 for platform %i \n" , (int)rt, 0);
ocl.runtime[0].dev_h_compact2 = clCreateBuffer(ocl.runtime[0].context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY, memorySize_h_compact2, NULL, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_h_compact2 for platform %i \n" , (int)rt, 0);
AssertFatal(rt == CL_SUCCESS, "Error %d creating buffer dev_h_compact2 for platform %i \n" , (int)rt, 0);
h_element *h1;
h_element *h2;
switch(lift_index){
......@@ -328,18 +327,25 @@ int ldpc_autoinit(void) { // called by the library loader
return 0;
}
int32_t LDPCshutdown()
{
return 0;
}
void nrLDPC_initcall(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out) {
set_compact_BG(p_decParams->Z,p_decParams->BG);
// init_LLR_DMA(p_decParams, p_llr, p_out);
int32_t LDPCinit()
{
// init_LLR_DMA(p_decParams, p_llr, p_out);
return 0;
}
int32_t nrLDPC_decod(t_nrLDPC_dec_params *p_decParams,
int8_t *p_llr,
int8_t *p_out,
t_nrLDPC_procBuf *p_procBuf,
t_nrLDPC_time_stats *time_decoder,
decode_abort_t *ab)
int32_t LDPCdecoder(t_nrLDPC_dec_params *p_decParams,
uint8_t harq_pid,
uint8_t ulsch_id,
uint8_t C,
int8_t *p_llr,
int8_t *p_out,
t_nrLDPC_time_stats *time_decoder,
decode_abort_t *ab)
{
uint16_t Zc = p_decParams->Z;
uint8_t BG = p_decParams->BG;
......@@ -362,12 +368,14 @@ int32_t nrLDPC_decod(t_nrLDPC_dec_params *p_decParams,
int memorySize_llr = col * Zc * sizeof(char) * MC;
// cudaCheck( cudaMemcpyToSymbol(dev_const_llr, p_llr, memorySize_llr_cuda) );
// cudaCheck( cudaMemcpyToSymbol(dev_llr, p_llr, memorySize_llr_cuda) );
int rt = clEnqueueWriteBuffer(ocl.runtime[0].queue[0], ocl.runtime[0].dev_const_llr, CL_TRUE, 0,
memorySize_llr, p_llr, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d moving p_llr data to read only memory in pltf %i dev %i\n" , (int)rt, 0,0);
set_compact_BG(p_decParams->Z, p_decParams->BG);
int rt =
clEnqueueWriteBuffer(ocl.runtime[0].queue[0], ocl.runtime[0].dev_const_llr, CL_TRUE, 0, memorySize_llr, p_llr, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d moving p_llr data to read only memory in pltf %i dev %i\n" , (int)rt, 0,0);
rt = clEnqueueWriteBuffer(ocl.runtime[0].queue[0], ocl.runtime[0].dev_llr, CL_TRUE, 0,
memorySize_llr, p_llr, 0, NULL, NULL);
AssertFatal(rt == CL_SUCCESS, "Error %d moving p_llr data to read-write memory in pltf %i dev %i\n" , (int)rt, 0,0);
AssertFatal(rt == CL_SUCCESS, "Error %d moving p_llr data to read-write memory in pltf %i dev %i\n" , (int)rt, 0,0);
// Define CUDA kernel dimension
// int blockSizeX = Zc;
// dim3 dimGridKernel1(row, MC, 1); // dim of the thread blocks
......
......@@ -85,24 +85,27 @@ typedef struct nrLDPC_dec_params {
uint8_t BG; /**< Base graph */
uint16_t Z; /**< Lifting size */
uint8_t R; /**< Decoding rate: Format 15,13,... for code rates 1/5, 1/3,... */
uint16_t F; /**< Filler bits */
uint8_t Qm; /**< Modulation */
uint8_t rv;
uint8_t numMaxIter; /**< Maximum number of iterations */
int block_length;
int E;
e_nrLDPC_outMode outMode; /**< Output format */
int crc_type;
int (*check_crc)(uint8_t* decoded_bytes, uint32_t n, uint8_t crc_type);
uint8_t setCombIn;
} t_nrLDPC_dec_params;
/**
Structure containing LDPC decoder parameters.
*/
typedef struct nrLDPCoffload_params {
uint8_t BG; /**< Base graph */
uint16_t Z;
uint16_t Kr;
uint16_t Z;
uint16_t Kr;
uint8_t rv;
uint32_t E;
uint16_t n_cb;
uint16_t F; /**< Filler bits */
uint8_t Qm; /**< Modulation */
uint8_t setCombIn;
} t_nrLDPCoffload_params;
/**
......
......@@ -47,7 +47,7 @@ typedef struct {
time_stats_t *toutput;
int Kr;
uint32_t Kb;
uint32_t *Zc;
uint32_t Zc;
void *harq;
/// Encoder BG
uint8_t BG;
......@@ -57,13 +57,16 @@ typedef struct {
uint32_t K;
/// Number of "Filler" bits
uint32_t F;
/// LDPC-code outputs
uint8_t *d[MAX_NUM_NR_DLSCH_SEGMENTS_PER_LAYER*NR_MAX_NB_LAYERS];
/// Modulation order
uint8_t Qm;
uint32_t E;
unsigned int G;
// Redundancy version index
uint8_t rv;
} encoder_implemparams_t;
#define INIT0_LDPCIMPLEMPARAMS {0,0,0,NULL,NULL,NULL,NULL}
typedef void(*nrLDPC_initcallfunc_t)(t_nrLDPC_dec_params *p_decParams, int8_t *p_llr, int8_t *p_out);
typedef int(*nrLDPC_encoderfunc_t)(unsigned char **,unsigned char **,int,int,short, short, encoder_implemparams_t *);
//============================================================================================================================
typedef int32_t(LDPC_initfunc_t)(void);
typedef int32_t(LDPC_shutdownfunc_t)(void);
// decoder interface
/**
\brief LDPC decoder API type definition
......@@ -73,8 +76,14 @@ typedef int(*nrLDPC_encoderfunc_t)(unsigned char **,unsigned char **,int,int,sho
\param p_profiler LDPC profiler statistics
*/
typedef int32_t (*nrLDPC_decoderfunc_t)(t_nrLDPC_dec_params *, int8_t *, int8_t *, t_nrLDPC_time_stats *, decode_abort_t *ab);
typedef int32_t(*nrLDPC_decoffloadfunc_t)(t_nrLDPC_dec_params* , uint8_t, uint8_t, uint8_t , uint8_t, uint16_t, uint32_t, uint8_t, int8_t*, int8_t* ,uint8_t);
typedef int32_t(*nrLDPC_dectopfunc_t)(void);
typedef int32_t(LDPC_decoderfunc_t)(t_nrLDPC_dec_params *p_decParams,
uint8_t harq_pid,
uint8_t ulsch_id,
uint8_t C,
int8_t *p_llr,
int8_t *p_out,
t_nrLDPC_time_stats *,
decode_abort_t *ab);
typedef int32_t(LDPC_encoderfunc_t)(uint8_t **, uint8_t **, encoder_implemparams_t *);
#endif
......@@ -38,44 +38,29 @@
#include "defs.h"
#include "assertions.h"
#include "openair1/PHY/CODING/nrLDPC_defs.h"
#include "openair1/PHY/CODING/nrLDPC_extern.h"
#include "ldpc_generate_coefficient.c"
int ldpc_encoder_orig(uint8_t *test_input,uint8_t *channel_input,int Zc,int Kb,short block_length, short BG,uint8_t gen_code)
int LDPCencoder(unsigned char **inputArray, unsigned char **outputArray, encoder_implemparams_t *impp)
{
const unsigned char *input = inputArray[0];
// channel input is the output of this function!
unsigned char *output = outputArray[0];
const int Zc = impp->Zc;
const int Kb = impp->Kb;
const short block_length = impp->K;
const short BG = impp->BG;
const uint8_t gen_code = impp->gen_code;
uint8_t c[22*384]; //padded input, unpacked, max size
uint8_t d[68*384]; //coded output, unpacked, max size
uint8_t channel_temp,temp;
short *Gen_shift_values, *no_shift_values, *pointer_shift_values;
short nrows = 46;//parity check bits
short ncols = 22;//info bits
uint8_t d[68 * 384]; // coded output, unpacked, max size
int i,i1,i2,i3,i4,i5,temp_prime,var;
int no_punctured_columns,removed_bit,rate=3;
int no_punctured_columns, removed_bit;
int nind=0;
int indlist[1000];
int indlist2[1000];
//determine number of bits in codeword
//if (block_length>3840)
if (BG==1)
{
nrows=46; //parity check bits
ncols=22; //info bits
rate=3;
}
//else if (block_length<=3840)
else if (BG==2)
{
//BG=2;
nrows=42; //parity check bits
ncols=10; // info bits
rate=5;
}
Gen_shift_values=choose_generator_matrix(BG,Zc);
const short *Gen_shift_values = choose_generator_matrix(BG, Zc);
if (Gen_shift_values==NULL) {
printf("ldpc_encoder_orig: could not find generator matrix\n");
return(-1);
......@@ -83,21 +68,14 @@ int ldpc_encoder_orig(uint8_t *test_input,uint8_t *channel_input,int Zc,int Kb,s
//printf("ldpc_encoder_orig: BG %d, Zc %d, Kb %d\n",BG, Zc, Kb);
AssertFatal(BG <= 2, "BG %d is not supported yet\n", BG);
// load base graph of generator matrix
if (BG==1)
{
no_shift_values=(short *) no_shift_values_BG1;
pointer_shift_values=(short *) pointer_shift_values_BG1;
}
else if (BG==2)
{
no_shift_values=(short *) no_shift_values_BG2;
pointer_shift_values=(short *) pointer_shift_values_BG2;
}
else {
AssertFatal(0,"BG %d is not supported yet\n",BG);
}
const short nrows = BG == 1 ? 46 : 42;
const short ncols = BG == 1 ? 22 : 10;
const short rate = BG == 1 ? 3 : 5;
const short *no_shift_values = BG == 1 ? no_shift_values_BG1 : no_shift_values_BG2;
const short *pointer_shift_values = BG == 1 ? pointer_shift_values_BG1 : pointer_shift_values_BG2;
no_punctured_columns=(int)((nrows-2)*Zc+block_length-block_length*rate)/Zc;
removed_bit=(nrows-no_punctured_columns-2) * Zc+block_length-(block_length*rate);
//printf("%d\n",no_punctured_columns);
......@@ -108,9 +86,9 @@ int ldpc_encoder_orig(uint8_t *test_input,uint8_t *channel_input,int Zc,int Kb,s
for (i=0; i<block_length; i++)
{
//c[i] = test_input[i/8]<<(i%8);
//c[i]=c[i]>>7&1;
c[i]=(test_input[i/8]&(128>>(i&7)))>>(7-(i&7));
// c[i] = input[i/8]<<(i%8);
// c[i]=c[i]>>7&1;
c[i] = (input[i / 8] & (128 >> (i & 7))) >> (7 - (i & 7));
}
// parity check part
......@@ -188,10 +166,9 @@ int ldpc_encoder_orig(uint8_t *test_input,uint8_t *channel_input,int Zc,int Kb,s
fprintf(fd2," c2=&csimd[i2];\n");
fprintf(fd2," d2=&dsimd[i2];\n");
for (i1=0; i1 < nrows; i1++)
for (i1 = 0; i1 < nrows; i1++)
{
channel_temp=0;
fprintf(fd,"\n//row: %d\n",i1);
fprintf(fd2,"\n//row: %d\n",i1);
fprintf(fd," d2[%d]=",(Zc*i1)>>shift);
......@@ -225,8 +202,7 @@ int ldpc_encoder_orig(uint8_t *test_input,uint8_t *channel_input,int Zc,int Kb,s
fprintf(fd2,"c2[%d]",indlist2[i4]);
for (i4=0;i4<nind-1;i4++) { fprintf(fd,")"); fprintf(fd2,")"); }
fprintf(fd,";\n");
fprintf(fd2,";\n");
fprintf(fd2, ";\n");
}
fprintf(fd," }\n}\n");
fprintf(fd2," }\n}\n");
......@@ -243,40 +219,33 @@ int ldpc_encoder_orig(uint8_t *test_input,uint8_t *channel_input,int Zc,int Kb,s
//rotate matrix here
for (i5=0; i5 < Kb; i5++)
{
temp = c[i5*Zc];
memmove(&c[i5*Zc], &c[i5*Zc+1], (Zc-1)*sizeof(uint8_t));
c[i5*Zc+Zc-1] = temp;
const int temp = c[i5 * Zc];
memmove(&c[i5 * Zc], &c[i5 * Zc + 1], Zc - 1);
c[i5 * Zc + Zc - 1] = temp;
}
// calculate each row in base graph
for (i1=0; i1 < nrows-no_punctured_columns; i1++)
{
channel_temp=0;
unsigned char channel_temp = 0;
for (i3=0; i3 < Kb; i3++)
{
temp_prime=i1 * ncols + i3;
for (i3 = 0; i3 < Kb; i3++) {
temp_prime = i1 * ncols + i3;
for (i4=0; i4 < no_shift_values[temp_prime]; i4++)
{
channel_temp = channel_temp ^ c[ i3*Zc + Gen_shift_values[ pointer_shift_values[temp_prime]+i4 ] ];
for (i4 = 0; i4 < no_shift_values[temp_prime]; i4++) {
channel_temp = channel_temp ^ c[i3 * Zc + Gen_shift_values[pointer_shift_values[temp_prime] + i4]];
}
}
d[i2+i1*Zc]=channel_temp;
//channel_input[t+i1*Zc]=channel_temp;
// output[t+i1*Zc]=channel_temp;
}
}
}
// information part and puncture columns
memcpy(&channel_input[0], &c[2*Zc], (block_length-2*Zc)*sizeof(uint8_t));
memcpy(&channel_input[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(uint8_t));
//memcpy(channel_input,c,Kb*Zc*sizeof(uint8_t));
return 0;
}
int nrLDPC_encod(uint8_t **test_input,uint8_t **channel_input,int Zc,int Kb,short block_length, short BG, encoder_implemparams_t *impp) {
return ldpc_encoder_orig(test_input[0],channel_input[0],Zc,Kb,block_length,BG,impp->gen_code);
memcpy(&output[0], &c[2 * Zc], block_length - 2 * Zc);
memcpy(&output[block_length - 2 * Zc], &d[0], (nrows - no_punctured_columns) * Zc - removed_bit);
// memcpy(output,c,Kb*Zc*sizeof(unsigned char));
return block_length - 2 * Zc + (nrows - no_punctured_columns) * Zc - removed_bit;
}
This diff is collapsed.
......@@ -37,15 +37,17 @@
#include "common/utils/LOG/log.h"
#include "time_meas.h"
#include "openair1/PHY/CODING/nrLDPC_defs.h"
#include "openair1/PHY/CODING/nrLDPC_extern.h"
#include "ldpc_encode_parity_check.c"
#include "ldpc_generate_coefficient.c"
int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc,int Kb,short block_length, short BG, encoder_implemparams_t *impp)
int LDPCencoder(uint8_t **test_input, uint8_t **channel_input, encoder_implemparams_t *impp)
{
short nrows=0,ncols=0;
int Zc = impp->Zc;
int Kb = impp->Kb;
int block_length = impp->K;
int BG = impp->BG;
int nrows=0,ncols=0;
int rate=3;
int no_punctured_columns,removed_bit;
......@@ -79,17 +81,17 @@ int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc
if ((Zc&31) > 0) simd_size = 16;
else simd_size = 32;
unsigned char c[22*Zc] __attribute__((aligned(32))); //padded input, unpacked, max size
unsigned char d[46*Zc] __attribute__((aligned(32))); //coded parity part output, unpacked, max size
uint8_t c[22*Zc] __attribute__((aligned(32))); //padded input, unpacked, max size
uint8_t d[46*Zc] __attribute__((aligned(32))); //coded parity part output, unpacked, max size
// calculate number of punctured bits
no_punctured_columns=(int)((nrows-2)*Zc+block_length-block_length*rate)/Zc;
removed_bit=(nrows-no_punctured_columns-2) * Zc+block_length-(int)(block_length*rate);
removed_bit = (nrows - no_punctured_columns - 2) * Zc + block_length - block_length * rate;
// printf("%d\n",no_punctured_columns);
// printf("%d\n",removed_bit);
// unpack input
memset(c,0,sizeof(unsigned char) * ncols * Zc);
memset(d,0,sizeof(unsigned char) * nrows * Zc);
memset(c, 0, sizeof(c));
memset(d, 0, sizeof(d));
if(impp->tinput != NULL) start_meas(impp->tinput);
for (int i=0; i<block_length; i++) {
......
......@@ -37,13 +37,17 @@
#include "common/utils/LOG/log.h"
#include "time_meas.h"
#include "openair1/PHY/CODING/nrLDPC_defs.h"
#include "openair1/PHY/CODING/nrLDPC_extern.h"
#include "ldpc_encode_parity_check.c"
#include "ldpc_generate_coefficient.c"
#include "PHY/sse_intrin.h"
int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc,int Kb,short block_length, short BG, encoder_implemparams_t *impp)
int LDPCencoder(uint8_t **test_input, uint8_t **channel_input, encoder_implemparams_t *impp)
{
int Zc = impp->Zc;
int Kb = impp->Kb;
short block_length = impp->K;
short BG = impp->BG;
short nrows=0,ncols=0;
int i,i1,j,rate=3;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment