Commit e7a84976 authored by Romain Beurdouche

feat(ldpc-offload-xdma): transport block decoding with XDMA enabled in...

feat(ldpc-offload-xdma): transport block decoding with XDMA enabled in nr_ulsim on experimental testbench
parent da5ce90d
...@@ -513,6 +513,14 @@ endif() ...@@ -513,6 +513,14 @@ endif()
########################################################## ##########################################################
# LDPC offload with xdma driver
##########################################################
# Static library built from the single XDMA LDPC decoder offload source file.
add_library(ldpc_xdma STATIC ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload_xdma.c)
# PRIVATE: the include path is used to compile ldpc_xdma only and is not
# propagated to targets that link against it.
# NOTE(review): this path (.../CODING/nrLDPC_decoder_offload_xdma) is not the
# directory of the source file above (.../CODING/nrLDPC_decoder) - confirm that
# the headers really live there.
target_include_directories(ldpc_xdma PRIVATE "${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder_offload_xdma")
##########################################################
include_directories ("${OPENAIR_DIR}/radio/COMMON") include_directories ("${OPENAIR_DIR}/radio/COMMON")
############################################################## ##############################################################
...@@ -2073,7 +2081,7 @@ target_link_libraries(nr-softmodem PRIVATE ...@@ -2073,7 +2081,7 @@ target_link_libraries(nr-softmodem PRIVATE
ITTI ${NAS_UE_LIB} lte_rrc nr_rrc ITTI ${NAS_UE_LIB} lte_rrc nr_rrc
ngap s1ap L2_LTE_NR L2_NR MAC_NR_COMMON NFAPI_COMMON_LIB NFAPI_LIB NFAPI_VNF_LIB NFAPI_PNF_LIB NFAPI_USER_LIB SIMU SIMU_ETH ngap s1ap L2_LTE_NR L2_NR MAC_NR_COMMON NFAPI_COMMON_LIB NFAPI_LIB NFAPI_VNF_LIB NFAPI_PNF_LIB NFAPI_USER_LIB SIMU SIMU_ETH
x2ap f1ap m2ap m3ap e1ap shlib_loader x2ap f1ap m2ap m3ap e1ap shlib_loader
-Wl,--end-group z dl) -Wl,--end-group z dl ldpc_xdma)
target_link_libraries(nr-softmodem PRIVATE pthread m CONFIG_LIB rt sctp) target_link_libraries(nr-softmodem PRIVATE pthread m CONFIG_LIB rt sctp)
target_link_libraries(nr-softmodem PRIVATE ${T_LIB}) target_link_libraries(nr-softmodem PRIVATE ${T_LIB})
...@@ -2227,7 +2235,7 @@ add_executable(nr_dlschsim ...@@ -2227,7 +2235,7 @@ add_executable(nr_dlschsim
) )
target_link_libraries(nr_dlschsim PRIVATE target_link_libraries(nr_dlschsim PRIVATE
-Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB CONFIG_LIB MAC_NR_COMMON -Wl,--end-group -Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB CONFIG_LIB MAC_NR_COMMON -Wl,--end-group
m pthread ${T_LIB} ITTI dl shlib_loader m pthread ${T_LIB} ITTI dl shlib_loader ldpc_xdma
) )
target_link_libraries(nr_dlschsim PRIVATE asn1_nr_rrc_hdrs) target_link_libraries(nr_dlschsim PRIVATE asn1_nr_rrc_hdrs)
...@@ -2238,7 +2246,7 @@ add_executable(nr_pbchsim ...@@ -2238,7 +2246,7 @@ add_executable(nr_pbchsim
) )
target_link_libraries(nr_pbchsim PRIVATE target_link_libraries(nr_pbchsim PRIVATE
-Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB CONFIG_LIB MAC_NR_COMMON -Wl,--end-group -Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB CONFIG_LIB MAC_NR_COMMON -Wl,--end-group
m pthread ${T_LIB} ITTI dl shlib_loader m pthread ${T_LIB} ITTI dl shlib_loader ldpc_xdma
) )
target_link_libraries(nr_pbchsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs) target_link_libraries(nr_pbchsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs)
...@@ -2251,7 +2259,7 @@ add_executable(nr_pucchsim ...@@ -2251,7 +2259,7 @@ add_executable(nr_pucchsim
) )
target_link_libraries(nr_pucchsim PRIVATE target_link_libraries(nr_pucchsim PRIVATE
-Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB CONFIG_LIB MAC_NR_COMMON -Wl,--end-group -Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB CONFIG_LIB MAC_NR_COMMON -Wl,--end-group
m pthread ${T_LIB} ITTI dl shlib_loader m pthread ${T_LIB} ITTI dl shlib_loader ldpc_xdma
) )
target_link_libraries(nr_pucchsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs) target_link_libraries(nr_pucchsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs)
...@@ -2268,7 +2276,7 @@ add_executable(nr_dlsim ...@@ -2268,7 +2276,7 @@ add_executable(nr_dlsim
) )
target_link_libraries(nr_dlsim PRIVATE target_link_libraries(nr_dlsim PRIVATE
-Wl,--start-group UTIL SIMU SIMU_ETH PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB SCHED_NR_UE_LIB MAC_UE_NR MAC_NR_COMMON nr_rrc CONFIG_LIB L2_NR HASHTABLE x2ap SECURITY ngap -lz -Wl,--end-group -Wl,--start-group UTIL SIMU SIMU_ETH PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB SCHED_NR_UE_LIB MAC_UE_NR MAC_NR_COMMON nr_rrc CONFIG_LIB L2_NR HASHTABLE x2ap SECURITY ngap -lz -Wl,--end-group
m pthread ${T_LIB} ITTI dl shlib_loader m pthread ${T_LIB} ITTI dl shlib_loader ldpc_xdma
) )
target_link_libraries(nr_dlsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs) target_link_libraries(nr_dlsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs)
...@@ -2290,7 +2298,7 @@ add_executable(nr_ulschsim ...@@ -2290,7 +2298,7 @@ add_executable(nr_ulschsim
) )
target_link_libraries(nr_ulschsim PRIVATE target_link_libraries(nr_ulschsim PRIVATE
-Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB CONFIG_LIB MAC_NR_COMMON -Wl,--end-group -Wl,--start-group UTIL SIMU PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB CONFIG_LIB MAC_NR_COMMON -Wl,--end-group
m pthread ${T_LIB} ITTI dl shlib_loader m pthread ${T_LIB} ITTI dl shlib_loader ldpc_xdma
) )
target_link_libraries(nr_ulschsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs) target_link_libraries(nr_ulschsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs)
...@@ -2312,7 +2320,7 @@ endif() ...@@ -2312,7 +2320,7 @@ endif()
target_link_libraries(nr_ulsim PRIVATE target_link_libraries(nr_ulsim PRIVATE
-Wl,--start-group UTIL SIMU SIMU_ETH PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB SCHED_NR_UE_LIB MAC_UE_NR MAC_NR_COMMON nr_rrc CONFIG_LIB L2_NR HASHTABLE x2ap SECURITY ngap -lz -Wl,--end-group -Wl,--start-group UTIL SIMU SIMU_ETH PHY_COMMON PHY_NR_COMMON PHY_NR PHY_NR_UE SCHED_NR_LIB SCHED_NR_UE_LIB MAC_UE_NR MAC_NR_COMMON nr_rrc CONFIG_LIB L2_NR HASHTABLE x2ap SECURITY ngap -lz -Wl,--end-group
m pthread ${T_LIB} ITTI dl shlib_loader m pthread ${T_LIB} ITTI dl shlib_loader ldpc_xdma
) )
target_link_libraries(nr_ulsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs) target_link_libraries(nr_ulsim PRIVATE asn1_nr_rrc_hdrs asn1_lte_rrc_hdrs)
......
/*
* This file is part of the Xilinx DMA IP Core driver tools for Linux
*
* Copyright (c) 2016-present, Xilinx, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#define _BSD_SOURCE
#define _XOPEN_SOURCE 500
#include <assert.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <byteswap.h>
#include <signal.h>
#include <ctype.h>
#include <termios.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include "xdma_diag.h"
#include "nrLDPC_decoder_offload_xdma.h"
/* 64-bit unsigned shorthand; only referenced by commented-out timing code in this file. */
typedef unsigned long long U64;
/*
 * getopt_long() option table: each entry is {long name, argument requirement,
 * flag pointer, short option character}; the all-zero entry terminates it.
 * NOTE(review): no getopt_long() call is visible in this file, so this table
 * looks like a leftover from the stand-alone command-line tool - confirm
 * before removing.
 */
static struct option const long_opts[] = {{"device", required_argument, NULL, 'd'},
{"address", required_argument, NULL, 'a'},
{"size", required_argument, NULL, 's'},
{"offset", required_argument, NULL, 'o'},
{"count", required_argument, NULL, 'c'},
{"data infile", required_argument, NULL, 'f'},
{"data outfile", required_argument, NULL, 'w'},
{"help", no_argument, NULL, 'h'},
{"verbose", no_argument, NULL, 'v'},
{0, 0, 0, 0}};
// Disabled (#if 0) copy of the endian helpers, FATAL and the device/offset
// constants. Nothing in this block is compiled; the names used by the code
// below (htoll, FATAL, MAP_SIZE, OFFSET_*, ...) presumably come from
// "xdma_diag.h", which is included above - confirm.
#if 0
/* ltoh: little to host */
/* htol: host to little */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define ltohl(x) (x)
#define ltohs(x) (x)
#define htoll(x) (x)
#define htols(x) (x)
#elif __BYTE_ORDER == __BIG_ENDIAN
#define ltohl(x) __bswap_32(x)
#define ltohs(x) __bswap_16(x)
#define htoll(x) __bswap_32(x)
#define htols(x) __bswap_16(x)
#endif
#define FATAL \
do { \
fprintf(stderr, "Error at line %d, file %s (%d) [%s]\n", __LINE__, __FILE__, errno, strerror(errno)); \
exit(1); \
} while (0)
#define MAP_SIZE (32 * 1024UL)
#define MAP_MASK (MAP_SIZE - 1)
#define DEVICE_NAME_DEFAULT_READ "/dev/xdma0_c2h_1"
#define DEVICE_NAME_DEFAULT_WRITE "/dev/xdma0_h2c_1"
#define SIZE_DEFAULT (32)
#define COUNT_DEFAULT (1)
#define OFFSET_DEC_IN 0x0000
#define OFFSET_DEC_OUT 0x0004
#define OFFSET_ENC_IN 0x0008
#define OFFSET_ENC_OUT 0x000c
#define OFFSET_RESET 0x0020
#define CB_PROCESS_NUMBER 12 // add by JW
#endif
/* Base address of the mmap()ed register window of /dev/xdma0_user (set by test_dma_init() and dma_reset()). */
void* map_base;
/* File descriptor of /dev/xdma0_user, the device backing map_base. */
int fd;
/* DMA character devices for the encoder: write = DEVICE_NAME_DEFAULT_ENC_WRITE, read = DEVICE_NAME_DEFAULT_ENC_READ. */
int fd_enc_write, fd_enc_read;
/* DMA character devices for the decoder: write = DEVICE_NAME_DEFAULT_DEC_WRITE, read = DEVICE_NAME_DEFAULT_DEC_READ. */
int fd_dec_write, fd_dec_read;
/* Page-aligned scratch buffers allocated in test_dma_init() and released in test_dma_shutdown();
 * the visible transfer functions only mention them in commented-out calls. */
char *allocated_write, *allocated_read;
// dma_from_device.c
#if 0
int test_dma_end_read();
int test_dma_enc_write();
#endif
/* NOTE(review): not referenced anywhere in the visible code. */
static int no_write = 0;
// [Start] #include "dma_utils.c" ===================================
/*
* man 2 write:
* On Linux, write() (and similar system calls) will transfer at most
* 0x7ffff000 (2,147,479,552) bytes, returning the number of bytes
* actually transferred. (This is true on both 32-bit and 64-bit
* systems.)
*/
/* Upper bound for a single read()/write() call; larger transfers are split into chunks of this size. */
#define RW_MAX_SIZE 0x7ffff000
/* NOTE(review): not read by any visible function. */
int verbose = 0;
/**
 * Parse an unsigned integer from a command-line argument string.
 *
 * A value written with a "0x" prefix is parsed as hexadecimal; anything else
 * is parsed as decimal.
 *
 * @param optarg NUL-terminated argument text (may be NULL)
 * @return the parsed value, or 0 when optarg is NULL or contains no number
 *         (previously an uninitialized value was returned in that case)
 */
uint64_t getopt_integer(char* optarg)
{
  /* unsigned long long matches %llx/%llu on every ABI, unlike uint64_t with %lx/%lu. */
  unsigned long long parsed = 0;

  if (optarg == NULL)
    return 0;

  if (sscanf(optarg, "0x%llx", &parsed) <= 0) {
    /* No "0x<hex>" form: fall back to decimal and make sure a failed
     * conversion yields a well-defined 0. */
    parsed = 0;
    if (sscanf(optarg, "%llu", &parsed) <= 0)
      parsed = 0;
  }
  return (uint64_t)parsed;
}
/**
 * Read exactly `size` bytes from `fd` into `buffer`.
 *
 * The transfer is split into chunks of at most RW_MAX_SIZE bytes. When the
 * running offset is non-zero the descriptor is repositioned with lseek()
 * before each chunk; a zero offset reads from the current position.
 *
 * @param fname  name used in diagnostics only
 * @param fd     open file descriptor to read from
 * @param buffer destination, at least `size` bytes
 * @param size   number of bytes to read
 * @param base   starting offset inside the file/device
 * @return `size` on success, -EIO on a seek error, a read error or a short read
 */
ssize_t read_to_buffer(char* fname, int fd, char* buffer, uint64_t size, uint64_t base)
{
  uint64_t count = 0;
  char* buf = buffer;
  off_t offset = base;

  while (count < size) {
    uint64_t bytes = size - count;
    if (bytes > RW_MAX_SIZE)
      bytes = RW_MAX_SIZE;

    if (offset) {
      /* lseek() returns off_t, which may be wider than ssize_t. */
      off_t pos = lseek(fd, offset, SEEK_SET);
      if (pos != offset) {
        int saved_errno = errno; /* fprintf() may overwrite errno */
        fprintf(stderr, "%s, seek off 0x%llx != 0x%llx.\n", fname, (unsigned long long)pos, (unsigned long long)offset);
        errno = saved_errno;
        perror("seek file");
        return -EIO;
      }
    }

    /* read data from file into memory buffer */
    ssize_t rc = read(fd, buf, bytes);
    if (rc < 0 || (uint64_t)rc != bytes) {
      int saved_errno = errno;
      fprintf(stderr,
              "%s, R off 0x%llx, 0x%llx != 0x%llx.\n",
              fname,
              (unsigned long long)count,
              (unsigned long long)rc,
              (unsigned long long)bytes);
      if (rc < 0) {
        /* errno is only meaningful for a failed call, not for a short read. */
        errno = saved_errno;
        perror("read file");
      }
      return -EIO;
    }

    count += bytes;
    buf += bytes;
    offset += bytes;
  }

  /* The loop only ends once count == size, so no further check is needed. */
  return count;
}
/**
 * Write exactly `size` bytes from `buffer` to `fd`.
 *
 * The transfer is split into chunks of at most RW_MAX_SIZE bytes. When the
 * running offset is non-zero the descriptor is repositioned with lseek()
 * before each chunk; a zero offset writes at the current position.
 *
 * @param fname  name used in diagnostics only
 * @param fd     open file descriptor to write to
 * @param buffer source data, at least `size` bytes
 * @param size   number of bytes to write
 * @param base   starting offset inside the file/device
 * @return `size` on success, -EIO on a seek error, a write error or a short write
 */
ssize_t write_from_buffer(char* fname, int fd, char* buffer, uint64_t size, uint64_t base)
{
  uint64_t count = 0;
  char* buf = buffer;
  off_t offset = base;

  while (count < size) {
    uint64_t bytes = size - count;
    if (bytes > RW_MAX_SIZE)
      bytes = RW_MAX_SIZE;

    if (offset) {
      /* lseek() returns off_t, which may be wider than ssize_t. */
      off_t pos = lseek(fd, offset, SEEK_SET);
      if (pos != offset) {
        int saved_errno = errno; /* fprintf() may overwrite errno */
        fprintf(stderr, "%s, seek off 0x%llx != 0x%llx.\n", fname, (unsigned long long)pos, (unsigned long long)offset);
        errno = saved_errno;
        perror("seek file");
        return -EIO;
      }
    }

    /* write data to file from memory buffer */
    ssize_t rc = write(fd, buf, bytes);
    if (rc < 0 || (uint64_t)rc != bytes) {
      int saved_errno = errno;
      fprintf(stderr,
              "%s, W off 0x%llx, 0x%llx != 0x%llx.\n",
              fname,
              (unsigned long long)offset,
              (unsigned long long)rc,
              (unsigned long long)bytes);
      if (rc < 0) {
        /* errno is only meaningful for a failed call, not for a short write. */
        errno = saved_errno;
        perror("write file");
      }
      return -EIO;
    }

    count += bytes;
    buf += bytes;
    offset += bytes;
  }

  /* The loop only ends once count == size, so no further check is needed. */
  return count;
}
/* Validate that a timespec is normalized, i.e. 0 <= tv_nsec < 1000000000.
 *
 * Returns 0 for a normalized value and -1 otherwise. The only caller,
 * timespec_sub(), is currently commented out below.
 */
static int timespec_check(struct timespec* t)
{
  const long nsec = t->tv_nsec;
  const int normalized = (nsec >= 0) && (nsec < 1000000000);

  return normalized ? 0 : -1;
}
/*
void timespec_sub(struct timespec* t1, struct timespec* t2)
{
if (timespec_check(t1) < 0) {
fprintf(stderr, "invalid time #1: %lld.%.9ld.\n", (long long)t1->tv_sec, t1->tv_nsec);
return;
}
if (timespec_check(t2) < 0) {
fprintf(stderr, "invalid time #2: %lld.%.9ld.\n", (long long)t2->tv_sec, t2->tv_nsec);
return;
}
t1->tv_sec -= t2->tv_sec;
t1->tv_nsec -= t2->tv_nsec;
if (t1->tv_nsec >= 1000000000) {
t1->tv_sec++;
t1->tv_nsec -= 1000000000;
} else if (t1->tv_nsec < 0) {
t1->tv_sec--;
t1->tv_nsec += 1000000000;
}
}
*/
// [End] #include "dma_utils.c" ===================================
/**
 * Write the encoder control word to the OFFSET_ENC_OUT register and read the
 * encoder output from DEVICE_NAME_DEFAULT_ENC_READ into EncOut.
 *
 * The control word and the transfer length are derived from Confparam
 * (BGSel, z_set, z_j, kb_1); the code-block count is the compile-time
 * constant CB_PROCESS_NUMBER. Confparam.mb is not used: mb is recomputed as
 * kb_1 + kb.
 *
 * @param EncOut    destination buffer; must hold the computed transfer size
 * @param Confparam encoder configuration; BGSel and z_set are 1-based
 * @return 0 on success, -EINVAL when the register window is not mapped or the
 *         DMA device is not open, otherwise the negative value returned by
 *         read_to_buffer()
 */
int test_dma_enc_read(char* EncOut, EncIPConf Confparam)
{
  /* z_a for z_set index 0..6; every other index selects 15. */
  static const uint16_t z_a_by_set[7] = {2, 3, 5, 7, 9, 11, 13};
  /* kb for bg index 0..3; every other index selects 6. */
  static const uint16_t kb_by_bg[4] = {22, 10, 9, 8};

  const uint32_t CB_num = CB_PROCESS_NUMBER;
  // this values should be given by Shane
  const uint16_t max_schedule = 0;
  const uint16_t id = CB_num;
  const uint16_t bg = Confparam.BGSel - 1;
  const uint16_t z_set = Confparam.z_set - 1;
  const uint16_t z_j = Confparam.z_j;
  const uint16_t z_a = (z_set < 7) ? z_a_by_set[z_set] : 15;
  const uint16_t kb = (bg < 4) ? kb_by_bg[bg] : 6;
  const uint16_t mb = Confparam.kb_1 + kb;
  const uint32_t Z_val = (unsigned int)(z_a << z_j);
  const uint32_t ctrl_data = (max_schedule << 30) | ((mb - kb) << 24) | (id << 19) | (bg << 6) | (z_set << 3) | z_j;

  /* Output of one code block is Z_val * mb bits, padded to a multiple of
   * 128 bits and expressed in 32-bit words. */
  const uint32_t OutDataNUM = Z_val * mb;
  uint32_t Out_dwNumItems_p128;
  if ((OutDataNUM & 0x7F) == 0)
    Out_dwNumItems_p128 = OutDataNUM >> 5;
  else
    Out_dwNumItems_p128 = ((OutDataNUM >> 7) + 1) << 2;
  /* Words -> bytes, times the number of code blocks. */
  const uint64_t size = (Out_dwNumItems_p128 << 2) * CB_num;

  /* Validate the devices before touching the hardware, so that no control
   * word is issued for a transfer that can never take place. */
  if (map_base == NULL) {
    fprintf(stderr, "%s: control registers are not mapped, call test_dma_init() first.\n", __func__);
    return -EINVAL;
  }
  if (fd_enc_read < 0) {
    fprintf(stderr, "unable to open device %s, %d.\n", DEVICE_NAME_DEFAULT_ENC_READ, fd_enc_read);
    perror("open device");
    return -EINVAL;
  }

  /* Memory-mapped register: volatile store, 32-bit endianness swapped if the
   * host is not little-endian. */
  volatile uint32_t* ctrl_reg = (volatile uint32_t*)((char*)map_base + OFFSET_ENC_OUT);
  *ctrl_reg = htoll(ctrl_data);

  /* lseek & read data from AXI MM into buffer using SGDMA */
  ssize_t rc = read_to_buffer(DEVICE_NAME_DEFAULT_ENC_READ, fd_enc_read, EncOut, size, 0);
  return (rc < 0) ? (int)rc : 0;
}
/**
 * Write the encoder control word to the OFFSET_ENC_IN register and send the
 * encoder input in `data` to DEVICE_NAME_DEFAULT_ENC_WRITE.
 *
 * The control word and the transfer length are derived from Confparam
 * (BGSel, z_set, z_j, kb_1); the code-block count is the compile-time
 * constant CB_PROCESS_NUMBER. Confparam.mb is not used: mb is recomputed as
 * kb_1 + kb.
 *
 * @param data      source buffer; must hold the computed transfer size
 * @param Confparam encoder configuration; BGSel and z_set are 1-based
 * @return 0 on success, -EINVAL when the register window is not mapped or the
 *         DMA device is not open, otherwise the negative value returned by
 *         write_from_buffer()
 */
int test_dma_enc_write(char* data, EncIPConf Confparam)
{
  /* z_a for z_set index 0..6; every other index selects 15. */
  static const uint16_t z_a_by_set[7] = {2, 3, 5, 7, 9, 11, 13};
  /* kb for bg index 0..3; every other index selects 6. */
  static const uint16_t kb_by_bg[4] = {22, 10, 9, 8};

  const uint32_t CB_num = CB_PROCESS_NUMBER;
  // this values should be given by Shane
  const uint16_t max_schedule = 0;
  const uint16_t id = CB_num;
  const uint16_t bg = Confparam.BGSel - 1;
  const uint16_t z_set = Confparam.z_set - 1;
  const uint16_t z_j = Confparam.z_j;
  const uint16_t z_a = (z_set < 7) ? z_a_by_set[z_set] : 15;
  const uint16_t kb = (bg < 4) ? kb_by_bg[bg] : 6;
  const uint16_t mb = Confparam.kb_1 + kb;
  const uint32_t Z_val = (unsigned int)(z_a << z_j);
  const uint32_t ctrl_data = (max_schedule << 30) | ((mb - kb) << 24) | (id << 19) | (bg << 6) | (z_set << 3) | z_j;

  /* Input of one code block is Z_val * kb bits, padded to a multiple of
   * 128 bits and expressed in 32-bit words. */
  const uint32_t InDataNUM = Z_val * kb;
  uint32_t In_dwNumItems_p128;
  if ((InDataNUM & 0x7F) == 0)
    In_dwNumItems_p128 = InDataNUM >> 5;
  else
    In_dwNumItems_p128 = ((InDataNUM >> 7) + 1) << 2;
  /* Words -> bytes, times the number of code blocks. */
  const uint64_t size = (In_dwNumItems_p128 << 2) * CB_num;

  /* Validate the devices before touching the hardware, so that no control
   * word is issued for a transfer that can never take place. */
  if (map_base == NULL) {
    fprintf(stderr, "%s: control registers are not mapped, call test_dma_init() first.\n", __func__);
    return -EINVAL;
  }
  if (fd_enc_write < 0) {
    fprintf(stderr, "unable to open device %s, %d.\n", DEVICE_NAME_DEFAULT_ENC_WRITE, fd_enc_write);
    perror("open device");
    return -EINVAL;
  }

  /* Memory-mapped register: volatile store, 32-bit endianness swapped if the
   * host is not little-endian. */
  volatile uint32_t* ctrl_reg = (volatile uint32_t*)((char*)map_base + OFFSET_ENC_IN);
  *ctrl_reg = htoll(ctrl_data);

  ssize_t rc = write_from_buffer(DEVICE_NAME_DEFAULT_ENC_WRITE, fd_enc_write, data, size, 0);
  return (rc < 0) ? (int)rc : 0;
}
// int test_dma_dec_read(unsigned int *DecOut, DecIPConf Confparam)
/**
 * Write the decoder control word to the OFFSET_DEC_OUT register and read the
 * decoder output from DEVICE_NAME_DEFAULT_DEC_READ into DecOut.
 *
 * The control word is built from Confparam (mb, CB_num, BGSel, z_set, z_j)
 * together with the fixed values max_schedule = 0, max_iter = 8 and
 * sc_idx = 12; Confparam.max_schedule, Confparam.max_iter and
 * Confparam.SetIdx are not read.
 *
 * @param DecOut    destination buffer; must hold the computed transfer size
 * @param Confparam decoder configuration; BGSel and z_set are 1-based
 * @return 0 on success, -EINVAL when the register window is not mapped or the
 *         DMA device is not open, otherwise the negative value returned by
 *         read_to_buffer()
 */
int test_dma_dec_read(char* DecOut, DecIPConf Confparam)
{
  /* z_a for z_set index 0..6; every other index selects 15. */
  static const uint16_t z_a_by_set[7] = {2, 3, 5, 7, 9, 11, 13};
  /* kb for bg index 0..3; every other index selects 6. */
  static const uint16_t kb_by_bg[4] = {22, 10, 9, 8};

  const uint32_t CB_num = Confparam.CB_num;
  // this values should be given by Shane
  const uint16_t max_schedule = 0;
  const uint16_t mb = Confparam.mb;
  const uint16_t id = CB_num;
  const uint16_t bg = Confparam.BGSel - 1;
  const uint16_t z_set = Confparam.z_set - 1;
  const uint16_t z_j = Confparam.z_j;
  const uint16_t max_iter = 8;
  const uint16_t sc_idx = 12;
  const uint16_t z_a = (z_set < 7) ? z_a_by_set[z_set] : 15;
  const uint16_t kb = (bg < 4) ? kb_by_bg[bg] : 6;
  const uint32_t Z_val = (unsigned int)(z_a << z_j);
  const uint32_t ctrl_data = (max_schedule << 30) | ((mb - kb) << 24) | (id << 19) | (max_iter << 13) | (sc_idx << 9)
                             | (bg << 6) | (z_set << 3) | z_j;

  /* Output of one code block is Z_val * kb bits. */
  const uint32_t OutDataNUM = Z_val * kb;
  uint32_t Out_dwNumItems_p128;
  uint32_t Out_dwNumItems;
  if (CB_num & 0x01) {
    /* Odd code-block count: pad every block to a multiple of 256 bits. */
    if ((OutDataNUM & 0xFF) == 0)
      Out_dwNumItems_p128 = OutDataNUM;
    else
      Out_dwNumItems_p128 = 256 * ((OutDataNUM / 256) + 1);
    Out_dwNumItems = (Out_dwNumItems_p128 * CB_num) >> 3; /* bits -> bytes */
  } else {
    /* Even code-block count: pad every block to a multiple of 128 bits ... */
    if ((OutDataNUM & 0x7F) == 0)
      Out_dwNumItems_p128 = OutDataNUM;
    else
      Out_dwNumItems_p128 = 128 * ((OutDataNUM / 128) + 1);
    Out_dwNumItems = (Out_dwNumItems_p128 * CB_num) >> 3; /* bits -> bytes */
    /* ... and round the total up to a multiple of 32 bytes. */
    if ((Out_dwNumItems & 0x1f) != 0)
      Out_dwNumItems = ((Out_dwNumItems + 31) >> 5) << 5;
  }
  const uint64_t size = Out_dwNumItems;

  /* Validate the devices before touching the hardware, so that no control
   * word is issued for a transfer that can never take place. */
  if (map_base == NULL) {
    fprintf(stderr, "%s: control registers are not mapped, call test_dma_init() first.\n", __func__);
    return -EINVAL;
  }
  if (fd_dec_read < 0) {
    fprintf(stderr, "unable to open device %s, %d.\n", DEVICE_NAME_DEFAULT_DEC_READ, fd_dec_read);
    perror("open device");
    return -EINVAL;
  }

  /* Memory-mapped register: volatile store, 32-bit endianness swapped if the
   * host is not little-endian. */
  volatile uint32_t* ctrl_reg = (volatile uint32_t*)((char*)map_base + OFFSET_DEC_OUT);
  *ctrl_reg = htoll(ctrl_data);

  /* lseek & read data from AXI MM into buffer using SGDMA */
  ssize_t rc = read_to_buffer(DEVICE_NAME_DEFAULT_DEC_READ, fd_dec_read, DecOut, size, 0);
  return (rc < 0) ? (int)rc : 0;
}
// int test_dma_dec_write(unsigned int *data, DecIPConf Confparam)
/**
 * Write the decoder control word to the OFFSET_DEC_IN register and send the
 * decoder input in `data` to DEVICE_NAME_DEFAULT_DEC_WRITE.
 *
 * The control word is built from Confparam (mb, CB_num, BGSel, z_set, z_j)
 * together with the fixed values max_schedule = 0, max_iter = 8 and
 * sc_idx = 12; Confparam.max_schedule, Confparam.max_iter and
 * Confparam.SetIdx are not read.
 *
 * @param data      source buffer; must hold the computed transfer size
 * @param Confparam decoder configuration; BGSel and z_set are 1-based
 * @return 0 on success, -EINVAL when the register window is not mapped or the
 *         DMA device is not open, otherwise the negative value returned by
 *         write_from_buffer()
 */
int test_dma_dec_write(char* data, DecIPConf Confparam)
{
  /* z_a for z_set index 0..6; every other index selects 15. */
  static const uint16_t z_a_by_set[7] = {2, 3, 5, 7, 9, 11, 13};
  /* kb for bg index 0..3; every other index selects 6. */
  static const uint16_t kb_by_bg[4] = {22, 10, 9, 8};

  const uint32_t CB_num = Confparam.CB_num;
  // this values should be given by Shane
  const uint16_t max_schedule = 0;
  const uint16_t mb = Confparam.mb;
  const uint16_t id = CB_num;
  const uint16_t bg = Confparam.BGSel - 1;
  const uint16_t z_set = Confparam.z_set - 1;
  const uint16_t z_j = Confparam.z_j;
  const uint16_t max_iter = 8;
  const uint16_t sc_idx = 12;
  const uint16_t z_a = (z_set < 7) ? z_a_by_set[z_set] : 15;
  const uint16_t kb = (bg < 4) ? kb_by_bg[bg] : 6;
  const uint32_t Z_val = (unsigned int)(z_a << z_j);
  const uint32_t ctrl_data = (max_schedule << 30) | ((mb - kb) << 24) | (id << 19) | (max_iter << 13) | (sc_idx << 9)
                             | (bg << 6) | (z_set << 3) | z_j;

  /* Input of one code block is Z_val * mb values of 8 bits each, padded to a
   * multiple of 128 bits. */
  const uint32_t InDataNUM = Z_val * mb * 8;
  uint32_t In_dwNumItems_p128;
  if ((InDataNUM & 0x7F) == 0)
    In_dwNumItems_p128 = InDataNUM;
  else
    In_dwNumItems_p128 = 128 * ((InDataNUM / 128) + 1);
  uint32_t In_dwNumItems = (In_dwNumItems_p128 * CB_num) >> 3; /* bits -> bytes */
  /* Round the total up to a multiple of 32 bytes. */
  if ((In_dwNumItems & 0x1f) != 0)
    In_dwNumItems = ((In_dwNumItems + 31) >> 5) << 5;
  const uint64_t size = In_dwNumItems;

  /* Validate the devices before touching the hardware, so that no control
   * word is issued for a transfer that can never take place. */
  if (map_base == NULL) {
    fprintf(stderr, "%s: control registers are not mapped, call test_dma_init() first.\n", __func__);
    return -EINVAL;
  }
  if (fd_dec_write < 0) {
    fprintf(stderr, "unable to open device %s, %d.\n", DEVICE_NAME_DEFAULT_DEC_WRITE, fd_dec_write);
    perror("open device");
    return -EINVAL;
  }

  /* Memory-mapped register: volatile store, 32-bit endianness swapped if the
   * host is not little-endian. */
  volatile uint32_t* ctrl_reg = (volatile uint32_t*)((char*)map_base + OFFSET_DEC_IN);
  *ctrl_reg = htoll(ctrl_data);

  ssize_t rc = write_from_buffer(DEVICE_NAME_DEFAULT_DEC_WRITE, fd_dec_write, data, size, 0);
  return (rc < 0) ? (int)rc : 0;
}
/**
 * Open and map the XDMA user register window, pulse the reset register, open
 * the four encoder/decoder DMA devices and allocate the two page-aligned
 * scratch buffers.
 *
 * Failure to open or map /dev/xdma0_user terminates the process through
 * FATAL. The DMA device descriptors are not checked here: the transfer
 * functions test them before use. A failed scratch-buffer allocation is
 * reported and leaves the corresponding pointer NULL.
 */
void test_dma_init()
{
  char* device2 = "/dev/xdma0_user"; //
  uint32_t size1 = 24 * 1024;
  uint32_t size2 = 24 * 1024 * 3;
  int rc;

  if ((fd = open(device2, O_RDWR | O_SYNC)) == -1)
    FATAL;
  fflush(stdout);

  /* map the register window */
  map_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (map_base == MAP_FAILED)
    FATAL;

  /* Memory-mapped register: use a volatile store so the write cannot be
   * optimized away or merged. */
  *((volatile uint32_t*)((char*)map_base + OFFSET_RESET)) = 1;

  fd_enc_write = open(DEVICE_NAME_DEFAULT_ENC_WRITE, O_RDWR);
  fd_enc_read = open(DEVICE_NAME_DEFAULT_ENC_READ, O_RDWR);
  fd_dec_write = open(DEVICE_NAME_DEFAULT_DEC_WRITE, O_RDWR);
  fd_dec_read = open(DEVICE_NAME_DEFAULT_DEC_READ, O_RDWR);
  fflush(stdout);

  /* posix_memalign() reports errors through its return value, not errno. */
  allocated_write = NULL;
  rc = posix_memalign((void**)&allocated_write, 4096 /*alignment */, size1 + 4096);
  if (rc != 0) {
    allocated_write = NULL;
    fprintf(stderr, "posix_memalign of write buffer failed: %s\n", strerror(rc));
  }
  allocated_read = NULL;
  rc = posix_memalign((void**)&allocated_read, 4096 /*alignment */, size2 + 4096);
  if (rc != 0) {
    allocated_read = NULL;
    fprintf(stderr, "posix_memalign of read buffer failed: %s\n", strerror(rc));
  }
}
/**
 * Tear down and re-establish the connection to the accelerator.
 *
 * Sequence: write 1 to PCIE_OFF, unmap the register window, close the four
 * DMA devices and the user device, reopen and remap the user device, write 1
 * to PCIE_OFF and to OFFSET_RESET on the new mapping, then reopen the four
 * DMA devices. Failure to unmap, reopen or remap terminates the process
 * through FATAL.
 */
void dma_reset()
{
  char* user_dev = "/dev/xdma0_user";
  int* dma_fds[4] = {&fd_enc_write, &fd_enc_read, &fd_dec_write, &fd_dec_read};
  uint32_t* reg;
  int k;

  /* Write the PCIE_OFF register through the mapping that is about to go away. */
  reg = (uint32_t*)((char*)map_base + PCIE_OFF);
  *reg = 1;

  if (munmap(map_base, MAP_SIZE) == -1)
    FATAL;

  /* Close the DMA channels first, the user device last. */
  for (k = 0; k < 4; k++)
    close(*dma_fds[k]);
  close(fd);

  fd = open(user_dev, O_RDWR | O_SYNC);
  if (fd == -1)
    FATAL;
  fflush(stdout);

  /* map the register window again */
  map_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (map_base == (void*)-1)
    FATAL;

  /* Same two register writes as before, now on the fresh mapping:
   * PCIE_OFF first, then OFFSET_RESET. */
  reg = (uint32_t*)((char*)map_base + PCIE_OFF);
  *reg = 1;
  reg = (uint32_t*)((char*)map_base + OFFSET_RESET);
  *reg = 1;

  fd_enc_write = open(DEVICE_NAME_DEFAULT_ENC_WRITE, O_RDWR);
  fd_enc_read = open(DEVICE_NAME_DEFAULT_ENC_READ, O_RDWR);
  fd_dec_write = open(DEVICE_NAME_DEFAULT_DEC_WRITE, O_RDWR);
  fd_dec_read = open(DEVICE_NAME_DEFAULT_DEC_READ, O_RDWR);
  fflush(stdout);
}
/**
 * Release the scratch buffers allocated by test_dma_init().
 *
 * The pointers are reset to NULL afterwards so that calling this function
 * twice (or before test_dma_init()) is harmless: free(NULL) is a no-op.
 * The device descriptors and the register mapping are left untouched.
 */
void test_dma_shutdown()
{
  free(allocated_write);
  allocated_write = NULL;
  free(allocated_read);
  allocated_read = NULL;
}
#if 1
// reg_rx.c
/**
 * Decode a batch of LDPC code blocks on the FPGA accelerator through XDMA.
 *
 * The first call initialises the device with test_dma_init(); every later
 * call resets it with dma_reset(). The accelerator configuration (set index,
 * shift exponent, row count, base graph, code-block count) is derived from
 * dec_conf, buf_in is sent with test_dma_dec_write() and the result is read
 * into buf_out with test_dma_dec_read().
 *
 * NOTE(review): the LLR re-formatting and per-code-block output copy that
 * used to live here were already disabled (commented out); buf_in and buf_out
 * are passed to the DMA untouched, so the caller presumably provides and
 * consumes the accelerator's own layout - confirm. The disabled code used a
 * per-code-block stride of Zc * mb bytes (input) and Zc * kb bits (output),
 * each rounded up to a multiple of 16 bytes.
 *
 * @param buf_in   input buffer handed unchanged to the decoder DMA write
 * @param buf_out  output buffer filled by the decoder DMA read
 * @param dec_conf decoding parameters; Zc, nRows, BG and numCB are used
 * @return 0 on success, -1 when dec_conf.Zc is smaller than 2. A failed DMA
 *         transfer terminates the process with exit(1).
 */
int nrLDPC_decoder_FPGA_PYM(int8_t* buf_in, int8_t* buf_out, DecIFConf dec_conf)
{
  /* Odd bases are tested from the largest down, so that e.g. a multiple of
   * 15 is not classified as a multiple of 5 or 3, nor 9 as 3. */
  static const int odd_bases[7] = {15, 13, 11, 9, 7, 5, 3};
  static int init_flag = 0;

  // LDPC input parameter
  const int Zc = dec_conf.Zc; // shifting size
  const int nRows = dec_conf.nRows; // number of Rows
  const int baseGraph = dec_conf.BG; // base graph
  const int CB_num = dec_conf.numCB; // number of code block
  /* Zero-initialised so that the fields not set below are well defined. */
  DecIPConf Confparam = {0};
  uint8_t i_LS = 0;
  int z_a, z_tmp;
  int z_j = 0;
  unsigned int k;

  /* Zc < 2 gives Zc / z_a == 0, on which the power-of-two loop below would
   * never terminate. Reject it before touching the hardware. */
  if (Zc < 2) {
    fprintf(stderr, "%s: invalid lifting size Zc=%d\n", __func__, Zc);
    return -1;
  }

  if (init_flag == 0) {
    /*Init*/
    test_dma_init();
    init_flag = 1;
  } else {
    dma_reset();
  }

  // calc i_LS: 7 for multiples of 15 down to 1 for multiples of 3, 0 otherwise
  for (k = 0; k < 7; k++) {
    if ((Zc % odd_bases[k]) == 0) {
      i_LS = (uint8_t)(7 - k);
      break;
    }
  }

  // calc z_a
  z_a = (i_LS == 0) ? 2 : (i_LS * 2 + 1);

  // calc z_j: number of times Zc / z_a can be halved
  z_tmp = Zc / z_a;
  while (z_tmp > 0 && (z_tmp % 2) == 0) {
    z_j = z_j + 1;
    z_tmp = z_tmp / 2;
  }

  // calc CB_num and mb
  Confparam.CB_num = CB_num;
  Confparam.mb = ((baseGraph == 1) ? 22 : 10) + nRows;
  // set BGSel, z_set, z_j
  Confparam.BGSel = baseGraph;
  Confparam.z_set = i_LS + 1;
  Confparam.z_j = z_j;

  // write into accelerator
  if (test_dma_dec_write((char*)buf_in, Confparam) != 0) {
    /* Report before exiting; the message used to sit after exit() and was never printed. */
    fprintf(stderr, "write exit!!\n");
    exit(1);
  }

  // read output of accelerator
  if (test_dma_dec_read((char*)buf_out, Confparam) != 0) {
    fprintf(stderr, "read exit!!\n");
    exit(1);
  }

  return 0;
}
#endif
/*! \file PHY/CODING/nrLDPC_decoder_offload_xdma/nrLDPC_decoder_offload_xdma.h
 * \brief FPGA accelerator integrated into OAI (for one and multi code block)
 * \author Sendren Xu, SY Yeh(fdragon), Hongming, Terng-Yin Hsu
 * \date 2022-05-31
 * \version 5.0
 * \email: summery19961210@gmail.com
 */
#ifndef NRLDPC_DECODER_OFFLOAD_XDMA_H_
#define NRLDPC_DECODER_OFFLOAD_XDMA_H_
#include <stdint.h>
/**
   \brief LDPC input parameter
   \param max_schedule NOTE(review): not read by nrLDPC_decoder_FPGA_PYM - confirm intended use
   \param SetIdx NOTE(review): not read by nrLDPC_decoder_FPGA_PYM - confirm intended use
   \param Zc shifting size
   \param numCB number of code block
   \param BG base graph
   \param max_iter NOTE(review): not read by nrLDPC_decoder_FPGA_PYM - confirm intended use
   \param nRows number of rows
   \param numChannelLls input soft bits length, Zc x 66 - length of filler bits
   \param numFillerBits filler bits length
*/
typedef struct {
  unsigned char max_schedule;
  unsigned char SetIdx;
  int Zc;
  unsigned char numCB;
  unsigned char BG;
  unsigned char max_iter;
  int nRows;
  int numChannelLls;
  int numFillerBits;
} DecIFConf;
/* NOTE(review): no definition of nrLDPC_decoder_FPGA_8038 is visible next to nrLDPC_decoder_FPGA_PYM - confirm it exists. */
int nrLDPC_decoder_FPGA_8038(int8_t *buf_in, int8_t *buf_out, DecIFConf dec_conf);
/* Decode the code blocks in buf_in on the FPGA through XDMA and store the result in buf_out. */
int nrLDPC_decoder_FPGA_PYM(int8_t *buf_in, int8_t *buf_out, DecIFConf dec_conf);
// int nrLDPC_decoder_FPGA_PYM();
#endif /* NRLDPC_DECODER_OFFLOAD_XDMA_H_ */
/*
* This file is part of the Xilinx DMA IP Core driver tools for Linux
*
* Copyright (c) 2016-present, Xilinx, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef MODULES_TXCTRL_INC_XDMA_DIAG_H_
#define MODULES_TXCTRL_INC_XDMA_DIAG_H_
#ifdef __cplusplus
extern "C" {
#endif
// #define _BSD_SOURCE
// #define _XOPEN_SOURCE 500
//#include "../../LDPC/LDPC_api.h"
//#include "dma_utils.c"
/*static struct option const long_opts[] = {
{"device", required_argument, NULL, 'd'},
{"address", required_argument, NULL, 'a'},
{"size", required_argument, NULL, 's'},
{"offset", required_argument, NULL, 'o'},
{"count", required_argument, NULL, 'c'},
{"data infile", required_argument, NULL, 'f'},
{"data outfile", required_argument, NULL, 'w'},
{"help", no_argument, NULL, 'h'},
{"verbose", no_argument, NULL, 'v'},
{0, 0, 0, 0}
};*/
/* Decoder configuration passed by value to test_dma_dec_read() and test_dma_dec_write().
 * The trailing comment on each field is the original author's note of an example value;
 * NOTE(review): the exact meaning of each field is defined by the FPGA IP -- confirm there. */
typedef struct {
unsigned char max_schedule; // max_schedule = 0;
unsigned char mb; // mb = 32;
unsigned char CB_num; // id = CB_num;
unsigned char BGSel; // bg = 1;
unsigned char z_set; // z_set = 0;
unsigned char z_j; // z_j = 6;
unsigned char max_iter; // max_iter = 8;
unsigned char SetIdx; // sc_idx = 12;
} DecIPConf;
/* Encoder configuration passed by value to test_dma_enc_read() and test_dma_enc_write().
 * NOTE(review): field semantics are not documented in this header; several names mirror
 * DecIPConf (SetIdx, z_set, z_j, BGSel, mb, CB_num) but with different types -- confirm
 * against the FPGA IP documentation before relying on them. */
typedef struct {
int SetIdx;
int NumCBSegm;
int PayloadLen;
int Z;
int z_set;
int z_j;
int Kbmax;
int BGSel;
unsigned mb;
unsigned char CB_num;
unsigned char kb_1;
} EncIPConf;
/*
 * Byte-order helpers for 32-bit (l) and 16-bit (s) values.
 *   ltohl / ltohs: little-endian to host
 *   htoll / htols: host to little-endian
 * On a little-endian host they are no-ops, on a big-endian host they swap bytes.
 * __BYTE_ORDER, __LITTLE_ENDIAN, __BIG_ENDIAN and __bswap_16/__bswap_32 are expected to be
 * provided by the including file (glibc <endian.h> / <byteswap.h>).
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define ltohl(x) (x)
#define ltohs(x) (x)
#define htoll(x) (x)
#define htols(x) (x)
#elif __BYTE_ORDER == __BIG_ENDIAN
#define ltohl(x) __bswap_32(x)
#define ltohs(x) __bswap_16(x)
#define htoll(x) __bswap_32(x)
#define htols(x) __bswap_16(x)
#else
/* Fail at the point of definition instead of leaving the helpers undefined. */
#error "Unsupported byte order: ltohl/ltohs/htoll/htols are only defined for little- and big-endian hosts"
#endif
/* FATAL: print the current source line, file name, errno and strerror(errno) to stderr, then
 * terminate the process with exit status 1. Wrapped in do { } while (0) so that it can be used
 * as a single statement followed by a semicolon.
 * The macro expands to fprintf, errno, strerror and exit; no active #include is visible in this
 * header, so the including file has to provide their declarations. */
#define FATAL \
do { \
fprintf(stderr, "Error at line %d, file %s (%d) [%s]\n", __LINE__, __FILE__, errno, strerror(errno)); \
exit(1); \
} while (0)
/* Size (32 KiB) of a mapped window and the matching offset mask (MAP_SIZE - 1).
 * NOTE(review): presumably used for mmap of the device register space -- confirm in the implementation. */
#define MAP_SIZE (32 * 1024UL)
#define MAP_MASK (MAP_SIZE - 1)
/* Default XDMA device nodes: c2h_* are used for reads, h2c_* for writes;
 * index 1 is used for the encoder, index 0 for the decoder. */
#define DEVICE_NAME_DEFAULT_ENC_READ "/dev/xdma0_c2h_1"
#define DEVICE_NAME_DEFAULT_ENC_WRITE "/dev/xdma0_h2c_1"
#define DEVICE_NAME_DEFAULT_DEC_READ "/dev/xdma0_c2h_0"
#define DEVICE_NAME_DEFAULT_DEC_WRITE "/dev/xdma0_h2c_0"
/* Default transfer size and repetition count. NOTE(review): units are not stated here -- confirm. */
#define SIZE_DEFAULT (32)
#define COUNT_DEFAULT (1)
/* Offsets for decoder/encoder input and output, reset and PCIe.
 * NOTE(review): presumably byte offsets inside the mapped window -- confirm in the implementation. */
#define OFFSET_DEC_IN 0x0000
#define OFFSET_DEC_OUT 0x0004
#define OFFSET_ENC_IN 0x0008
#define OFFSET_ENC_OUT 0x000c
#define OFFSET_RESET 0x0020
#define PCIE_OFF 0x0030
/* Code-block processing counts. NOTE(review): presumably the number of code blocks handled per
 * transfer by the encoder / decoder -- confirm. */
#define CB_PROCESS_NUMBER 24 // added by JW
#define CB_PROCESS_NUMBER_Dec 24
// dma_from_device.c
// Encoder/decoder DMA transfer helpers: the *_read functions take an output buffer, the *_write
// functions take an input data buffer; the configuration structure is passed by value.
// NOTE(review): buffer sizes and the meaning of the int return values are not visible here --
// confirm in the implementation.
int test_dma_enc_read(char *EncOut, EncIPConf Confparam);
int test_dma_enc_write(char *data, EncIPConf Confparam);
int test_dma_dec_read(char *DecOut, DecIPConf Confparam);
int test_dma_dec_write(char *data, DecIPConf Confparam);
// Initialisation, shutdown and reset helpers.
// NOTE(review): declared with empty parameter lists; in C this leaves the parameters unspecified.
void test_dma_init();
void test_dma_shutdown();
void dma_reset();
#ifdef __cplusplus
}
#endif
#endif
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "PHY/CODING/coding_defs.h" #include "PHY/CODING/coding_defs.h"
#include "PHY/CODING/lte_interleaver_inline.h" #include "PHY/CODING/lte_interleaver_inline.h"
#include "PHY/CODING/nrLDPC_extern.h" #include "PHY/CODING/nrLDPC_extern.h"
#include "PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload_xdma.h"
#include "PHY/NR_TRANSPORT/nr_transport_common_proto.h" #include "PHY/NR_TRANSPORT/nr_transport_common_proto.h"
#include "PHY/NR_TRANSPORT/nr_transport_proto.h" #include "PHY/NR_TRANSPORT/nr_transport_proto.h"
#include "PHY/NR_TRANSPORT/nr_ulsch.h" #include "PHY/NR_TRANSPORT/nr_ulsch.h"
...@@ -317,6 +318,433 @@ int decode_offload(PHY_VARS_gNB *phy_vars_gNB, ...@@ -317,6 +318,433 @@ int decode_offload(PHY_VARS_gNB *phy_vars_gNB,
return 0; return 0;
} }
/*!
 * \typedef args_fpga_decode_prepare_t
 * \struct args_fpga_decode_prepare_s
 * \brief arguments structure for passing arguments to the nr_ulsch_FPGA_decoding_prepare_blocks function
 *
 * \var multi_indata
 * pointer to the head of the block destination array that is then passed to the FPGA decoding
 * \var no_iteration_ldpc
 * in/out status value (not a pointer): initialised by the caller and read back once the job has
 * completed; a value greater than the maximum number of LDPC iterations signals a preparation failure
 * \var r_first
 * index of the first block to be prepared within this function
 * \var r_span
 * number of blocks to be prepared within this function
 * \var n_layers
 * number of MIMO layers
 * \var G
 * number of soft channel bits
 * \var ulsch_harq
 * harq process information
 * \var decoderParms
 * decoder parameters
 * \var ulsch
 * uplink shared channel information
 * \var ulsch_llr
 * pointer to the head of the block source array
 * \var rv_index
 * an argument of rate dematching
 * \var E
 * size of the block between deinterleaving and rate matching
 * (the prepare function recomputes it for every block with nr_get_E)
 * \var Qm
 * modulation order
 * \var r_offset
 * offset, in elements of ulsch_llr, of the first block to be prepared
 * \var tbslbrm
 * an argument of rate dematching
 *
 */
typedef struct args_fpga_decode_prepare_s
{
int8_t *multi_indata;
int no_iteration_ldpc;
uint32_t r_first;
uint32_t r_span;
uint8_t n_layers;
uint32_t G;
NR_UL_gNB_HARQ_t *ulsch_harq;
t_nrLDPC_dec_params *decoderParms;
NR_gNB_ULSCH_t *ulsch;
short* ulsch_llr;
int rv_index;
int E;
int Qm;
int r_offset;
uint32_t tbslbrm;
} args_fpga_decode_prepare_t;
/*!
* \fn nr_ulsch_FPGA_decoding_prepare_blocks(void *args)
* \brief prepare blocks for LDPC decoding on FPGA
*
* \param args pointer to the arguments of the function in a structure of type args_fpga_decode_prepare_t
*
*/
void nr_ulsch_FPGA_decoding_prepare_blocks(void *args)
{
//extract the arguments
args_fpga_decode_prepare_t *arguments = (args_fpga_decode_prepare_t *)args;
int8_t *multi_indata = arguments->multi_indata;
int no_iteration_ldpc = arguments->no_iteration_ldpc;
uint32_t r_first = arguments->r_first;
uint32_t r_span = arguments->r_span;
uint8_t n_layers = arguments->n_layers;
uint32_t G = arguments->G;
short* ulsch_llr = arguments->ulsch_llr;
NR_UL_gNB_HARQ_t *harq_process = arguments->ulsch_harq;
t_nrLDPC_dec_params *decParams = arguments->decoderParms;
NR_gNB_ULSCH_t *ulsch = arguments->ulsch;
int E = arguments->E;
int Qm = arguments->Qm;
uint32_t r_offset = arguments->r_offset;
/*
* extract additional required information
*
* Kr number of bits per block
*
* initialise other required variables
*
* dtx_det
* input_CBoffset
* kc
* K_bits_F
*
*/
int Kr = harq_process->K;
uint8_t dtx_det = 0;
int mbmb = 0;
if (decParams->BG == 1)
mbmb = 68;
else
mbmb = 52;
// Calc input CB offset
int input_CBoffset = decParams->Z * mbmb * 8;
if ((input_CBoffset & 0x7F) == 0)
input_CBoffset = input_CBoffset / 8;
else
input_CBoffset = 16 * ((input_CBoffset / 128) + 1);
int kc;
if (decParams->BG == 2) {
kc = 52;
} else {
kc = 68;
}
int K_bits_F = Kr - harq_process->F;
int16_t z[68 * 384 + 16] __attribute__((aligned(16)));
simde__m128i *pv = (simde__m128i *)&z;
/*
* the function processes r_span blocks starting from block at index r_first in ulsch_llr
*/
for(uint32_t r = r_first; r < ( r_first + r_span ); r++)
{
E = nr_get_E(G, harq_process->C, Qm, n_layers, r);
// ----------------------- FPGA pre process ------------------------
simde__m128i ones = simde_mm_set1_epi8(255); // Generate a vector with all elements set to 255
simde__m128i *temp_multi_indata = (simde__m128i *)&multi_indata[r * input_CBoffset];
// -----------------------------------------------------------------
// code blocks after bit selection in rate matching for LDPC code (38.212 V15.4.0 section 5.4.2.1)
int16_t harq_e[E];
// -------------------------------------------------------------------------------------------
// deinterleaving
// -------------------------------------------------------------------------------------------
//start_meas(&phy_vars_gNB->ulsch_deinterleaving_stats);
nr_deinterleaving_ldpc(E, Qm, harq_e, ulsch_llr + r_offset);
//stop_meas(&phy_vars_gNB->ulsch_deinterleaving_stats);
// -------------------------------------------------------------------------------------------
// dematching
// -------------------------------------------------------------------------------------------
//start_meas(&phy_vars_gNB->ulsch_rate_unmatching_stats);
if (nr_rate_matching_ldpc_rx(arguments->tbslbrm,
decParams->BG,
decParams->Z,
harq_process->d[r],
harq_e,
harq_process->C,
arguments->rv_index,
harq_process->d_to_be_cleared[r],
E,
harq_process->F,
Kr - harq_process->F - 2 * (decParams->Z)
) == -1)
{
//stop_meas(&phy_vars_gNB->ulsch_rate_unmatching_stats);
LOG_E(PHY, "ulsch_decoding.c: Problem in rate_matching\n");
no_iteration_ldpc = ulsch->max_ldpc_iterations + 1;
return;
} else {
//stop_meas(&phy_vars_gNB->ulsch_rate_unmatching_stats);
}
harq_process->d_to_be_cleared[r] = false;
memset(harq_process->c[r], 0, Kr >> 3);
// set first 2*Z_c bits to zeros
memset(&z[0], 0, 2 * harq_process->Z * sizeof(int16_t));
// set Filler bits
memset((&z[0] + K_bits_F), 127, harq_process->F * sizeof(int16_t));
// Move coded bits before filler bits
memcpy((&z[0] + 2 * harq_process->Z), harq_process->d[r], (K_bits_F - 2 * harq_process->Z) * sizeof(int16_t));
// skip filler bits
memcpy((&z[0] + Kr), harq_process->d[r] + (Kr - 2 * harq_process->Z), (kc * harq_process->Z - Kr) * sizeof(int16_t));
// Saturate coded bits before decoding into 8 bits values
for (int i = 0, j = 0; j < ((kc * harq_process->Z) >> 4); i += 2, j++) {
temp_multi_indata[j] = simde_mm_xor_si128(simde_mm_packs_epi16(pv[i], pv[i + 1]), simde_mm_cmpeq_epi32(ones, ones)); // Perform NOT operation and write the result to temp_multi_indata[j]
}
// the last bytes before reaching "kc * harq_process->Z" should not be written 128 bits at a time to avoid overwritting the following block in multi_indata
simde__m128i tmp = simde_mm_xor_si128(simde_mm_packs_epi16(pv[2*((kc * harq_process->Z) >> 4)], pv[2*((kc * harq_process->Z) >> 4) + 1]), simde_mm_cmpeq_epi32(ones, ones)); // Perform NOT operation and write the result to temp_multi_indata[j]
int8_t *tmp_p = (int8_t *)&tmp;
for (int i = 0, j = ((kc * harq_process->Z)&0xfffffff0); j < kc * harq_process->Z; i++, j++) {
multi_indata[r * input_CBoffset + j] = tmp_p[i];
}
r_offset += E;
}
arguments->no_iteration_ldpc=no_iteration_ldpc;
}
/*!
 * \brief decode the code blocks of one uplink transport block with the FPGA through the xdma driver
 *
 * The code blocks are first prepared (deinterleaving, rate dematching, packing to 8 bits) by jobs
 * pushed to the thread pool, at most ldpc_xdma_number_threads_predecoding of them. Once all jobs
 * have completed, nrLDPC_decoder_FPGA_PYM() decodes all blocks in one call. Each decoded segment
 * is then CRC checked and copied to harq_process->c[r] and, on success, to harq_process->b.
 * Finally the ACK/NAK indication is filled.
 *
 * If the preparation fails (rate dematching error, thread pool stopped, more code blocks than the
 * static buffers can hold) the FPGA is not invoked and the transport block is reported in error.
 *
 * \return 0: all the work is completed synchronously, as in decode_offload()
 */
int decode_xdma(PHY_VARS_gNB *phy_vars_gNB,
                uint8_t ULSCH_id,
                short *ulsch_llr,
                nfapi_nr_pusch_pdu_t *pusch_pdu,
                t_nrLDPC_dec_params *decParams,
                uint32_t frame,
                uint8_t nr_tti_rx,
                uint8_t harq_pid,
                uint32_t G)
{
  // dimensions of the static exchange buffers shared with the FPGA
  enum { XDMA_MAX_CB = 25, XDMA_IN_BYTES_PER_CB = 27000, XDMA_OUT_BYTES_PER_CB = 1100 };
  // NOTE(review): static buffers make this function non reentrant -- confirm that at most one
  // transport block is decoded through this path at a time.
  static int8_t multi_indata[XDMA_IN_BYTES_PER_CB * XDMA_MAX_CB]; // FPGA input data
  static int8_t multi_outdata[XDMA_OUT_BYTES_PER_CB * XDMA_MAX_CB]; // FPGA output data

  NR_gNB_ULSCH_t *ulsch = &phy_vars_gNB->ulsch[ULSCH_id];
  NR_UL_gNB_HARQ_t *harq_process = ulsch->harq_process;
  const uint8_t Qm = pusch_pdu->qam_mod_order;
  const uint8_t n_layers = pusch_pdu->nrOfLayers;
  const uint32_t C = harq_process->C;
  const int Kr = harq_process->K;
  const int Kr_bytes = Kr >> 3;
  const uint32_t A = (harq_process->TBS) << 3;
  const int kc = decParams->BG == 2 ? 52 : 68;
  const int K_bits_F = Kr - harq_process->F;
  // payload bytes of one segment: without filler bits and, with several segments, without the 3 CRC bytes
  const int segment_payload_bytes = Kr_bytes - (harq_process->F >> 3) - ((C > 1) ? 3 : 0);
  ulsch->max_ldpc_iterations = 20;

  //-------------------- FPGA parameter preprocessing ---------------------
  const int bg_len = (decParams->BG == 1) ? 22 : 10;
  DecIFConf dec_conf = {
      .max_schedule = 0,
      .SetIdx = 12,
      .Zc = decParams->Z,
      .numCB = harq_process->C,
      .BG = decParams->BG,
      .max_iter = 8,
      .nRows = (decParams->BG == 1) ? 46 : 42,
      // input soft bits length, Zc x 66 - length of filler bits
      .numChannelLls = (K_bits_F - 2 * harq_process->Z) + (kc * harq_process->Z - Kr),
      // filler bits length
      .numFillerBits = harq_process->F,
  };

  // size in bytes of one decoded block in multi_outdata, rounded up to a multiple of 16 bytes
  int out_CBoffset = dec_conf.Zc * bg_len;
  if ((out_CBoffset & 0x7F) == 0)
    out_CBoffset = out_CBoffset / 8;
  else
    out_CBoffset = 16 * ((out_CBoffset / 128) + 1);

#ifdef LDPC_DATA
  printf("\n------------------------\n");
  printf("BG:\t\t%d\n", dec_conf.BG);
  printf("harq_process->B: %d\n", harq_process->B);
  printf("harq_process->C: %d\n", harq_process->C);
  printf("harq_process->K: %d\n", harq_process->K);
  printf("harq_process->Z: %d\n", harq_process->Z);
  printf("harq_process->F: %d\n", harq_process->F);
  printf("numChannelLls:\t %d = (%d - 2 * %d) + (%d * %d - %d)\n", dec_conf.numChannelLls, K_bits_F, harq_process->Z, kc, harq_process->Z, Kr);
  printf("numFillerBits:\t %d\n", harq_process->F);
  printf("------------------------\n");
  // ===================================
  // debug mode
  // ===================================
  // NOTE(review): these files are opened on every call and neither written nor closed here -- confirm intent.
  FILE *fptr_llr, *fptr_ldpc;
  fptr_llr = fopen("../../../cmake_targets/log/ulsim_ldpc_llr.txt", "w");
  fptr_ldpc = fopen("../../../cmake_targets/log/ulsim_ldpc_output.txt", "w");
  // ===================================
#endif
  //----------------------------------------------------------------------

  const int length_dec = lenWithCrc(harq_process->C, A);
  const uint8_t crc_type = crcType(harq_process->C, A);
  int no_iteration_ldpc = 2;
  bool prepare_ok = true;

  // Use the configured value (not its address); fall back to one job so that the split below
  // never divides by zero when the field was left unset.
  const int configured_jobs = phy_vars_gNB->ldpc_xdma_number_threads_predecoding;
  const uint32_t num_threads_prepare_max = configured_jobs > 0 ? (uint32_t)configured_jobs : 1;
  uint32_t num_threads_prepare = 0;

  if (C > XDMA_MAX_CB) {
    // the prepare jobs and the FPGA would write past the end of the static buffers
    LOG_E(PHY, "FPGA decoding: %d code blocks exceed the %d supported by the FPGA buffers\n", (int)C, (int)XDMA_MAX_CB);
    prepare_ok = false;
  } else {
    // start the prepare jobs
    uint32_t r_remaining = 0;
    int r_offset = 0;
    for (uint32_t r = 0; r < C; r++) {
      int E = nr_get_E(G, harq_process->C, Qm, n_layers, r);
      if (r_remaining == 0) {
        // share the remaining blocks evenly (rounding up) between the jobs still available
        const uint32_t blocks_left = C - r;
        const uint32_t jobs_left = num_threads_prepare_max - num_threads_prepare;
        uint32_t r_span = blocks_left / jobs_left;
        if ((blocks_left % jobs_left) != 0)
          r_span++;
        if (r_span > blocks_left)
          r_span = blocks_left;

        union ldpcReqUnion id = {.s = {ulsch->rnti, frame, nr_tti_rx, 0, 0}};
        notifiedFIFO_elt_t *req = newNotifiedFIFO_elt(sizeof(args_fpga_decode_prepare_t),
                                                      id.p,
                                                      &phy_vars_gNB->respDecode,
                                                      &nr_ulsch_FPGA_decoding_prepare_blocks);
        args_fpga_decode_prepare_t *args = (args_fpga_decode_prepare_t *)NotifiedFifoData(req);
        args->multi_indata = multi_indata;
        args->no_iteration_ldpc = 2;
        args->r_first = r;
        args->r_span = r_span;
        args->n_layers = n_layers;
        args->G = G;
        args->ulsch_harq = harq_process;
        args->decoderParms = decParams;
        args->ulsch = ulsch;
        args->ulsch_llr = ulsch_llr;
        args->rv_index = pusch_pdu->pusch_data.rv_index;
        args->E = E;
        args->Qm = Qm;
        args->r_offset = r_offset;
        args->tbslbrm = pusch_pdu->maintenance_parms_v3.tbSizeLbrmBytes;
        pushTpool(&phy_vars_gNB->threadPool, req);
        LOG_D(PHY, "Added %d block(s) to prepare for decoding, in pipe: %d to %d\n", r_span, r, r + r_span - 1);
        r_remaining = r_span;
        num_threads_prepare++;
      }
      r_offset += E;
      r_remaining -= 1;
    }
  }

  // wait for the prepare jobs to complete
  while (num_threads_prepare > 0) {
    notifiedFIFO_elt_t *req = (notifiedFIFO_elt_t *)pullTpool(&phy_vars_gNB->respDecode, &phy_vars_gNB->threadPool);
    if (req == NULL) {
      // no result to read: stop waiting instead of dereferencing the NULL element
      LOG_E(PHY, "FPGA decoding preparation: pullTpool returned NULL\n");
      prepare_ok = false;
      break;
    }
    args_fpga_decode_prepare_t *args = (args_fpga_decode_prepare_t *)NotifiedFifoData(req);
    if (args->no_iteration_ldpc > ulsch->max_ldpc_iterations)
      prepare_ok = false;
    // NOTE(review): the element is not released after use -- confirm against the thread pool
    // API whether the caller has to free it, otherwise one element leaks per job.
    num_threads_prepare -= 1;
  }

  if (prepare_ok) {
    // launch decode with FPGA
    //==================================================================
    // Xilinx FPGA LDPC decoding function -> nrLDPC_decoder_FPGA_PYM()
    //==================================================================
    start_meas(&phy_vars_gNB->ulsch_ldpc_decoding_stats);
    nrLDPC_decoder_FPGA_PYM((int8_t *)&multi_indata[0], (int8_t *)&multi_outdata[0], dec_conf);
    stop_meas(&phy_vars_gNB->ulsch_ldpc_decoding_stats);

    uint32_t offset = 0;
    for (uint32_t r = 0; r < C; r++) {
      // decoded segment r in the FPGA output
      int8_t *segment = &multi_outdata[r * out_CBoffset];

      // the CRC is checked on segment r itself, not on the head of the output buffer
      if (check_crc((uint8_t *)segment, length_dec, crc_type)) {
#ifdef PRINT_CRC_CHECK
        LOG_I(PHY, "Segment %d CRC OK\n", r);
#endif
        no_iteration_ldpc = 2;
      } else {
#ifdef PRINT_CRC_CHECK
        LOG_I(PHY, "segment %d CRC NOK\n", r);
#endif
        no_iteration_ldpc = ulsch->max_ldpc_iterations + 1;
      }

      // copy FPGA output to harq_process->c[r]
      memcpy(harq_process->c[r], segment, out_CBoffset);

      bool decodeSuccess = (no_iteration_ldpc <= ulsch->max_ldpc_iterations);
      if (decodeSuccess) {
        memcpy(harq_process->b + offset, harq_process->c[r], segment_payload_bytes);
        offset += segment_payload_bytes;
        harq_process->processedSegments++;
      } else {
        LOG_D(PHY, "uplink segment error %d/%d\n", r, harq_process->C);
        LOG_D(PHY, "ULSCH %d in error\n", ULSCH_id);
        break; // don't even attempt to decode other segments
      }
    }
  } else {
    // nothing was decoded: processedSegments is left untouched and the NAK branch below is taken
    no_iteration_ldpc = ulsch->max_ldpc_iterations + 1;
  }

  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_gNB_ULSCH_DECODING, 0);

  if (harq_process->processedSegments == harq_process->C) {
    LOG_D(PHY, "[gNB %d] ULSCH: Setting ACK for slot %d TBS %d\n", phy_vars_gNB->Mod_id, ulsch->slot, harq_process->TBS);
    ulsch->active = false;
    harq_process->round = 0;
    LOG_D(PHY, "ULSCH received ok \n");
    nr_fill_indication(phy_vars_gNB, ulsch->frame, ulsch->slot, ULSCH_id, harq_pid, 0, 0);
  } else {
    LOG_D(PHY,
          "[gNB %d] ULSCH: Setting NAK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d)\n",
          phy_vars_gNB->Mod_id,
          ulsch->frame,
          ulsch->slot,
          harq_pid,
          ulsch->active,
          harq_process->round,
          harq_process->TBS);
    ulsch->handled = 1;
    no_iteration_ldpc = ulsch->max_ldpc_iterations + 1;
    LOG_D(PHY, "ULSCH %d in error\n", ULSCH_id);
    nr_fill_indication(phy_vars_gNB, ulsch->frame, ulsch->slot, ULSCH_id, harq_pid, 1, 0);
  }
  ulsch->last_iteration_cnt = no_iteration_ldpc;

  // everything is completed here, no decoding job is left pending for the caller
  return 0;
}
int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB, int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
uint8_t ULSCH_id, uint8_t ULSCH_id,
short *ulsch_llr, short *ulsch_llr,
...@@ -430,6 +858,9 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB, ...@@ -430,6 +858,9 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
if (phy_vars_gNB->ldpc_offload_flag) if (phy_vars_gNB->ldpc_offload_flag)
return decode_offload(phy_vars_gNB, ULSCH_id, ulsch_llr, pusch_pdu, &decParams, harq_pid, G); return decode_offload(phy_vars_gNB, ULSCH_id, ulsch_llr, pusch_pdu, &decParams, harq_pid, G);
if (phy_vars_gNB->ldpc_xdma_flag)
return decode_xdma(phy_vars_gNB, ULSCH_id, ulsch_llr, pusch_pdu, &decParams, frame, nr_tti_rx, harq_pid, G);
uint32_t offset = 0, r_offset = 0; uint32_t offset = 0, r_offset = 0;
set_abort(&harq_process->abort_decode, false); set_abort(&harq_process->abort_decode, false);
for (int r = 0; r < harq_process->C; r++) { for (int r = 0; r < harq_process->C; r++) {
......
...@@ -640,6 +640,10 @@ typedef struct PHY_VARS_gNB_s { ...@@ -640,6 +640,10 @@ typedef struct PHY_VARS_gNB_s {
int ldpc_offload_flag; int ldpc_offload_flag;
int ldpc_xdma_flag;
// Number of threads created for deinterleaving and rate dematching with XDMA decoding
int ldpc_xdma_number_threads_predecoding;
int reorder_thread_disable; int reorder_thread_disable;
int max_ldpc_iterations; int max_ldpc_iterations;
......
...@@ -187,6 +187,8 @@ int main(int argc, char *argv[]) ...@@ -187,6 +187,8 @@ int main(int argc, char *argv[])
double effTP; double effTP;
float eff_tp_check = 100; float eff_tp_check = 100;
int ldpc_offload_flag = 0; int ldpc_offload_flag = 0;
int ldpc_xdma_flag = 0;
int ldpc_xdma_number_threads_predecoding = 1;
uint8_t max_rounds = 4; uint8_t max_rounds = 4;
int chest_type[2] = {0}; int chest_type[2] = {0};
int enable_ptrs = 0; int enable_ptrs = 0;
...@@ -223,7 +225,7 @@ int main(int argc, char *argv[]) ...@@ -223,7 +225,7 @@ int main(int argc, char *argv[])
/* initialize the sin-cos table */ /* initialize the sin-cos table */
InitSinLUT(); InitSinLUT();
while ((c = getopt(argc, argv, "a:b:c:d:ef:g:h:i:k:m:n:op:q:r:s:t:u:v:w:y:z:C:F:G:H:I:M:N:PR:S:T:U:L:ZW:E:X:")) != -1) { while ((c = getopt(argc, argv, "a:b:c:d:ef:g:h:i:k:m:n:o:op:q:r:s:t:u:v:w:y:z:C:F:G:H:I:M:N:PR:S:T:U:L:ZW:E:X:")) != -1) {
printf("handling optarg %c\n",c); printf("handling optarg %c\n",c);
switch (c) { switch (c) {
...@@ -331,7 +333,22 @@ int main(int argc, char *argv[]) ...@@ -331,7 +333,22 @@ int main(int argc, char *argv[])
break; break;
case 'o': case 'o':
ldpc_offload_flag = 1; //switch on optarg to choose the offload option
// optarg is "<mode>" or, for mode 2, "<mode>,<number of predecoding threads>"
switch ((char)*optarg) {
  case '1':
    ldpc_offload_flag = 1;
    break;
  case '2':
    ldpc_xdma_flag = 1;
    // Compare with ',' (==): an assignment would overwrite optarg, always take this branch and
    // let atoi() read past the end of a bare "2".
    if (optarg[1] == ',') {
      ldpc_xdma_number_threads_predecoding = atoi(&optarg[2]);
    } else {
      printf("Expecting \"-o 2,<number of predecoding threads>\"! Falling back to one predecoding thread\n");
    }
    break;
  default:
    printf("Invalid offload mode!\n");
}
break; break;
case 'p': case 'p':
...@@ -493,7 +510,7 @@ int main(int argc, char *argv[]) ...@@ -493,7 +510,7 @@ int main(int argc, char *argv[])
printf("-k 3/4 sampling\n"); printf("-k 3/4 sampling\n");
printf("-m MCS value\n"); printf("-m MCS value\n");
printf("-n Number of trials to simulate\n"); printf("-n Number of trials to simulate\n");
printf("-o ldpc offload flag\n"); printf("-o <mode, 1(T1/T2), 2(XDMA)>[,<number of predecoding threads (mode 2)>] ldpc offload flag\n");
printf("-p Use extended prefix mode\n"); printf("-p Use extended prefix mode\n");
printf("-q MCS table\n"); printf("-q MCS table\n");
printf("-r Number of allocated resource blocks for PUSCH\n"); printf("-r Number of allocated resource blocks for PUSCH\n");
...@@ -630,6 +647,9 @@ int main(int argc, char *argv[]) ...@@ -630,6 +647,9 @@ int main(int argc, char *argv[])
// nr_phy_config_request_sim(gNB,N_RB_DL,N_RB_DL,mu,0,0x01); // nr_phy_config_request_sim(gNB,N_RB_DL,N_RB_DL,mu,0,0x01);
gNB->ldpc_offload_flag = ldpc_offload_flag; gNB->ldpc_offload_flag = ldpc_offload_flag;
gNB->ldpc_xdma_flag = ldpc_xdma_flag;
gNB->ldpc_xdma_number_threads_predecoding = ldpc_xdma_number_threads_predecoding;
AssertFatal(gNB->ldpc_xdma_number_threads_predecoding > 0, "Number of predecoding threads should be positive");
gNB->chest_freq = chest_type[0]; gNB->chest_freq = chest_type[0];
gNB->chest_time = chest_type[1]; gNB->chest_time = chest_type[1];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment