Commit 6bec8779 authored by Jaroslava Fiedlerova

Enhance T2 Offload for gNB and UE

- Reworked nrLDPC_decoder_offload.c to process all segments in a TB with a single call to LDPCdecoder() or LDPCencoder().
- Perform CRC checks on the T2.
- Modified nr_ulsch_decoding (gNB decoder) to:
  - Count processed segments.
  - Properly perform CRC checks.
- Created a constant NR_LDPC_MAX_NUM_CB to define the maximum number of codeblocks.
- Conditional library loading:
  - On gNB: Load the ldpc_t2 library only when offload is enabled via the --ldpc-offload-enable flag.
  - On UE: Load both ldpc_t2 and ldpc libraries since only encoder offload is supported.
- General cleanup of nrLDPC_decoder_offload.c for improved readability and maintainability.
- Modified the structure of LDPC encoder/decoder/offload parameters:
  - Introduced a structure for code block (CB) related parameters.
  - Removed parameter E from the encoder and offload parameter structures for clarity.
  - Replaced E with perCB->E_cb in the UE encoder code.
parent e5fc1ebd
...@@ -286,6 +286,7 @@ void set_options(int CC_id, PHY_VARS_NR_UE *UE){ ...@@ -286,6 +286,7 @@ void set_options(int CC_id, PHY_VARS_NR_UE *UE){
UE->rf_map.card = card_offset; UE->rf_map.card = card_offset;
UE->rf_map.chain = CC_id + chain_offset; UE->rf_map.chain = CC_id + chain_offset;
UE->max_ldpc_iterations = nrUE_params.max_ldpc_iterations; UE->max_ldpc_iterations = nrUE_params.max_ldpc_iterations;
UE->ldpc_offload_enable = nrUE_params.ldpc_offload_flag;
UE->UE_scan_carrier = nrUE_params.UE_scan_carrier; UE->UE_scan_carrier = nrUE_params.UE_scan_carrier;
UE->UE_fo_compensation = nrUE_params.UE_fo_compensation; UE->UE_fo_compensation = nrUE_params.UE_fo_compensation;
UE->if_freq = nrUE_params.if_freq; UE->if_freq = nrUE_params.if_freq;
...@@ -491,7 +492,11 @@ int main(int argc, char **argv) ...@@ -491,7 +492,11 @@ int main(int argc, char **argv)
cpuf=get_cpu_freq_GHz(); cpuf=get_cpu_freq_GHz();
itti_init(TASK_MAX, tasks_info); itti_init(TASK_MAX, tasks_info);
init_opt() ; init_opt();
if (nrUE_params.ldpc_offload_flag)
load_LDPClib("_t2", &ldpc_interface_offload);
load_LDPClib(NULL, &ldpc_interface); load_LDPClib(NULL, &ldpc_interface);
if (ouput_vcd) { if (ouput_vcd) {
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
{"dlsch-parallel", CONFIG_HLP_DLSCH_PARA, 0, .u8ptr=NULL, .defintval=0, TYPE_UINT8, 0}, \ {"dlsch-parallel", CONFIG_HLP_DLSCH_PARA, 0, .u8ptr=NULL, .defintval=0, TYPE_UINT8, 0}, \
{"offset-divisor", CONFIG_HLP_OFFSET_DIV, 0, .uptr=&nrUE_params.ofdm_offset_divisor, .defuintval=8, TYPE_UINT32, 0}, \ {"offset-divisor", CONFIG_HLP_OFFSET_DIV, 0, .uptr=&nrUE_params.ofdm_offset_divisor, .defuintval=8, TYPE_UINT32, 0}, \
{"max-ldpc-iterations", CONFIG_HLP_MAX_LDPC_ITERATIONS, 0, .iptr=&nrUE_params.max_ldpc_iterations, .defuintval=8, TYPE_UINT8, 0}, \ {"max-ldpc-iterations", CONFIG_HLP_MAX_LDPC_ITERATIONS, 0, .iptr=&nrUE_params.max_ldpc_iterations, .defuintval=8, TYPE_UINT8, 0}, \
{"ldpc-offload-enable", CONFIG_HLP_LDPC_OFFLOAD, PARAMFLAG_BOOL, .iptr=&(nrUE_params.ldpc_offload_flag), .defintval=0, TYPE_INT, 0}, \
{"nr-dlsch-demod-shift", CONFIG_HLP_DLSHIFT, 0, .iptr=(int32_t *)&nr_dlsch_demod_shift, .defintval=0, TYPE_INT, 0}, \ {"nr-dlsch-demod-shift", CONFIG_HLP_DLSHIFT, 0, .iptr=(int32_t *)&nr_dlsch_demod_shift, .defintval=0, TYPE_INT, 0}, \
{"V" , CONFIG_HLP_VCD, PARAMFLAG_BOOL, .iptr=&vcdflag, .defintval=0, TYPE_INT, 0}, \ {"V" , CONFIG_HLP_VCD, PARAMFLAG_BOOL, .iptr=&vcdflag, .defintval=0, TYPE_INT, 0}, \
{"uecap_file", CONFIG_HLP_UECAP_FILE, 0, .strptr=&uecap_file, .defstrval="./uecap_ports1.xml", TYPE_STRING, 0}, \ {"uecap_file", CONFIG_HLP_UECAP_FILE, 0, .strptr=&uecap_file, .defstrval="./uecap_ports1.xml", TYPE_STRING, 0}, \
...@@ -80,6 +81,7 @@ typedef struct { ...@@ -80,6 +81,7 @@ typedef struct {
int nb_antennas_tx; int nb_antennas_tx;
int N_RB_DL; int N_RB_DL;
int ssb_start_subcarrier; int ssb_start_subcarrier;
int ldpc_offload_flag;
} nrUE_params_t; } nrUE_params_t;
extern uint64_t get_nrUE_optmask(void); extern uint64_t get_nrUE_optmask(void);
extern uint64_t set_nrUE_optmask(uint64_t bitmask); extern uint64_t set_nrUE_optmask(uint64_t bitmask);
......
...@@ -259,7 +259,7 @@ one_measurement_t test_ldpc(short max_iterations, ...@@ -259,7 +259,7 @@ one_measurement_t test_ldpc(short max_iterations,
printf("To: %d\n", (Kb + nrows - no_punctured_columns) * Zc - removed_bit); printf("To: %d\n", (Kb + nrows - no_punctured_columns) * Zc - removed_bit);
printf("number of undecoded bits: %d\n", (Kb + nrows - no_punctured_columns - 2) * Zc - removed_bit); printf("number of undecoded bits: %d\n", (Kb + nrows - no_punctured_columns - 2) * Zc - removed_bit);
encoder_implemparams_t impp = {.Zc = Zc, .Kb = Kb, .E = block_length, .BG = BG, .Kr = block_length, .K = block_length}; encoder_implemparams_t impp = {.Zc = Zc, .Kb = Kb, .BG = BG, .Kr = block_length, .K = block_length};
impp.gen_code = 2; impp.gen_code = 2;
if (ntrials==0) if (ntrials==0)
......
...@@ -3,13 +3,8 @@ ...@@ -3,13 +3,8 @@
*/ */
/*!\file nrLDPC_decoder_offload.c /*!\file nrLDPC_decoder_offload.c
* \brief Defines the LDPC decoder
* \author openairinterface
* \date 12-06-2021
* \version 1.0
* \note: based on testbbdev test_bbdev_perf.c functions. Harq buffer offset added. * \note: based on testbbdev test_bbdev_perf.c functions. Harq buffer offset added.
* \mbuf and mempool allocated at the init step, LDPC parameters updated from OAI. * \mbuf and mempool allocated at the init step, LDPC parameters updated from OAI.
* \warning
*/ */
#include <stdint.h> #include <stdint.h>
...@@ -55,17 +50,12 @@ ...@@ -55,17 +50,12 @@
// this socket is the NUMA socket, so the hardware CPU id (numa is complex) // this socket is the NUMA socket, so the hardware CPU id (numa is complex)
#define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id)) #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
#define MAX_QUEUES 32
#define MAX_QUEUES 16
#define OPS_CACHE_SIZE 256U #define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */ #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
#define SYNC_WAIT 0 #define SYNC_WAIT 0
#define SYNC_START 1 #define SYNC_START 1
#define INVALID_OPAQUE -1
#define TIME_OUT_POLL 1e8 #define TIME_OUT_POLL 1e8
#define INVALID_QUEUE_ID -1
/* Increment for next code block in external HARQ memory */ /* Increment for next code block in external HARQ memory */
#define HARQ_INCR 32768 #define HARQ_INCR 32768
/* Headroom for filler LLRs insertion in HARQ buffer */ /* Headroom for filler LLRs insertion in HARQ buffer */
...@@ -102,7 +92,7 @@ struct active_device { ...@@ -102,7 +92,7 @@ struct active_device {
static int nb_active_devs; static int nb_active_devs;
/* Data buffers used by BBDEV ops */ /* Data buffers used by BBDEV ops */
struct test_buffers { struct data_buffers {
struct rte_bbdev_op_data *inputs; struct rte_bbdev_op_data *inputs;
struct rte_bbdev_op_data *hard_outputs; struct rte_bbdev_op_data *hard_outputs;
struct rte_bbdev_op_data *soft_outputs; struct rte_bbdev_op_data *soft_outputs;
...@@ -121,7 +111,7 @@ struct test_op_params { ...@@ -121,7 +111,7 @@ struct test_op_params {
uint16_t num_lcores; uint16_t num_lcores;
int vector_mask; int vector_mask;
rte_atomic16_t sync; rte_atomic16_t sync;
struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES]; struct data_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
}; };
/* Contains per lcore params */ /* Contains per lcore params */
...@@ -129,16 +119,9 @@ struct thread_params { ...@@ -129,16 +119,9 @@ struct thread_params {
uint8_t dev_id; uint8_t dev_id;
uint16_t queue_id; uint16_t queue_id;
uint32_t lcore_id; uint32_t lcore_id;
uint64_t start_time;
double ops_per_sec;
double mbps;
uint8_t iter_count;
double iter_average;
double bler;
struct nrLDPCoffload_params *p_offloadParams; struct nrLDPCoffload_params *p_offloadParams;
uint8_t iter_count;
uint8_t *p_out; uint8_t *p_out;
uint8_t r;
uint8_t harq_pid;
uint8_t ulsch_id; uint8_t ulsch_id;
rte_atomic16_t nb_dequeued; rte_atomic16_t nb_dequeued;
rte_atomic16_t processing_status; rte_atomic16_t processing_status;
...@@ -176,11 +159,11 @@ optimal_mempool_size(unsigned int val) ...@@ -176,11 +159,11 @@ optimal_mempool_size(unsigned int val)
return rte_align32pow2(val + 1) - 1; return rte_align32pow2(val + 1) - 1;
} }
// DPDK BBDEV modified - sizes passed to the function, use of data_len and nb_segments, remove code related to Soft outputs, HARQ // based on DPDK BBDEV create_mempools
// inputs, HARQ outputs
static int create_mempools(struct active_device *ad, int socket_id, uint16_t num_ops, int out_buff_sz, int in_max_sz) static int create_mempools(struct active_device *ad, int socket_id, uint16_t num_ops, int out_buff_sz, int in_max_sz)
{ {
unsigned int ops_pool_size, mbuf_pool_size, data_room_size = 0; unsigned int ops_pool_size, mbuf_pool_size, data_room_size = 0;
num_ops = 1;
uint8_t nb_segments = 1; uint8_t nb_segments = 1;
ops_pool_size = optimal_mempool_size(RTE_MAX( ops_pool_size = optimal_mempool_size(RTE_MAX(
/* Ops used plus 1 reference op */ /* Ops used plus 1 reference op */
...@@ -289,8 +272,7 @@ const char *ldpcdec_flag_bitmask[] = { ...@@ -289,8 +272,7 @@ const char *ldpcdec_flag_bitmask[] = {
"RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS", "RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS",
}; };
// DPDK BBDEV modified - in DPDK this function is named add_bbdev_dev, removed code for RTE_BASEBAND_ACC100, IMO we can also remove // based on DPDK BBDEV add_bbdev_dev
// RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC and RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC - to be checked
static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info) static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info)
{ {
int ret; int ret;
...@@ -346,7 +328,6 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info) ...@@ -346,7 +328,6 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info)
if (ret == 0) { if (ret == 0) {
printf("Found LDCP encoding queue (id=%d) at prio%u on dev%u\n", queue_id, qconf.priority, dev_id); printf("Found LDCP encoding queue (id=%d) at prio%u on dev%u\n", queue_id, qconf.priority, dev_id);
qconf.priority++; qconf.priority++;
//ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, &qconf);
ad->enc_queue = queue_id; ad->enc_queue = queue_id;
ad->queue_ids[queue_id] = queue_id; ad->queue_ids[queue_id] = queue_id;
break; break;
...@@ -361,7 +342,6 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info) ...@@ -361,7 +342,6 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info)
if (ret == 0) { if (ret == 0) {
printf("Found LDCP decoding queue (id=%d) at prio%u on dev%u\n", queue_id, qconf.priority, dev_id); printf("Found LDCP decoding queue (id=%d) at prio%u on dev%u\n", queue_id, qconf.priority, dev_id);
qconf.priority++; qconf.priority++;
//ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, &qconf);
ad->dec_queue = queue_id; ad->dec_queue = queue_id;
ad->queue_ids[queue_id] = queue_id; ad->queue_ids[queue_id] = queue_id;
break; break;
...@@ -372,28 +352,27 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info) ...@@ -372,28 +352,27 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info)
return TEST_SUCCESS; return TEST_SUCCESS;
} }
// DPDK BBDEV modified - nb_segments used, we are not using struct op_data_entries *ref_entries, but struct rte_mbuf *m_head, // based on DPDK BBDEV init_op_data_objs
// rte_pktmbuf_reset(m_head) added? if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) -> no code in else? static int init_op_data_objs_dec(struct rte_bbdev_op_data *bufs,
static int init_op_data_objs(struct rte_bbdev_op_data *bufs, uint8_t *input,
uint8_t *input, t_nrLDPCoffload_params *offloadParams,
uint32_t data_len, struct rte_mempool *mbuf_pool,
struct rte_mbuf *m_head, const uint16_t n,
struct rte_mempool *mbuf_pool, enum op_data_type op_type,
const uint16_t n, uint16_t min_alignment)
enum op_data_type op_type,
uint16_t min_alignment)
{ {
int ret;
unsigned int i, j;
bool large_input = false; bool large_input = false;
uint8_t nb_segments = 1; for (int i = 0; i < n; ++i) {
for (i = 0; i < n; ++i) { uint32_t data_len = offloadParams->perCB[i].E_cb;
char *data; char *data;
struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
TEST_ASSERT_NOT_NULL(m_head,
"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
op_type,
n,
mbuf_pool->size);
if (data_len > RTE_BBDEV_LDPC_E_MAX_MBUF) { if (data_len > RTE_BBDEV_LDPC_E_MAX_MBUF) {
/*
* Special case when DPDK mbuf cannot handle
* the required input size
*/
printf("Warning: Larger input size than DPDK mbuf %u\n", data_len); printf("Warning: Larger input size than DPDK mbuf %u\n", data_len);
large_input = true; large_input = true;
} }
...@@ -406,67 +385,79 @@ static int init_op_data_objs(struct rte_bbdev_op_data *bufs, ...@@ -406,67 +385,79 @@ static int init_op_data_objs(struct rte_bbdev_op_data *bufs,
/* Allocate a fake overused mbuf */ /* Allocate a fake overused mbuf */
data = rte_malloc(NULL, data_len, 0); data = rte_malloc(NULL, data_len, 0);
TEST_ASSERT_NOT_NULL(data, "rte malloc failed with %u bytes", data_len); TEST_ASSERT_NOT_NULL(data, "rte malloc failed with %u bytes", data_len);
memcpy(data, input, data_len); memcpy(data, &input[i * LDPC_MAX_CB_SIZE], data_len);
m_head->buf_addr = data; m_head->buf_addr = data;
m_head->buf_iova = rte_malloc_virt2iova(data); m_head->buf_iova = rte_malloc_virt2iova(data);
m_head->data_off = 0; m_head->data_off = 0;
m_head->data_len = data_len; m_head->data_len = data_len;
} else { } else {
// rte_pktmbuf_reset added
rte_pktmbuf_reset(m_head); rte_pktmbuf_reset(m_head);
data = rte_pktmbuf_append(m_head, data_len); data = rte_pktmbuf_append(m_head, data_len);
TEST_ASSERT_NOT_NULL(data, "Couldn't append %u bytes to mbuf from %d data type mbuf pool", data_len, op_type); TEST_ASSERT_NOT_NULL(data, "Couldn't append %u bytes to mbuf from %d data type mbuf pool", data_len, op_type);
TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment), TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
"Data addr in mbuf (%p) is not aligned to device min alignment (%u)", "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
data, data,
min_alignment); min_alignment);
rte_memcpy(data, input, data_len); rte_memcpy(data, &input[i * LDPC_MAX_CB_SIZE], data_len);
} }
bufs[i].length += data_len; bufs[i].length += data_len;
}
}
return 0;
}
for (j = 1; j < nb_segments; ++j) { // based on DPDK BBDEV init_op_data_objs
struct rte_mbuf *m_tail = rte_pktmbuf_alloc(mbuf_pool); static int init_op_data_objs_enc(struct rte_bbdev_op_data *bufs,
TEST_ASSERT_NOT_NULL(m_tail, uint8_t **input_enc,
"Not enough mbufs in %d data type mbuf pool (needed %d, available %u)", t_nrLDPCoffload_params *offloadParams,
op_type, struct rte_mbuf *m_head,
n * nb_segments, struct rte_mempool *mbuf_pool,
mbuf_pool->size); const uint16_t n,
enum op_data_type op_type,
uint16_t min_alignment)
{
bool large_input = false;
for (int i = 0; i < n; ++i) {
uint32_t data_len = offloadParams->Kr;
char *data;
struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
TEST_ASSERT_NOT_NULL(m_head,
"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
op_type,
n,
mbuf_pool->size);
data = rte_pktmbuf_append(m_tail, data_len); if (data_len > RTE_BBDEV_LDPC_E_MAX_MBUF) {
TEST_ASSERT_NOT_NULL(data, "Couldn't append %u bytes to mbuf from %d data type mbuf pool", data_len, op_type); printf("Warning: Larger input size than DPDK mbuf %u\n", data_len);
large_input = true;
}
bufs[i].data = m_head;
bufs[i].offset = 0;
bufs[i].length = 0;
if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
if ((op_type == DATA_INPUT) && large_input) {
/* Allocate a fake overused mbuf */
data = rte_malloc(NULL, data_len, 0);
TEST_ASSERT_NOT_NULL(data, "rte malloc failed with %u bytes", data_len);
memcpy(data, &input_enc[0], data_len);
m_head->buf_addr = data;
m_head->buf_iova = rte_malloc_virt2iova(data);
m_head->data_off = 0;
m_head->data_len = data_len;
} else {
rte_pktmbuf_reset(m_head);
data = rte_pktmbuf_append(m_head, data_len);
TEST_ASSERT_NOT_NULL(data, "Couldn't append %u bytes to mbuf from %d data type mbuf pool", data_len, op_type);
TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment), TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
"Data addr in mbuf (%p) is not aligned to device min alignment (%u)", "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
data, data,
min_alignment); min_alignment);
rte_memcpy(data, input, data_len); rte_memcpy(data, input_enc[i], data_len);
bufs[i].length += data_len;
ret = rte_pktmbuf_chain(m_head, m_tail);
TEST_ASSERT_SUCCESS(ret, "Couldn't chain mbufs from %d data type mbuf pool", op_type);
} }
} else { bufs[i].length += data_len;
/* allocate chained-mbuf for output buffer */
/*for (j = 1; j < nb_segments; ++j) {
struct rte_mbuf *m_tail =
rte_pktmbuf_alloc(mbuf_pool);
TEST_ASSERT_NOT_NULL(m_tail,
"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
op_type,
n * nb_segments,
mbuf_pool->size);
ret = rte_pktmbuf_chain(m_head, m_tail);
TEST_ASSERT_SUCCESS(ret,
"Couldn't chain mbufs from %d data type mbuf pool",
op_type);
}*/
} }
} }
return 0; return 0;
} }
...@@ -501,7 +492,7 @@ free_buffers(struct active_device *ad, struct test_op_params *op_params) ...@@ -501,7 +492,7 @@ free_buffers(struct active_device *ad, struct test_op_params *op_params)
rte_mempool_free(ad->harq_in_mbuf_pool); rte_mempool_free(ad->harq_in_mbuf_pool);
rte_mempool_free(ad->harq_out_mbuf_pool); rte_mempool_free(ad->harq_out_mbuf_pool);
for (int i = 0; i < rte_lcore_count(); ++i) { for (int i = 2; i < rte_lcore_count(); ++i) {
for (int j = 0; j < RTE_MAX_NUMA_NODES; ++j) { for (int j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
rte_free(op_params->q_bufs[j][i].inputs); rte_free(op_params->q_bufs[j][i].inputs);
rte_free(op_params->q_bufs[j][i].hard_outputs); rte_free(op_params->q_bufs[j][i].hard_outputs);
...@@ -512,27 +503,22 @@ free_buffers(struct active_device *ad, struct test_op_params *op_params) ...@@ -512,27 +503,22 @@ free_buffers(struct active_device *ad, struct test_op_params *op_params)
} }
} }
// OAI / DPDK BBDEV modified - in DPDK named copy_reference_dec_op, here we are passing t_nrLDPCoffload_params *p_offloadParams to // based on DPDK BBDEV copy_reference_ldpc_dec_op
// the function, check what is value of n, commented code for code block mode static void set_ldpc_dec_op(struct rte_bbdev_dec_op **ops,
static void unsigned int start_idx,
set_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, struct data_buffers *bufs,
unsigned int start_idx, uint8_t ulsch_id,
struct test_buffers *bufs, t_nrLDPCoffload_params *p_offloadParams)
struct rte_bbdev_dec_op *ref_op,
uint8_t r,
uint8_t harq_pid,
uint8_t ulsch_id,
t_nrLDPCoffload_params *p_offloadParams)
{ {
unsigned int i; unsigned int i;
for (i = 0; i < n; ++i) { for (i = 0; i < p_offloadParams->C; ++i) {
ops[i]->ldpc_dec.cb_params.e = p_offloadParams->E; ops[i]->ldpc_dec.cb_params.e = p_offloadParams->perCB[i].E_cb;
ops[i]->ldpc_dec.basegraph = p_offloadParams->BG; ops[i]->ldpc_dec.basegraph = p_offloadParams->BG;
ops[i]->ldpc_dec.z_c = p_offloadParams->Z; ops[i]->ldpc_dec.z_c = p_offloadParams->Z;
ops[i]->ldpc_dec.q_m = p_offloadParams->Qm; ops[i]->ldpc_dec.q_m = p_offloadParams->Qm;
ops[i]->ldpc_dec.n_filler = p_offloadParams->F; ops[i]->ldpc_dec.n_filler = p_offloadParams->F;
ops[i]->ldpc_dec.n_cb = p_offloadParams->n_cb; ops[i]->ldpc_dec.n_cb = p_offloadParams->n_cb;
ops[i]->ldpc_dec.iter_max = 20; ops[i]->ldpc_dec.iter_max = p_offloadParams->numMaxIter;
ops[i]->ldpc_dec.rv_index = p_offloadParams->rv; ops[i]->ldpc_dec.rv_index = p_offloadParams->rv;
ops[i]->ldpc_dec.op_flags = RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE | ops[i]->ldpc_dec.op_flags = RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE |
RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE | RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE |
...@@ -541,10 +527,13 @@ set_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, ...@@ -541,10 +527,13 @@ set_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
if (p_offloadParams->setCombIn) { if (p_offloadParams->setCombIn) {
ops[i]->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; ops[i]->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
} }
LOG_D(PHY,"ULSCH %02d HARQPID %02d R %02d COMBIN %d RV %d NCB %05d \n", ulsch_id, harq_pid, r, p_offloadParams->setCombIn, p_offloadParams->rv, p_offloadParams->n_cb); if (p_offloadParams->C > 1) {
ops[i]->ldpc_dec.code_block_mode = 1; // ldpc_dec->code_block_mode; ops[i]->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP;
ops[i]->ldpc_dec.harq_combined_input.offset = ulsch_id * 64 * LDPC_MAX_CB_SIZE + r * LDPC_MAX_CB_SIZE; ops[i]->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK;
ops[i]->ldpc_dec.harq_combined_output.offset = ulsch_id * 64 * LDPC_MAX_CB_SIZE + r * LDPC_MAX_CB_SIZE; }
ops[i]->ldpc_dec.code_block_mode = 1;
ops[i]->ldpc_dec.harq_combined_input.offset = ulsch_id * NR_LDPC_MAX_NUM_CB * LDPC_MAX_CB_SIZE + i * LDPC_MAX_CB_SIZE;
ops[i]->ldpc_dec.harq_combined_output.offset = ulsch_id * NR_LDPC_MAX_NUM_CB * LDPC_MAX_CB_SIZE + i * LDPC_MAX_CB_SIZE;
if (bufs->hard_outputs != NULL) if (bufs->hard_outputs != NULL)
ops[i]->ldpc_dec.hard_output = bufs->hard_outputs[start_idx + i]; ops[i]->ldpc_dec.hard_output = bufs->hard_outputs[start_idx + i];
if (bufs->inputs != NULL) if (bufs->inputs != NULL)
...@@ -558,17 +547,15 @@ set_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, ...@@ -558,17 +547,15 @@ set_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
} }
} }
// based on DPDK BBDEV copy_reference_ldpc_enc_op
static void set_ldpc_enc_op(struct rte_bbdev_enc_op **ops, static void set_ldpc_enc_op(struct rte_bbdev_enc_op **ops,
unsigned int n,
unsigned int start_idx, unsigned int start_idx,
struct rte_bbdev_op_data *inputs, struct rte_bbdev_op_data *inputs,
struct rte_bbdev_op_data *outputs, struct rte_bbdev_op_data *outputs,
struct rte_bbdev_enc_op *ref_op,
t_nrLDPCoffload_params *p_offloadParams) t_nrLDPCoffload_params *p_offloadParams)
{ {
// struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc; for (int i = 0; i < p_offloadParams->C; ++i) {
for (int i = 0; i < n; ++i) { ops[i]->ldpc_enc.cb_params.e = p_offloadParams->perCB[i].E_cb;
ops[i]->ldpc_enc.cb_params.e = p_offloadParams->E;
ops[i]->ldpc_enc.basegraph = p_offloadParams->BG; ops[i]->ldpc_enc.basegraph = p_offloadParams->BG;
ops[i]->ldpc_enc.z_c = p_offloadParams->Z; ops[i]->ldpc_enc.z_c = p_offloadParams->Z;
ops[i]->ldpc_enc.q_m = p_offloadParams->Qm; ops[i]->ldpc_enc.q_m = p_offloadParams->Qm;
...@@ -582,47 +569,52 @@ static void set_ldpc_enc_op(struct rte_bbdev_enc_op **ops, ...@@ -582,47 +569,52 @@ static void set_ldpc_enc_op(struct rte_bbdev_enc_op **ops,
} }
} }
// DPDK BBDEV modified - in DPDK called validate_dec_op, int8_t* p_out added, remove code related to op_data_entries, turbo_dec static int retrieve_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, const int vector_mask, uint8_t *p_out)
// replaced by ldpc_dec, removed coderelated to soft_output, memcpy(p_out, data+m->data_off, data_len)
static int retrieve_ldpc_dec_op(struct rte_bbdev_dec_op **ops,
const uint16_t n,
struct rte_bbdev_dec_op *ref_op,
const int vector_mask,
uint8_t *p_out)
{ {
struct rte_bbdev_op_data *hard_output; struct rte_bbdev_op_data *hard_output;
uint16_t data_len = 0;
struct rte_mbuf *m; struct rte_mbuf *m;
unsigned int i; unsigned int i;
char *data; char *data;
int offset = 0;
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
hard_output = &ops[i]->ldpc_dec.hard_output; hard_output = &ops[i]->ldpc_dec.hard_output;
m = hard_output->data; m = hard_output->data;
uint16_t data_len = rte_pktmbuf_data_len(m) - hard_output->offset; data_len = rte_pktmbuf_data_len(m) - hard_output->offset;
data = m->buf_addr; data = m->buf_addr;
memcpy(p_out, data + m->data_off, data_len); memcpy(&p_out[offset], data + m->data_off, data_len);
offset += data_len;
rte_pktmbuf_free(ops[i]->ldpc_dec.hard_output.data);
rte_pktmbuf_free(ops[i]->ldpc_dec.input.data);
} }
return 0; return 0;
} }
static int retrieve_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, struct rte_bbdev_enc_op *ref_op, uint8_t *p_out) static int retrieve_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, uint8_t *p_out, nrLDPC_params_per_cb_t *perCB)
{ {
struct rte_bbdev_op_data *output; struct rte_bbdev_op_data *output;
struct rte_mbuf *m; struct rte_mbuf *m;
unsigned int i; unsigned int i;
char *data; char *data;
uint8_t *out;
int offset = 0;
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
output = &ops[i]->ldpc_enc.output; output = &ops[i]->ldpc_enc.output;
m = output->data; m = output->data;
uint16_t data_len = min((LDPC_MAX_CB_SIZE) / 8, rte_pktmbuf_data_len(m)); // fix me uint16_t data_len = rte_pktmbuf_data_len(m) - output->offset;
out = &p_out[offset];
data = m->buf_addr; data = m->buf_addr;
for (int byte = 0; byte < data_len; byte++) for (int byte = 0; byte < data_len; byte++)
for (int bit = 0; bit < 8; bit++) for (int bit = 0; bit < 8; bit++)
p_out[byte * 8 + bit] = (data[m->data_off + byte] >> (7 - bit)) & 1; out[byte * 8 + bit] = (data[m->data_off + byte] >> (7 - bit)) & 1;
offset += perCB[i].E_cb;
rte_pktmbuf_free(ops[i]->ldpc_enc.output.data);
rte_pktmbuf_free(ops[i]->ldpc_enc.input.data);
} }
return 0; return 0;
} }
// DPDK BBDEV copy // based on DPDK BBDEV init_test_op_params
static int init_test_op_params(struct test_op_params *op_params, static int init_test_op_params(struct test_op_params *op_params,
enum rte_bbdev_op_type op_type, enum rte_bbdev_op_type op_type,
struct rte_mempool *ops_mp, struct rte_mempool *ops_mp,
...@@ -632,10 +624,10 @@ static int init_test_op_params(struct test_op_params *op_params, ...@@ -632,10 +624,10 @@ static int init_test_op_params(struct test_op_params *op_params,
{ {
int ret = 0; int ret = 0;
if (op_type == RTE_BBDEV_OP_LDPC_DEC) { if (op_type == RTE_BBDEV_OP_LDPC_DEC) {
ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, &op_params->ref_dec_op, 1); ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, &op_params->ref_dec_op, num_to_process);
op_params->mp_dec = ops_mp; op_params->mp_dec = ops_mp;
} else { } else {
ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, &op_params->ref_enc_op, 1); ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, &op_params->ref_enc_op, num_to_process);
op_params->mp_enc = ops_mp; op_params->mp_enc = ops_mp;
} }
...@@ -647,179 +639,125 @@ static int init_test_op_params(struct test_op_params *op_params, ...@@ -647,179 +639,125 @@ static int init_test_op_params(struct test_op_params *op_params,
return 0; return 0;
} }
// DPDK BBDEV modified - in DPDK called throughput_pmd_lcore_ldpc_dec, code related to extDdr removed // based on DPDK BBDEV throughput_pmd_lcore_ldpc_dec
static int static int
pmd_lcore_ldpc_dec(void *arg) pmd_lcore_ldpc_dec(void *arg)
{ {
struct thread_params *tp = arg; struct thread_params *tp = arg;
uint16_t enq, deq; uint16_t enq, deq;
int time_out = 0;
const uint16_t queue_id = tp->queue_id; const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz; const uint16_t num_segments = tp->p_offloadParams->C;
const uint16_t num_ops = tp->op_params->num_to_process; struct rte_bbdev_dec_op *ops_enq[num_segments];
struct rte_bbdev_dec_op *ops_enq[num_ops]; struct rte_bbdev_dec_op *ops_deq[num_segments];
struct rte_bbdev_dec_op *ops_deq[num_ops];
struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
uint8_t r = tp->r;
uint8_t harq_pid = tp->harq_pid;
uint8_t ulsch_id = tp->ulsch_id; uint8_t ulsch_id = tp->ulsch_id;
struct test_buffers *bufs = NULL; struct data_buffers *bufs = NULL;
int i, j, ret; int i, ret;
struct rte_bbdev_info info; struct rte_bbdev_info info;
uint16_t num_to_enq; uint16_t num_to_enq;
uint8_t *p_out = tp->p_out; uint8_t *p_out = tp->p_out;
t_nrLDPCoffload_params *p_offloadParams = tp->p_offloadParams; t_nrLDPCoffload_params *p_offloadParams = tp->p_offloadParams;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST); TEST_ASSERT_SUCCESS((num_segments > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
rte_bbdev_info_get(tp->dev_id, &info); rte_bbdev_info_get(tp->dev_id, &info);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause(); rte_pause();
ret = rte_mempool_get_bulk(tp->op_params->mp_dec, (void **)ops_enq, num_ops);
// looks like a bbdev internal error for the free operation, workaround here ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp_dec, ops_enq, num_segments);
ops_enq[0]->mempool = tp->op_params->mp_dec; TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_segments);
// ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); set_ldpc_dec_op(ops_enq, 0, bufs, ulsch_id, p_offloadParams);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
for (enq = 0, deq = 0; enq < num_segments;) {
set_ldpc_dec_op(ops_enq, num_to_enq = num_segments;
num_ops, if (unlikely(num_segments - enq < num_to_enq))
0, num_to_enq = num_segments - enq;
bufs,
ref_op,
r,
harq_pid,
ulsch_id,
p_offloadParams);
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq); enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq); deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
} }
/* dequeue the remaining */ /* dequeue the remaining */
int time_out = 0;
while (deq < enq) { while (deq < enq) {
deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq); deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
time_out++; time_out++;
DevAssert(time_out <= TIME_OUT_POLL); DevAssert(time_out <= TIME_OUT_POLL);
} }
// This if statement is not in DPDK
if (deq == enq) { if (deq == enq) {
tp->iter_count = 0; tp->iter_count = 0;
/* get the max of iter_count for all dequeued ops */ /* get the max of iter_count for all dequeued ops */
for (i = 0; i < num_ops; ++i) { for (i = 0; i < num_segments; ++i) {
uint8_t *status = tp->p_offloadParams->perCB[i].p_status_cb;
tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, tp->iter_count); tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, tp->iter_count);
*status = ops_enq[i]->status;
} }
ret = retrieve_ldpc_dec_op(ops_deq, num_ops, ref_op, tp->op_params->vector_mask, p_out); ret = retrieve_ldpc_dec_op(ops_deq, num_segments, tp->op_params->vector_mask, p_out);
TEST_ASSERT_SUCCESS(ret, "Validation failed!"); TEST_ASSERT_SUCCESS(ret, "LDPC offload decoder failed!");
} }
if (num_ops > 0) rte_bbdev_dec_op_free_bulk(ops_enq, num_segments);
rte_mempool_put_bulk(ops_enq[0]->mempool, (void **)ops_enq, num_ops); // Return the max number of iterations accross all segments
// Return the worst decoding number of iterations for all segments
return tp->iter_count; return tp->iter_count;
} }
// DPDK BBDEV copy - in DPDK called throughput_pmd_lcore_ldpc_enc // based on DPDK BBDEV throughput_pmd_lcore_ldpc_enc
static int pmd_lcore_ldpc_enc(void *arg) static int pmd_lcore_ldpc_enc(void *arg)
{ {
struct thread_params *tp = arg; struct thread_params *tp = arg;
uint16_t enq, deq; uint16_t enq, deq;
int time_out = 0;
const uint16_t queue_id = tp->queue_id; const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz; const uint16_t num_segments = tp->p_offloadParams->C;
const uint16_t num_ops = tp->op_params->num_to_process; tp->op_params->num_to_process = num_segments;
struct rte_bbdev_enc_op *ops_enq[num_ops]; struct rte_bbdev_enc_op *ops_enq[num_segments];
struct rte_bbdev_enc_op *ops_deq[num_ops]; struct rte_bbdev_enc_op *ops_deq[num_segments];
struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; struct rte_bbdev_info info;
int j, ret; int ret;
struct data_buffers *bufs = NULL;
uint16_t num_to_enq; uint16_t num_to_enq;
uint8_t *p_out = tp->p_out; uint8_t *p_out = tp->p_out;
t_nrLDPCoffload_params *p_offloadParams = tp->p_offloadParams; t_nrLDPCoffload_params *p_offloadParams = tp->p_offloadParams;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST); TEST_ASSERT_SUCCESS((num_segments > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
struct rte_bbdev_info info;
rte_bbdev_info_get(tp->dev_id, &info); rte_bbdev_info_get(tp->dev_id, &info);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);
struct test_buffers *bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause(); rte_pause();
ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp_enc, ops_enq, num_segments);
ret = rte_mempool_get_bulk(tp->op_params->mp_enc, (void **)ops_enq, num_ops); TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_segments);
// ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); set_ldpc_enc_op(ops_enq, 0, bufs->inputs, bufs->hard_outputs, p_offloadParams);
ops_enq[0]->mempool = tp->op_params->mp_enc; for (enq = 0, deq = 0; enq < num_segments;) {
num_to_enq = num_segments;
set_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, bufs->hard_outputs, ref_op, p_offloadParams); if (unlikely(num_segments - enq < num_to_enq))
num_to_enq = num_segments - enq;
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (j = 0; j < num_ops; ++j)
mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq); enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq); deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
} }
/* dequeue the remaining */ /* dequeue the remaining */
int time_out = 0;
while (deq < enq) { while (deq < enq) {
deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq); deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
time_out++; time_out++;
DevAssert(time_out <= TIME_OUT_POLL); DevAssert(time_out <= TIME_OUT_POLL);
} }
ret = retrieve_ldpc_enc_op(ops_deq, num_ops, ref_op, p_out); ret = retrieve_ldpc_enc_op(ops_deq, num_segments, p_out, tp->p_offloadParams->perCB);
TEST_ASSERT_SUCCESS(ret, "Validation failed!"); TEST_ASSERT_SUCCESS(ret, "Validation failed!");
// rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); rte_bbdev_enc_op_free_bulk(ops_enq, num_segments);
if (num_ops > 0)
rte_mempool_put_bulk(ops_enq[0]->mempool, (void **)ops_enq, num_ops);
return ret; return ret;
} }
// based on DPDK BBDEV throughput_pmd_lcore_dec
/*
* Test function that determines how long an enqueue + dequeue of a burst
* takes on available lcores.
*/
// OAI / DPDK BBDEV modified - in DPDK called throughput_test, here we pass more parameters to the function (t_nrLDPCoffload_params
// *p_offloadParams, uint8_t r, ...), many commented lines Removed code which specified which function to use based on the op_type,
// now we are using only pmd_lcore_ldpc_dec for RTE_BBDEV_OP_LDPC_DEC op type. Encoder is RTE_BBDEV_OP_LDPC_ENC op type,
// pmd_lcore_ldpc_enc to be implemented.
int start_pmd_dec(struct active_device *ad, int start_pmd_dec(struct active_device *ad,
struct test_op_params *op_params, struct test_op_params *op_params,
t_nrLDPCoffload_params *p_offloadParams, t_nrLDPCoffload_params *p_offloadParams,
uint8_t r,
uint8_t harq_pid,
uint8_t ulsch_id, uint8_t ulsch_id,
uint8_t *p_out) uint8_t *p_out)
{ {
int ret; int ret;
unsigned int lcore_id, used_cores = 0; unsigned int lcore_id, used_cores = 0;
// struct rte_bbdev_info info;
uint16_t num_lcores; uint16_t num_lcores;
// rte_bbdev_info_get(ad->dev_id, &info);
/* Set number of lcores */ /* Set number of lcores */
num_lcores = (ad->nb_queues < (op_params->num_lcores)) ? ad->nb_queues : op_params->num_lcores; num_lcores = (ad->nb_queues < (op_params->num_lcores)) ? ad->nb_queues : op_params->num_lcores;
/* Allocate memory for thread parameters structure */ /* Allocate memory for thread parameters structure */
...@@ -828,20 +766,16 @@ int start_pmd_dec(struct active_device *ad, ...@@ -828,20 +766,16 @@ int start_pmd_dec(struct active_device *ad,
"Failed to alloc %zuB for t_params", "Failed to alloc %zuB for t_params",
RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE)); RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE));
rte_atomic16_set(&op_params->sync, SYNC_WAIT); rte_atomic16_set(&op_params->sync, SYNC_WAIT);
/* Master core is set at first entry */ /* Master core is set at first entry */
t_params[0].dev_id = ad->dev_id; t_params[0].dev_id = ad->dev_id;
t_params[0].lcore_id = 15; t_params[0].lcore_id = 15;
t_params[0].op_params = op_params; t_params[0].op_params = op_params;
t_params[0].queue_id = ad->dec_queue; t_params[0].queue_id = ad->dec_queue;
used_cores++;
t_params[0].iter_count = 0; t_params[0].iter_count = 0;
t_params[0].p_out = p_out; t_params[0].p_out = p_out;
t_params[0].p_offloadParams = p_offloadParams; t_params[0].p_offloadParams = p_offloadParams;
t_params[0].r = r;
t_params[0].harq_pid = harq_pid;
t_params[0].ulsch_id = ulsch_id; t_params[0].ulsch_id = ulsch_id;
used_cores++;
// For now, we never enter here, we don't use the DPDK thread pool // For now, we never enter here, we don't use the DPDK thread pool
RTE_LCORE_FOREACH_WORKER(lcore_id) { RTE_LCORE_FOREACH_WORKER(lcore_id) {
if (used_cores >= num_lcores) if (used_cores >= num_lcores)
...@@ -853,58 +787,38 @@ int start_pmd_dec(struct active_device *ad, ...@@ -853,58 +787,38 @@ int start_pmd_dec(struct active_device *ad,
t_params[used_cores].iter_count = 0; t_params[used_cores].iter_count = 0;
t_params[used_cores].p_out = p_out; t_params[used_cores].p_out = p_out;
t_params[used_cores].p_offloadParams = p_offloadParams; t_params[used_cores].p_offloadParams = p_offloadParams;
t_params[used_cores].r = r;
t_params[used_cores].harq_pid = harq_pid;
t_params[used_cores].ulsch_id = ulsch_id; t_params[used_cores].ulsch_id = ulsch_id;
rte_eal_remote_launch(pmd_lcore_ldpc_dec, &t_params[used_cores++], lcore_id); rte_eal_remote_launch(pmd_lcore_ldpc_dec, &t_params[used_cores++], lcore_id);
} }
rte_atomic16_set(&op_params->sync, SYNC_START); rte_atomic16_set(&op_params->sync, SYNC_START);
ret = pmd_lcore_ldpc_dec(&t_params[0]); ret = pmd_lcore_ldpc_dec(&t_params[0]);
/* Master core is always used */ /* Master core is always used */
// for (used_cores = 1; used_cores < num_lcores; used_cores++) // for (used_cores = 1; used_cores < num_lcores; used_cores++)
// ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); // ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
/* Return if test failed */
if (ret < 0) {
rte_free(t_params);
return ret;
}
rte_free(t_params); rte_free(t_params);
return ret; return ret;
} }
// based on DPDK BBDEV throughput_pmd_lcore_enc
int32_t start_pmd_enc(struct active_device *ad, int32_t start_pmd_enc(struct active_device *ad,
struct test_op_params *op_params, struct test_op_params *op_params,
t_nrLDPCoffload_params *p_offloadParams, t_nrLDPCoffload_params *p_offloadParams,
uint8_t *p_out) uint8_t *p_out)
{ {
int ret;
unsigned int lcore_id, used_cores = 0; unsigned int lcore_id, used_cores = 0;
uint16_t num_lcores; uint16_t num_lcores;
int ret;
num_lcores = (ad->nb_queues < (op_params->num_lcores)) ? ad->nb_queues : op_params->num_lcores; num_lcores = (ad->nb_queues < (op_params->num_lcores)) ? ad->nb_queues : op_params->num_lcores;
/* Allocate memory for thread parameters structure */
struct thread_params *t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), RTE_CACHE_LINE_SIZE); struct thread_params *t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), RTE_CACHE_LINE_SIZE);
TEST_ASSERT_NOT_NULL(t_params,
"Failed to alloc %zuB for t_params",
RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE));
rte_atomic16_set(&op_params->sync, SYNC_WAIT); rte_atomic16_set(&op_params->sync, SYNC_WAIT);
/* Master core is set at first entry */
t_params[0].dev_id = ad->dev_id; t_params[0].dev_id = ad->dev_id;
t_params[0].lcore_id = 14; t_params[0].lcore_id = 14;
t_params[0].op_params = op_params; t_params[0].op_params = op_params;
// t_params[0].queue_id = ad->queue_ids[used_cores++];
used_cores++;
t_params[0].queue_id = ad->enc_queue; t_params[0].queue_id = ad->enc_queue;
t_params[0].iter_count = 0; t_params[0].iter_count = 0;
t_params[0].p_out = p_out; t_params[0].p_out = p_out;
t_params[0].p_offloadParams = p_offloadParams; t_params[0].p_offloadParams = p_offloadParams;
used_cores++;
// For now, we never enter here, we don't use the DPDK thread pool // For now, we never enter here, we don't use the DPDK thread pool
RTE_LCORE_FOREACH_WORKER(lcore_id) { RTE_LCORE_FOREACH_WORKER(lcore_id) {
if (used_cores >= num_lcores) if (used_cores >= num_lcores)
...@@ -916,14 +830,8 @@ int32_t start_pmd_enc(struct active_device *ad, ...@@ -916,14 +830,8 @@ int32_t start_pmd_enc(struct active_device *ad,
t_params[used_cores].iter_count = 0; t_params[used_cores].iter_count = 0;
rte_eal_remote_launch(pmd_lcore_ldpc_enc, &t_params[used_cores++], lcore_id); rte_eal_remote_launch(pmd_lcore_ldpc_enc, &t_params[used_cores++], lcore_id);
} }
rte_atomic16_set(&op_params->sync, SYNC_START); rte_atomic16_set(&op_params->sync, SYNC_START);
ret = pmd_lcore_ldpc_enc(&t_params[0]); ret = pmd_lcore_ldpc_enc(&t_params[0]);
if (ret) {
rte_free(t_params);
return ret;
}
rte_free(t_params); rte_free(t_params);
return ret; return ret;
} }
...@@ -944,7 +852,7 @@ int32_t LDPCinit() ...@@ -944,7 +852,7 @@ int32_t LDPCinit()
char *dpdk_dev = "d8:00.0"; //PCI address of the card char *dpdk_dev = "d8:00.0"; //PCI address of the card
char *argv_re[] = {"bbdev", "-a", dpdk_dev, "-l", "14-15", "--file-prefix=b6", "--"}; char *argv_re[] = {"bbdev", "-a", dpdk_dev, "-l", "14-15", "--file-prefix=b6", "--"};
// EAL initialization, if already initialized (init in xran lib) try to probe DPDK device // EAL initialization, if already initialized (init in xran lib) try to probe DPDK device
ret = rte_eal_init(5, argv_re); ret = rte_eal_init(sizeofArray(argv_re), argv_re);
if (ret < 0) { if (ret < 0) {
printf("EAL initialization failed, probing DPDK device %s\n", dpdk_dev); printf("EAL initialization failed, probing DPDK device %s\n", dpdk_dev);
if (rte_dev_probe(dpdk_dev) != 0) { if (rte_dev_probe(dpdk_dev) != 0) {
...@@ -969,50 +877,18 @@ int32_t LDPCinit() ...@@ -969,50 +877,18 @@ int32_t LDPCinit()
int socket_id = GET_SOCKET(info.socket_id); int socket_id = GET_SOCKET(info.socket_id);
int out_max_sz = 8448; // max code block size (for BG1), 22 * 384 int out_max_sz = 8448; // max code block size (for BG1), 22 * 384
int in_max_sz = LDPC_MAX_CB_SIZE; // max number of encoded bits (for BG2 and MCS0) int in_max_sz = LDPC_MAX_CB_SIZE; // max number of encoded bits (for BG2 and MCS0)
int num_ops = 1; int num_queues = 1;
int f_ret = create_mempools(ad, socket_id, num_ops, out_max_sz, in_max_sz); int f_ret = create_mempools(ad, socket_id, num_queues, out_max_sz, in_max_sz);
if (f_ret != TEST_SUCCESS) { if (f_ret != TEST_SUCCESS) {
printf("Couldn't create mempools"); printf("Couldn't create mempools");
return -1; return -1;
} }
// get_num_lcores() hardcoded to 1: we use one core for decode, and another for encode f_ret = init_test_op_params(op_params, RTE_BBDEV_OP_LDPC_DEC, ad->bbdev_dec_op_pool, num_queues, num_queues, 1);
// this code from bbdev test example is not considering encode and decode test f_ret = init_test_op_params(op_params, RTE_BBDEV_OP_LDPC_ENC, ad->bbdev_enc_op_pool, num_queues, num_queues, 1);
// get_num_ops() replaced by 1: LDPC decode and ldpc encode (7th param)
f_ret = init_test_op_params(op_params, RTE_BBDEV_OP_LDPC_DEC, ad->bbdev_dec_op_pool, 1, 1, 1);
f_ret |= init_test_op_params(op_params, RTE_BBDEV_OP_LDPC_ENC, ad->bbdev_enc_op_pool, 1, 1, 1);
if (f_ret != TEST_SUCCESS) { if (f_ret != TEST_SUCCESS) {
printf("Couldn't init test op params"); printf("Couldn't init test op params");
return -1; return -1;
} }
// fill_queue_buffers -> allocate_buffers_on_socket
for (int i = 0; i < ad->nb_queues; ++i) {
const uint16_t n = op_params->num_to_process;
struct rte_mempool *in_mp = ad->in_mbuf_pool;
struct rte_mempool *hard_out_mp = ad->hard_out_mbuf_pool;
struct rte_mempool *soft_out_mp = ad->soft_out_mbuf_pool;
struct rte_mempool *harq_in_mp = ad->harq_in_mbuf_pool;
struct rte_mempool *harq_out_mp = ad->harq_out_mbuf_pool;
struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {in_mp, soft_out_mp, hard_out_mp, harq_in_mp, harq_out_mp};
uint8_t queue_id = ad->queue_ids[i];
struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {&op_params->q_bufs[socket_id][queue_id].inputs,
&op_params->q_bufs[socket_id][queue_id].soft_outputs,
&op_params->q_bufs[socket_id][queue_id].hard_outputs,
&op_params->q_bufs[socket_id][queue_id].harq_inputs,
&op_params->q_bufs[socket_id][queue_id].harq_outputs};
for (enum op_data_type type = DATA_INPUT; type < 3; type += 2) {
int ret = allocate_buffers_on_socket(queue_ops[type], n * sizeof(struct rte_bbdev_op_data), socket_id);
TEST_ASSERT_SUCCESS(ret, "Couldn't allocate memory for rte_bbdev_op_data structs");
m_head[type] = rte_pktmbuf_alloc(mbuf_pools[type]);
TEST_ASSERT_NOT_NULL(m_head[type],
"Not enough mbufs in %d data type mbuf pool (needed %d, available %u)",
type,
1,
mbuf_pools[type]->size);
}
}
return 0; return 0;
} }
...@@ -1023,7 +899,6 @@ int32_t LDPCshutdown() ...@@ -1023,7 +899,6 @@ int32_t LDPCshutdown()
struct rte_bbdev_stats stats; struct rte_bbdev_stats stats;
free_buffers(ad, op_params); free_buffers(ad, op_params);
rte_free(op_params); rte_free(op_params);
// Stop and close bbdev
rte_bbdev_stats_get(dev_id, &stats); rte_bbdev_stats_get(dev_id, &stats);
rte_bbdev_stop(dev_id); rte_bbdev_stop(dev_id);
rte_bbdev_close(dev_id); rte_bbdev_close(dev_id);
...@@ -1043,17 +918,23 @@ int32_t LDPCdecoder(struct nrLDPC_dec_params *p_decParams, ...@@ -1043,17 +918,23 @@ int32_t LDPCdecoder(struct nrLDPC_dec_params *p_decParams,
{ {
pthread_mutex_lock(&decode_mutex); pthread_mutex_lock(&decode_mutex);
// hardcoded we use first device // hardcoded we use first device
struct active_device *ad = active_devs; struct active_device *ad = active_devs;
t_nrLDPCoffload_params offloadParams = {.E = p_decParams->E, t_nrLDPCoffload_params offloadParams = {.n_cb = (p_decParams->BG == 1) ? (66 * p_decParams->Z) : (50 * p_decParams->Z),
.n_cb = (p_decParams->BG == 1) ? (66 * p_decParams->Z) : (50 * p_decParams->Z),
.BG = p_decParams->BG, .BG = p_decParams->BG,
.Z = p_decParams->Z, .Z = p_decParams->Z,
.rv = p_decParams->rv, .rv = p_decParams->rv,
.F = p_decParams->F, .F = p_decParams->F,
.Qm = p_decParams->Qm, .Qm = p_decParams->Qm,
.numMaxIter = p_decParams->numMaxIter,
.C = C,
.setCombIn = p_decParams->setCombIn}; .setCombIn = p_decParams->setCombIn};
for (int r = 0; r < C; r++) {
offloadParams.perCB[r].E_cb = p_decParams->perCB[r].E_cb;
offloadParams.perCB[r].p_status_cb = &(p_decParams->perCB[r].status_cb);
}
struct rte_bbdev_info info; struct rte_bbdev_info info;
int ret;
rte_bbdev_info_get(ad->dev_id, &info); rte_bbdev_info_get(ad->dev_id, &info);
int socket_id = GET_SOCKET(info.socket_id); int socket_id = GET_SOCKET(info.socket_id);
// fill_queue_buffers -> init_op_data_objs // fill_queue_buffers -> init_op_data_objs
...@@ -1069,24 +950,23 @@ int32_t LDPCdecoder(struct nrLDPC_dec_params *p_decParams, ...@@ -1069,24 +950,23 @@ int32_t LDPCdecoder(struct nrLDPC_dec_params *p_decParams,
&op_params->q_bufs[socket_id][queue_id].hard_outputs, &op_params->q_bufs[socket_id][queue_id].hard_outputs,
&op_params->q_bufs[socket_id][queue_id].harq_inputs, &op_params->q_bufs[socket_id][queue_id].harq_inputs,
&op_params->q_bufs[socket_id][queue_id].harq_outputs}; &op_params->q_bufs[socket_id][queue_id].harq_outputs};
// this should be modified
// enum rte_bbdev_op_type op_type = RTE_BBDEV_OP_LDPC_DEC;
for (enum op_data_type type = DATA_INPUT; type < 3; type += 2) { for (enum op_data_type type = DATA_INPUT; type < 3; type += 2) {
int ret = init_op_data_objs(*queue_ops[type], ret = allocate_buffers_on_socket(queue_ops[type], C * sizeof(struct rte_bbdev_op_data), socket_id);
TEST_ASSERT_SUCCESS(ret, "Couldn't allocate memory for rte_bbdev_op_data structs");
ret = init_op_data_objs_dec(*queue_ops[type],
(uint8_t *)p_llr, (uint8_t *)p_llr,
p_decParams->E, &offloadParams,
m_head[type],
mbuf_pools[type], mbuf_pools[type],
1, C,
type, type,
info.drv.min_alignment); info.drv.min_alignment);
TEST_ASSERT_SUCCESS(ret, "Couldn't init rte_bbdev_op_data structs"); TEST_ASSERT_SUCCESS(ret, "Couldn't init rte_bbdev_op_data structs");
} }
int ret = start_pmd_dec(ad, op_params, &offloadParams, C, harq_pid, ulsch_id, (uint8_t *)p_out); ret = start_pmd_dec(ad, op_params, &offloadParams, ulsch_id, (uint8_t *)p_out);
if (ret < 0) { if (ret < 0) {
printf("Couldn't start pmd dec\n"); printf("Couldn't start pmd dec\n");
pthread_mutex_unlock(&decode_mutex); pthread_mutex_unlock(&decode_mutex);
return (20); // Fix me: we should propoagate max_iterations properly in the call (impp struct) return (p_decParams->numMaxIter);
} }
pthread_mutex_unlock(&decode_mutex); pthread_mutex_unlock(&decode_mutex);
return ret; return ret;
...@@ -1097,16 +977,23 @@ int32_t LDPCencoder(unsigned char **input, unsigned char **output, encoder_imple ...@@ -1097,16 +977,23 @@ int32_t LDPCencoder(unsigned char **input, unsigned char **output, encoder_imple
pthread_mutex_lock(&encode_mutex); pthread_mutex_lock(&encode_mutex);
// hardcoded to use the first found board // hardcoded to use the first found board
struct active_device *ad = active_devs; struct active_device *ad = active_devs;
int Zc = impp->Zc; int ret;
int BG = impp->BG; uint32_t Nref = 0;
t_nrLDPCoffload_params offloadParams = {.E = impp->E, t_nrLDPCoffload_params offloadParams = {.n_cb = (impp->BG == 1) ? (66 * impp->Zc) : (50 * impp->Zc),
.n_cb = (BG == 1) ? (66 * Zc) : (50 * Zc), .BG = impp->BG,
.BG = BG, .Z = impp->Zc,
.Z = Zc,
.rv = impp->rv, .rv = impp->rv,
.F = impp->F, .F = impp->F,
.Qm = impp->Qm, .Qm = impp->Qm,
.C = impp->n_segments,
.Kr = (impp->K - impp->F + 7) / 8}; .Kr = (impp->K - impp->F + 7) / 8};
for (int r = 0; r < impp->n_segments; r++) {
offloadParams.perCB[r].E_cb = impp->perCB[r].E_cb;
}
if (impp->Tbslbrm != 0) {
Nref = 3 * impp->Tbslbrm / (2 * impp->n_segments);
offloadParams.n_cb = min(offloadParams.n_cb, Nref);
}
struct rte_bbdev_info info; struct rte_bbdev_info info;
rte_bbdev_info_get(ad->dev_id, &info); rte_bbdev_info_get(ad->dev_id, &info);
int socket_id = GET_SOCKET(info.socket_id); int socket_id = GET_SOCKET(info.socket_id);
...@@ -1124,17 +1011,19 @@ int32_t LDPCencoder(unsigned char **input, unsigned char **output, encoder_imple ...@@ -1124,17 +1011,19 @@ int32_t LDPCencoder(unsigned char **input, unsigned char **output, encoder_imple
&op_params->q_bufs[socket_id][queue_id].harq_inputs, &op_params->q_bufs[socket_id][queue_id].harq_inputs,
&op_params->q_bufs[socket_id][queue_id].harq_outputs}; &op_params->q_bufs[socket_id][queue_id].harq_outputs};
for (enum op_data_type type = DATA_INPUT; type < 3; type += 2) { for (enum op_data_type type = DATA_INPUT; type < 3; type += 2) {
int ret = init_op_data_objs(*queue_ops[type], ret = allocate_buffers_on_socket(queue_ops[type], impp->n_segments * sizeof(struct rte_bbdev_op_data), socket_id);
*input, TEST_ASSERT_SUCCESS(ret, "Couldn't allocate memory for rte_bbdev_op_data structs");
offloadParams.Kr, ret = init_op_data_objs_enc(*queue_ops[type],
input,
&offloadParams,
m_head[type], m_head[type],
mbuf_pools[type], mbuf_pools[type],
1, impp->n_segments,
type, type,
info.drv.min_alignment); info.drv.min_alignment);
TEST_ASSERT_SUCCESS(ret, "Couldn't init rte_bbdev_op_data structs"); TEST_ASSERT_SUCCESS(ret, "Couldn't init rte_bbdev_op_data structs");
} }
int ret=start_pmd_enc(ad, op_params, &offloadParams, *output); ret = start_pmd_enc(ad, op_params, &offloadParams, *output);
pthread_mutex_unlock(&encode_mutex); pthread_mutex_unlock(&encode_mutex);
return ret; return ret;
} }
...@@ -78,6 +78,15 @@ typedef enum nrLDPC_outMode { ...@@ -78,6 +78,15 @@ typedef enum nrLDPC_outMode {
nrLDPC_outMode_LLRINT8 /**< Single LLR value per int8_t output */ nrLDPC_outMode_LLRINT8 /**< Single LLR value per int8_t output */
} e_nrLDPC_outMode; } e_nrLDPC_outMode;
/**
Structure containing LDPC parameters per CB
*/
typedef struct nrLDPC_params_per_cb {
uint32_t E_cb;
uint8_t status_cb;
uint8_t* p_status_cb;
} nrLDPC_params_per_cb_t;
/** /**
Structure containing LDPC decoder parameters. Structure containing LDPC decoder parameters.
*/ */
...@@ -94,18 +103,24 @@ typedef struct nrLDPC_dec_params { ...@@ -94,18 +103,24 @@ typedef struct nrLDPC_dec_params {
int crc_type; int crc_type;
int (*check_crc)(uint8_t* decoded_bytes, uint32_t n, uint8_t crc_type); int (*check_crc)(uint8_t* decoded_bytes, uint32_t n, uint8_t crc_type);
uint8_t setCombIn; uint8_t setCombIn;
nrLDPC_params_per_cb_t perCB[NR_LDPC_MAX_NUM_CB];
} t_nrLDPC_dec_params; } t_nrLDPC_dec_params;
/**
Structure containing LDPC offload parameters.
*/
typedef struct nrLDPCoffload_params { typedef struct nrLDPCoffload_params {
uint8_t BG; /**< Base graph */ uint8_t BG; /**< Base graph */
uint16_t Z; uint16_t Z;
uint16_t Kr; uint16_t Kr;
uint8_t rv; uint8_t rv;
uint32_t E;
uint16_t n_cb; uint16_t n_cb;
uint16_t F; /**< Filler bits */ uint16_t F; /**< Filler bits */
uint8_t Qm; /**< Modulation */ uint8_t Qm; /**< Modulation */
uint8_t C;
uint8_t numMaxIter;
uint8_t setCombIn; uint8_t setCombIn;
nrLDPC_params_per_cb_t perCB[NR_LDPC_MAX_NUM_CB];
} t_nrLDPCoffload_params; } t_nrLDPCoffload_params;
/** /**
......
...@@ -99,6 +99,8 @@ ...@@ -99,6 +99,8 @@
/** Maximum number of possible input LLR = NR_LDPC_NCOL_BG1*NR_LDPC_ZMAX */ /** Maximum number of possible input LLR = NR_LDPC_NCOL_BG1*NR_LDPC_ZMAX */
#define NR_LDPC_MAX_NUM_LLR 27000 #define NR_LDPC_MAX_NUM_LLR 27000
#define NR_LDPC_MAX_NUM_CB 72
// ============================================================================== // ==============================================================================
// GLOBAL CONSTANT VARIABLES // GLOBAL CONSTANT VARIABLES
......
...@@ -59,8 +59,9 @@ typedef struct { ...@@ -59,8 +59,9 @@ typedef struct {
uint32_t F; uint32_t F;
/// Modulation order /// Modulation order
uint8_t Qm; uint8_t Qm;
uint32_t E; uint32_t Tbslbrm;
unsigned int G; unsigned int G;
nrLDPC_params_per_cb_t perCB[NR_LDPC_MAX_NUM_CB];
// Redundancy version index // Redundancy version index
uint8_t rv; uint8_t rv;
} encoder_implemparams_t; } encoder_implemparams_t;
......
...@@ -137,6 +137,9 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB) ...@@ -137,6 +137,9 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB)
if (gNB->ldpc_offload_flag) if (gNB->ldpc_offload_flag)
load_LDPClib("_t2", &ldpc_interface_offload); load_LDPClib("_t2", &ldpc_interface_offload);
else
load_LDPClib(NULL, &ldpc_interface);
gNB->max_nb_pdsch = MAX_MOBILES_PER_GNB; gNB->max_nb_pdsch = MAX_MOBILES_PER_GNB;
init_delay_table(fp->ofdm_symbol_size, MAX_DELAY_COMP, NR_MAX_OFDM_SYMBOL_SIZE, fp->delay_table); init_delay_table(fp->ofdm_symbol_size, MAX_DELAY_COMP, NR_MAX_OFDM_SYMBOL_SIZE, fp->delay_table);
......
...@@ -360,6 +360,7 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB, ...@@ -360,6 +360,7 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB,
impp.harq = harq; impp.harq = harq;
if (gNB->ldpc_offload_flag) { if (gNB->ldpc_offload_flag) {
impp.Qm = rel15->qamModOrder[0]; impp.Qm = rel15->qamModOrder[0];
impp.Tbslbrm = rel15->maintenance_parms_v3.tbSizeLbrmBytes;
impp.rv = rel15->rvIndex[0]; impp.rv = rel15->rvIndex[0];
int nb_re_dmrs = int nb_re_dmrs =
(rel15->dmrsConfigType == NFAPI_NR_DMRS_TYPE1) ? (6 * rel15->numDmrsCdmGrpsNoData) : (4 * rel15->numDmrsCdmGrpsNoData); (rel15->dmrsConfigType == NFAPI_NR_DMRS_TYPE1) ? (6 * rel15->numDmrsCdmGrpsNoData) : (4 * rel15->numDmrsCdmGrpsNoData);
...@@ -370,13 +371,10 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB, ...@@ -370,13 +371,10 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB,
harq->unav_res, harq->unav_res,
rel15->qamModOrder[0], rel15->qamModOrder[0],
rel15->nrOfLayers); rel15->nrOfLayers);
int r_offset = 0;
for (int r = 0; r < impp.n_segments; r++) { for (int r = 0; r < impp.n_segments; r++) {
impp.E = nr_get_E(impp.G, impp.n_segments, impp.Qm, rel15->nrOfLayers, r); impp.perCB[r].E_cb = nr_get_E(impp.G, impp.n_segments, impp.Qm, rel15->nrOfLayers, r);
uint8_t *f = impp.output + r_offset;
ldpc_interface_offload.LDPCencoder(&harq->c[r], &f, &impp);
r_offset += impp.E;
} }
ldpc_interface_offload.LDPCencoder(harq->c, &impp.output, &impp);
} else { } else {
notifiedFIFO_t nf; notifiedFIFO_t nf;
initNotifiedFIFO(&nf); initNotifiedFIFO(&nf);
......
...@@ -232,64 +232,59 @@ int decode_offload(PHY_VARS_gNB *phy_vars_gNB, ...@@ -232,64 +232,59 @@ int decode_offload(PHY_VARS_gNB *phy_vars_gNB,
{ {
NR_gNB_ULSCH_t *ulsch = &phy_vars_gNB->ulsch[ULSCH_id]; NR_gNB_ULSCH_t *ulsch = &phy_vars_gNB->ulsch[ULSCH_id];
NR_UL_gNB_HARQ_t *harq_process = ulsch->harq_process; NR_UL_gNB_HARQ_t *harq_process = ulsch->harq_process;
int16_t z_ol[LDPC_MAX_CB_SIZE] __attribute__((aligned(16))); int16_t z_ol[NR_LDPC_MAX_NUM_CB * LDPC_MAX_CB_SIZE] __attribute__((aligned(16)));
int8_t l_ol[LDPC_MAX_CB_SIZE] __attribute__((aligned(16))); int8_t l_ol[NR_LDPC_MAX_NUM_CB * LDPC_MAX_CB_SIZE] __attribute__((aligned(16)));
uint8_t Qm = pusch_pdu->qam_mod_order; const int kc = decParams->BG == 2 ? 52 : 68;
uint8_t n_layers = pusch_pdu->nrOfLayers; uint32_t A = (harq_process->TBS) << 3;
const int Kr = harq_process->K; const int Kr = harq_process->K;
const int Kr_bytes = Kr >> 3; const int Kr_bytes = Kr >> 3;
uint32_t A = (harq_process->TBS) << 3; int8_t decodeIterations = 0;
const int kc = decParams->BG == 2 ? 52 : 68; int r_offset = 0;
ulsch->max_ldpc_iterations = 20; int offset = 0;
int decodeIterations = 2; // new data received, set processedSegments to 0
int r_offset = 0, offset = 0; if (!decParams->setCombIn)
harq_process->processedSegments = 0;
for (int r = 0; r < harq_process->C; r++) { for (int r = 0; r < harq_process->C; r++) {
int E = nr_get_E(G, harq_process->C, Qm, n_layers, r); decParams->perCB[r].E_cb = nr_get_E(G, harq_process->C, decParams->Qm, pusch_pdu->nrOfLayers, r);
memset(harq_process->c[r], 0, Kr_bytes); memcpy(&z_ol[offset], ulsch_llr + r_offset, decParams->perCB[r].E_cb * sizeof(*z_ol));
decParams->R = nr_get_R_ldpc_decoder(pusch_pdu->pusch_data.rv_index, simde__m128i *pv_ol128 = (simde__m128i *)&z_ol[offset];
E, simde__m128i *pl_ol128 = (simde__m128i *)&l_ol[offset];
decParams->BG,
decParams->Z,
&harq_process->llrLen,
harq_process->round);
memcpy(z_ol, ulsch_llr + r_offset, E * sizeof(short));
simde__m128i *pv_ol128 = (simde__m128i *)&z_ol;
simde__m128i *pl_ol128 = (simde__m128i *)&l_ol;
for (int i = 0, j = 0; j < ((kc * harq_process->Z) >> 4) + 1; i += 2, j++) { for (int i = 0, j = 0; j < ((kc * harq_process->Z) >> 4) + 1; i += 2, j++) {
pl_ol128[j] = simde_mm_packs_epi16(pv_ol128[i], pv_ol128[i + 1]); pl_ol128[j] = simde_mm_packs_epi16(pv_ol128[i], pv_ol128[i + 1]);
} }
decParams->E = E;
decParams->rv = pusch_pdu->pusch_data.rv_index;
decParams->F = harq_process->F; decParams->F = harq_process->F;
decParams->Qm = Qm; r_offset += decParams->perCB[r].E_cb;
decodeIterations = offset += LDPC_MAX_CB_SIZE;
ldpc_interface_offload }
.LDPCdecoder(decParams, harq_pid, ULSCH_id, r, (int8_t *)&pl_ol128[0], (int8_t *)harq_process->c[r], NULL, NULL);
if (decodeIterations < 0) { int8_t *p_outDec = calloc(harq_process->C * Kr_bytes, sizeof(int8_t));
LOG_E(PHY, "ulsch_decoding.c: Problem in LDPC decoder offload\n"); decodeIterations =
return -1; ldpc_interface_offload.LDPCdecoder(decParams, harq_pid, ULSCH_id, harq_process->C, (int8_t *)l_ol, p_outDec, NULL, NULL);
}
bool decodeSuccess = check_crc((uint8_t *)harq_process->c[r], lenWithCrc(harq_process->C, A), crcType(harq_process->C, A)); if (decodeIterations < 0) {
if (decodeSuccess) { LOG_E(PHY, "ulsch_decoding.c: Problem in LDPC decoder offload\n");
memcpy(harq_process->b + offset, harq_process->c[r], Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0)); return -1;
offset += (Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0)); }
int offset_b = 0;
for (int r = 0; r < harq_process->C; r++) {
if (decParams->perCB[r].status_cb == 0 || harq_process->C == 1) {
memcpy(harq_process->b + offset_b, &p_outDec[offset_b], Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0));
harq_process->processedSegments++; harq_process->processedSegments++;
} else {
LOG_D(PHY, "uplink segment error %d/%d\n", r, harq_process->C);
LOG_D(PHY, "ULSCH %d in error\n", ULSCH_id);
} }
r_offset += E; offset_b += (Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0));
} }
bool crc_valid = false; bool crc_valid = false;
// CRC check made by the T2, no need to perform CRC check for a single code block twice
if (harq_process->processedSegments == harq_process->C) { if (harq_process->processedSegments == harq_process->C) {
// When the number of code blocks is 1 (C = 1) and ulsch_harq->processedSegments = 1, we can assume a good TB because of the crc_valid = check_crc(harq_process->b, lenWithCrc(1, A), crcType(1, A));
// CRC check made by the LDPC for early termination, so, no need to perform CRC check twice for a single code block if (harq_process->C == 1 && !crc_valid) {
crc_valid = true; harq_process->processedSegments--;
if (harq_process->C > 1) {
crc_valid = check_crc(harq_process->b, lenWithCrc(1, A), crcType(1, A));
} }
} }
if (crc_valid) { if (crc_valid) {
LOG_D(PHY, "ULSCH: Setting ACK for slot %d TBS %d\n", ulsch->slot, harq_process->TBS); LOG_D(PHY, "ULSCH: Setting ACK for slot %d TBS %d\n", ulsch->slot, harq_process->TBS);
nr_fill_indication(phy_vars_gNB, ulsch->frame, ulsch->slot, ULSCH_id, harq_pid, 0, 0); nr_fill_indication(phy_vars_gNB, ulsch->frame, ulsch->slot, ULSCH_id, harq_pid, 0, 0);
...@@ -312,6 +307,7 @@ int decode_offload(PHY_VARS_gNB *phy_vars_gNB, ...@@ -312,6 +307,7 @@ int decode_offload(PHY_VARS_gNB *phy_vars_gNB,
} }
ulsch->last_iteration_cnt = decodeIterations; ulsch->last_iteration_cnt = decodeIterations;
free(p_outDec);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_gNB_ULSCH_DECODING,0); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_gNB_ULSCH_DECODING,0);
return 0; return 0;
} }
...@@ -349,7 +345,6 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB, ...@@ -349,7 +345,6 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
const uint8_t n_layers = pusch_pdu->nrOfLayers; const uint8_t n_layers = pusch_pdu->nrOfLayers;
// ------------------------------------------------------------------ // ------------------------------------------------------------------
harq_process->processedSegments = 0;
harq_process->TBS = pusch_pdu->pusch_data.tb_size; harq_process->TBS = pusch_pdu->pusch_data.tb_size;
t_nrLDPC_dec_params decParams = {.check_crc = check_crc}; t_nrLDPC_dec_params decParams = {.check_crc = check_crc};
...@@ -418,6 +413,8 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB, ...@@ -418,6 +413,8 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
decParams.Z = harq_process->Z; decParams.Z = harq_process->Z;
decParams.numMaxIter = ulsch->max_ldpc_iterations; decParams.numMaxIter = ulsch->max_ldpc_iterations;
decParams.Qm = Qm;
decParams.rv = pusch_pdu->pusch_data.rv_index;
decParams.outMode = 0; decParams.outMode = 0;
decParams.setCombIn = !harq_process->harq_to_be_cleared; decParams.setCombIn = !harq_process->harq_to_be_cleared;
if (harq_process->harq_to_be_cleared) { if (harq_process->harq_to_be_cleared) {
...@@ -428,7 +425,7 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB, ...@@ -428,7 +425,7 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
if (phy_vars_gNB->ldpc_offload_flag) if (phy_vars_gNB->ldpc_offload_flag)
return decode_offload(phy_vars_gNB, ULSCH_id, ulsch_llr, pusch_pdu, &decParams, harq_pid, G); return decode_offload(phy_vars_gNB, ULSCH_id, ulsch_llr, pusch_pdu, &decParams, harq_pid, G);
harq_process->processedSegments = 0;
uint32_t offset = 0, r_offset = 0; uint32_t offset = 0, r_offset = 0;
set_abort(&harq_process->abort_decode, false); set_abort(&harq_process->abort_decode, false);
for (int r = 0; r < harq_process->C; r++) { for (int r = 0; r < harq_process->C; r++) {
......
...@@ -151,16 +151,13 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue, ...@@ -151,16 +151,13 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
start_meas(&ue->ulsch_ldpc_encoding_stats); start_meas(&ue->ulsch_ldpc_encoding_stats);
if (ldpc_interface_offload.LDPCencoder) { if (ldpc_interface_offload.LDPCencoder) {
for (int j = 0; j < impp.n_segments; j++) { for (int j = 0; j < impp.n_segments; j++) {
impp.E = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, j); impp.perCB[j].E_cb = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, j);
uint8_t *f = harq_process->f + r_offset;
ldpc_interface_offload.LDPCencoder(&harq_process->c[j], &f, &impp);
r_offset += impp.E;
} }
ldpc_interface_offload.LDPCencoder(harq_process->c, &harq_process->f, &impp);
} else { } else {
if (ulsch->pusch_pdu.pusch_data.new_data_indicator) { if (ulsch->pusch_pdu.pusch_data.new_data_indicator) {
for (int j = 0; j < (impp.n_segments / 8 + 1); j++) { for (int j = 0; j < (impp.n_segments / 8 + 1); j++) {
impp.macro_num = j; impp.macro_num = j;
impp.E = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, j);
impp.Kr = impp.K; impp.Kr = impp.K;
ldpc_interface.LDPCencoder(harq_process->c, harq_process->d, &impp); ldpc_interface.LDPCencoder(harq_process->c, harq_process->d, &impp);
} }
...@@ -191,7 +188,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue, ...@@ -191,7 +188,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
ulsch->pusch_pdu.pusch_data.rv_index); ulsch->pusch_pdu.pusch_data.rv_index);
///////////////////////// d---->| Rate matching bit selection |---->e ///////////////////////// ///////////////////////// d---->| Rate matching bit selection |---->e /////////////////////////
impp.E = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, r); impp.perCB[r].E_cb = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, r);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RATE_MATCHING_LDPC, VCD_FUNCTION_IN); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RATE_MATCHING_LDPC, VCD_FUNCTION_IN);
start_meas(&ue->ulsch_rate_matching_stats); start_meas(&ue->ulsch_rate_matching_stats);
...@@ -204,7 +201,8 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue, ...@@ -204,7 +201,8 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
impp.F, impp.F,
impp.Kr - impp.F - 2 * impp.Zc, impp.Kr - impp.F - 2 * impp.Zc,
impp.rv, impp.rv,
impp.E) == -1) impp.perCB[r].E_cb)
== -1)
return -1; return -1;
stop_meas(&ue->ulsch_rate_matching_stats); stop_meas(&ue->ulsch_rate_matching_stats);
...@@ -218,10 +216,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue, ...@@ -218,10 +216,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
///////////////////////// e---->| Rate matching bit interleaving |---->f ///////////////////////// ///////////////////////// e---->| Rate matching bit interleaving |---->f /////////////////////////
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_INTERLEAVING_LDPC, VCD_FUNCTION_IN); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_INTERLEAVING_LDPC, VCD_FUNCTION_IN);
start_meas(&ue->ulsch_interleaving_stats); start_meas(&ue->ulsch_interleaving_stats);
nr_interleaving_ldpc(impp.E, nr_interleaving_ldpc(impp.perCB[r].E_cb, impp.Qm, harq_process->e + r_offset, harq_process->f + r_offset);
impp.Qm,
harq_process->e + r_offset,
harq_process->f + r_offset);
stop_meas(&ue->ulsch_interleaving_stats); stop_meas(&ue->ulsch_interleaving_stats);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_INTERLEAVING_LDPC, VCD_FUNCTION_OUT); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_INTERLEAVING_LDPC, VCD_FUNCTION_OUT);
...@@ -231,7 +226,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue, ...@@ -231,7 +226,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
if (r == impp.n_segments - 1) if (r == impp.n_segments - 1)
write_output("enc_output.m","enc", harq_process->f, G, 1, 4); write_output("enc_output.m","enc", harq_process->f, G, 1, 4);
#endif #endif
r_offset += impp.E; r_offset += impp.perCB[r].E_cb;
} }
} }
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -504,6 +504,7 @@ typedef struct PHY_VARS_NR_UE_s { ...@@ -504,6 +504,7 @@ typedef struct PHY_VARS_NR_UE_s {
uint8_t max_ldpc_iterations; uint8_t max_ldpc_iterations;
int ldpc_offload_enable;
/// SRS variables /// SRS variables
nr_srs_info_t *nr_srs_info; nr_srs_info_t *nr_srs_info;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment