Commit 6bec8779 authored by Jaroslava Fiedlerova

Enhance T2 Offload for gNB and UE

- Reworked nrLDPC_decoder_offload.c to process all segments in a TB with a single call to LDPCdecoder() or LDPCencoder().
- Perform CRC checks on the T2.
- Modified nr_ulsch_decoding (gNB decoder) to:
  - Count processed segments.
  - Properly perform CRC checks.
- Created a constant NR_LDPC_MAX_NUM_CB to define the maximum number of codeblocks.
- Conditional library loading:
  - On gNB: Load ldpc_t2 library if offload is enabled using the --ldpc-offload-enable flag.
  - On UE: When offload is enabled, load the ldpc_t2 library in addition to the default ldpc library, since only encoder offload is supported on the UE.
- General cleanup of nrLDPC_decoder_offload.c for improved readability and maintainability.
- Modified the structure of LDPC encoder/decoder/offload parameters:
  - Introduced a structure for code block (CB) related parameters.
  - Removed parameter E from the encoder and offload parameter structures for clarity.
  - Replaced E with perCB->E_cb in the UE encoder code.
parent e5fc1ebd
......@@ -286,6 +286,7 @@ void set_options(int CC_id, PHY_VARS_NR_UE *UE){
UE->rf_map.card = card_offset;
UE->rf_map.chain = CC_id + chain_offset;
UE->max_ldpc_iterations = nrUE_params.max_ldpc_iterations;
UE->ldpc_offload_enable = nrUE_params.ldpc_offload_flag;
UE->UE_scan_carrier = nrUE_params.UE_scan_carrier;
UE->UE_fo_compensation = nrUE_params.UE_fo_compensation;
UE->if_freq = nrUE_params.if_freq;
......@@ -491,7 +492,11 @@ int main(int argc, char **argv)
cpuf=get_cpu_freq_GHz();
itti_init(TASK_MAX, tasks_info);
init_opt() ;
init_opt();
if (nrUE_params.ldpc_offload_flag)
load_LDPClib("_t2", &ldpc_interface_offload);
load_LDPClib(NULL, &ldpc_interface);
if (ouput_vcd) {
......
......@@ -37,6 +37,7 @@
{"dlsch-parallel", CONFIG_HLP_DLSCH_PARA, 0, .u8ptr=NULL, .defintval=0, TYPE_UINT8, 0}, \
{"offset-divisor", CONFIG_HLP_OFFSET_DIV, 0, .uptr=&nrUE_params.ofdm_offset_divisor, .defuintval=8, TYPE_UINT32, 0}, \
{"max-ldpc-iterations", CONFIG_HLP_MAX_LDPC_ITERATIONS, 0, .iptr=&nrUE_params.max_ldpc_iterations, .defuintval=8, TYPE_UINT8, 0}, \
{"ldpc-offload-enable", CONFIG_HLP_LDPC_OFFLOAD, PARAMFLAG_BOOL, .iptr=&(nrUE_params.ldpc_offload_flag), .defintval=0, TYPE_INT, 0}, \
{"nr-dlsch-demod-shift", CONFIG_HLP_DLSHIFT, 0, .iptr=(int32_t *)&nr_dlsch_demod_shift, .defintval=0, TYPE_INT, 0}, \
{"V" , CONFIG_HLP_VCD, PARAMFLAG_BOOL, .iptr=&vcdflag, .defintval=0, TYPE_INT, 0}, \
{"uecap_file", CONFIG_HLP_UECAP_FILE, 0, .strptr=&uecap_file, .defstrval="./uecap_ports1.xml", TYPE_STRING, 0}, \
......@@ -80,6 +81,7 @@ typedef struct {
int nb_antennas_tx;
int N_RB_DL;
int ssb_start_subcarrier;
int ldpc_offload_flag;
} nrUE_params_t;
extern uint64_t get_nrUE_optmask(void);
extern uint64_t set_nrUE_optmask(uint64_t bitmask);
......
......@@ -259,7 +259,7 @@ one_measurement_t test_ldpc(short max_iterations,
printf("To: %d\n", (Kb + nrows - no_punctured_columns) * Zc - removed_bit);
printf("number of undecoded bits: %d\n", (Kb + nrows - no_punctured_columns - 2) * Zc - removed_bit);
encoder_implemparams_t impp = {.Zc = Zc, .Kb = Kb, .E = block_length, .BG = BG, .Kr = block_length, .K = block_length};
encoder_implemparams_t impp = {.Zc = Zc, .Kb = Kb, .BG = BG, .Kr = block_length, .K = block_length};
impp.gen_code = 2;
if (ntrials==0)
......
......@@ -3,13 +3,8 @@
*/
/*!\file nrLDPC_decoder_offload.c
* \brief Defines the LDPC decoder
* \author openairinterface
* \date 12-06-2021
* \version 1.0
* \note: based on testbbdev test_bbdev_perf.c functions. Harq buffer offset added.
* \mbuf and mempool allocated at the init step, LDPC parameters updated from OAI.
* \warning
*/
#include <stdint.h>
......@@ -55,17 +50,12 @@
// this socket is the NUMA socket, so the hardware CPU id (numa is complex)
#define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
#define MAX_QUEUES 16
#define MAX_QUEUES 32
#define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
#define SYNC_WAIT 0
#define SYNC_START 1
#define INVALID_OPAQUE -1
#define TIME_OUT_POLL 1e8
#define INVALID_QUEUE_ID -1
/* Increment for next code block in external HARQ memory */
#define HARQ_INCR 32768
/* Headroom for filler LLRs insertion in HARQ buffer */
......@@ -102,7 +92,7 @@ struct active_device {
static int nb_active_devs;
/* Data buffers used by BBDEV ops */
struct test_buffers {
struct data_buffers {
struct rte_bbdev_op_data *inputs;
struct rte_bbdev_op_data *hard_outputs;
struct rte_bbdev_op_data *soft_outputs;
......@@ -121,7 +111,7 @@ struct test_op_params {
uint16_t num_lcores;
int vector_mask;
rte_atomic16_t sync;
struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
struct data_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
};
/* Contains per lcore params */
......@@ -129,16 +119,9 @@ struct thread_params {
uint8_t dev_id;
uint16_t queue_id;
uint32_t lcore_id;
uint64_t start_time;
double ops_per_sec;
double mbps;
uint8_t iter_count;
double iter_average;
double bler;
struct nrLDPCoffload_params *p_offloadParams;
uint8_t iter_count;
uint8_t *p_out;
uint8_t r;
uint8_t harq_pid;
uint8_t ulsch_id;
rte_atomic16_t nb_dequeued;
rte_atomic16_t processing_status;
......@@ -176,11 +159,11 @@ optimal_mempool_size(unsigned int val)
return rte_align32pow2(val + 1) - 1;
}
// DPDK BBDEV modified - sizes passed to the function, use of data_len and nb_segments, remove code related to Soft outputs, HARQ
// inputs, HARQ outputs
// based on DPDK BBDEV create_mempools
static int create_mempools(struct active_device *ad, int socket_id, uint16_t num_ops, int out_buff_sz, int in_max_sz)
{
unsigned int ops_pool_size, mbuf_pool_size, data_room_size = 0;
num_ops = 1;
uint8_t nb_segments = 1;
ops_pool_size = optimal_mempool_size(RTE_MAX(
/* Ops used plus 1 reference op */
......@@ -289,8 +272,7 @@ const char *ldpcdec_flag_bitmask[] = {
"RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS",
};
// DPDK BBDEV modified - in DPDK this function is named add_bbdev_dev, removed code for RTE_BASEBAND_ACC100, IMO we can also remove
// RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC and RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC - to be checked
// based on DPDK BBDEV add_bbdev_dev
static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info)
{
int ret;
......@@ -346,7 +328,6 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info)
if (ret == 0) {
printf("Found LDCP encoding queue (id=%d) at prio%u on dev%u\n", queue_id, qconf.priority, dev_id);
qconf.priority++;
//ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, &qconf);
ad->enc_queue = queue_id;
ad->queue_ids[queue_id] = queue_id;
break;
......@@ -361,7 +342,6 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info)
if (ret == 0) {
printf("Found LDCP decoding queue (id=%d) at prio%u on dev%u\n", queue_id, qconf.priority, dev_id);
qconf.priority++;
//ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, &qconf);
ad->dec_queue = queue_id;
ad->queue_ids[queue_id] = queue_id;
break;
......@@ -372,28 +352,27 @@ static int add_dev(uint8_t dev_id, struct rte_bbdev_info *info)
return TEST_SUCCESS;
}
// DPDK BBDEV modified - nb_segments used, we are not using struct op_data_entries *ref_entries, but struct rte_mbuf *m_head,
// rte_pktmbuf_reset(m_head) added? if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) -> no code in else?
static int init_op_data_objs(struct rte_bbdev_op_data *bufs,
uint8_t *input,
uint32_t data_len,
struct rte_mbuf *m_head,
struct rte_mempool *mbuf_pool,
const uint16_t n,
enum op_data_type op_type,
uint16_t min_alignment)
// based on DPDK BBDEV init_op_data_objs
static int init_op_data_objs_dec(struct rte_bbdev_op_data *bufs,
uint8_t *input,
t_nrLDPCoffload_params *offloadParams,
struct rte_mempool *mbuf_pool,
const uint16_t n,
enum op_data_type op_type,
uint16_t min_alignment)
{
int ret;
unsigned int i, j;
bool large_input = false;
uint8_t nb_segments = 1;
for (i = 0; i < n; ++i) {
for (int i = 0; i < n; ++i) {
uint32_t data_len = offloadParams->perCB[i].E_cb;
char *data;
struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
TEST_ASSERT_NOT_NULL(m_head,
"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
op_type,
n,
mbuf_pool->size);
if (data_len > RTE_BBDEV_LDPC_E_MAX_MBUF) {
/*
* Special case when DPDK mbuf cannot handle
* the required input size
*/
printf("Warning: Larger input size than DPDK mbuf %u\n", data_len);
large_input = true;
}
......@@ -406,67 +385,79 @@ static int init_op_data_objs(struct rte_bbdev_op_data *bufs,
/* Allocate a fake overused mbuf */
data = rte_malloc(NULL, data_len, 0);
TEST_ASSERT_NOT_NULL(data, "rte malloc failed with %u bytes", data_len);
memcpy(data, input, data_len);
memcpy(data, &input[i * LDPC_MAX_CB_SIZE], data_len);
m_head->buf_addr = data;
m_head->buf_iova = rte_malloc_virt2iova(data);
m_head->data_off = 0;
m_head->data_len = data_len;
} else {
// rte_pktmbuf_reset added
rte_pktmbuf_reset(m_head);
data = rte_pktmbuf_append(m_head, data_len);
TEST_ASSERT_NOT_NULL(data, "Couldn't append %u bytes to mbuf from %d data type mbuf pool", data_len, op_type);
TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
data,
min_alignment);
rte_memcpy(data, input, data_len);
rte_memcpy(data, &input[i * LDPC_MAX_CB_SIZE], data_len);
}
bufs[i].length += data_len;
}
}
return 0;
}
for (j = 1; j < nb_segments; ++j) {
struct rte_mbuf *m_tail = rte_pktmbuf_alloc(mbuf_pool);
TEST_ASSERT_NOT_NULL(m_tail,
"Not enough mbufs in %d data type mbuf pool (needed %d, available %u)",
op_type,
n * nb_segments,
mbuf_pool->size);
// based on DPDK BBDEV init_op_data_objs
static int init_op_data_objs_enc(struct rte_bbdev_op_data *bufs,
uint8_t **input_enc,
t_nrLDPCoffload_params *offloadParams,
struct rte_mbuf *m_head,
struct rte_mempool *mbuf_pool,
const uint16_t n,
enum op_data_type op_type,
uint16_t min_alignment)
{
bool large_input = false;
for (int i = 0; i < n; ++i) {
uint32_t data_len = offloadParams->Kr;
char *data;
struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
TEST_ASSERT_NOT_NULL(m_head,
"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
op_type,
n,
mbuf_pool->size);
data = rte_pktmbuf_append(m_tail, data_len);
TEST_ASSERT_NOT_NULL(data, "Couldn't append %u bytes to mbuf from %d data type mbuf pool", data_len, op_type);
if (data_len > RTE_BBDEV_LDPC_E_MAX_MBUF) {
printf("Warning: Larger input size than DPDK mbuf %u\n", data_len);
large_input = true;
}
bufs[i].data = m_head;
bufs[i].offset = 0;
bufs[i].length = 0;
if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
if ((op_type == DATA_INPUT) && large_input) {
/* Allocate a fake overused mbuf */
data = rte_malloc(NULL, data_len, 0);
TEST_ASSERT_NOT_NULL(data, "rte malloc failed with %u bytes", data_len);
memcpy(data, &input_enc[0], data_len);
m_head->buf_addr = data;
m_head->buf_iova = rte_malloc_virt2iova(data);
m_head->data_off = 0;
m_head->data_len = data_len;
} else {
rte_pktmbuf_reset(m_head);
data = rte_pktmbuf_append(m_head, data_len);
TEST_ASSERT_NOT_NULL(data, "Couldn't append %u bytes to mbuf from %d data type mbuf pool", data_len, op_type);
TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
data,
min_alignment);
rte_memcpy(data, input, data_len);
bufs[i].length += data_len;
ret = rte_pktmbuf_chain(m_head, m_tail);
TEST_ASSERT_SUCCESS(ret, "Couldn't chain mbufs from %d data type mbuf pool", op_type);
rte_memcpy(data, input_enc[i], data_len);
}
} else {
/* allocate chained-mbuf for output buffer */
/*for (j = 1; j < nb_segments; ++j) {
struct rte_mbuf *m_tail =
rte_pktmbuf_alloc(mbuf_pool);
TEST_ASSERT_NOT_NULL(m_tail,
"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
op_type,
n * nb_segments,
mbuf_pool->size);
ret = rte_pktmbuf_chain(m_head, m_tail);
TEST_ASSERT_SUCCESS(ret,
"Couldn't chain mbufs from %d data type mbuf pool",
op_type);
}*/
bufs[i].length += data_len;
}
}
return 0;
}
......@@ -501,7 +492,7 @@ free_buffers(struct active_device *ad, struct test_op_params *op_params)
rte_mempool_free(ad->harq_in_mbuf_pool);
rte_mempool_free(ad->harq_out_mbuf_pool);
for (int i = 0; i < rte_lcore_count(); ++i) {
for (int i = 2; i < rte_lcore_count(); ++i) {
for (int j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
rte_free(op_params->q_bufs[j][i].inputs);
rte_free(op_params->q_bufs[j][i].hard_outputs);
......@@ -512,27 +503,22 @@ free_buffers(struct active_device *ad, struct test_op_params *op_params)
}
}
// OAI / DPDK BBDEV modified - in DPDK named copy_reference_dec_op, here we are passing t_nrLDPCoffload_params *p_offloadParams to
// the function, check what is value of n, commented code for code block mode
static void
set_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
unsigned int start_idx,
struct test_buffers *bufs,
struct rte_bbdev_dec_op *ref_op,
uint8_t r,
uint8_t harq_pid,
uint8_t ulsch_id,
t_nrLDPCoffload_params *p_offloadParams)
// based on DPDK BBDEV copy_reference_ldpc_dec_op
static void set_ldpc_dec_op(struct rte_bbdev_dec_op **ops,
unsigned int start_idx,
struct data_buffers *bufs,
uint8_t ulsch_id,
t_nrLDPCoffload_params *p_offloadParams)
{
unsigned int i;
for (i = 0; i < n; ++i) {
ops[i]->ldpc_dec.cb_params.e = p_offloadParams->E;
for (i = 0; i < p_offloadParams->C; ++i) {
ops[i]->ldpc_dec.cb_params.e = p_offloadParams->perCB[i].E_cb;
ops[i]->ldpc_dec.basegraph = p_offloadParams->BG;
ops[i]->ldpc_dec.z_c = p_offloadParams->Z;
ops[i]->ldpc_dec.q_m = p_offloadParams->Qm;
ops[i]->ldpc_dec.n_filler = p_offloadParams->F;
ops[i]->ldpc_dec.n_cb = p_offloadParams->n_cb;
ops[i]->ldpc_dec.iter_max = 20;
ops[i]->ldpc_dec.iter_max = p_offloadParams->numMaxIter;
ops[i]->ldpc_dec.rv_index = p_offloadParams->rv;
ops[i]->ldpc_dec.op_flags = RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE |
RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE |
......@@ -541,10 +527,13 @@ set_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
if (p_offloadParams->setCombIn) {
ops[i]->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
}
LOG_D(PHY,"ULSCH %02d HARQPID %02d R %02d COMBIN %d RV %d NCB %05d \n", ulsch_id, harq_pid, r, p_offloadParams->setCombIn, p_offloadParams->rv, p_offloadParams->n_cb);
ops[i]->ldpc_dec.code_block_mode = 1; // ldpc_dec->code_block_mode;
ops[i]->ldpc_dec.harq_combined_input.offset = ulsch_id * 64 * LDPC_MAX_CB_SIZE + r * LDPC_MAX_CB_SIZE;
ops[i]->ldpc_dec.harq_combined_output.offset = ulsch_id * 64 * LDPC_MAX_CB_SIZE + r * LDPC_MAX_CB_SIZE;
if (p_offloadParams->C > 1) {
ops[i]->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP;
ops[i]->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK;
}
ops[i]->ldpc_dec.code_block_mode = 1;
ops[i]->ldpc_dec.harq_combined_input.offset = ulsch_id * NR_LDPC_MAX_NUM_CB * LDPC_MAX_CB_SIZE + i * LDPC_MAX_CB_SIZE;
ops[i]->ldpc_dec.harq_combined_output.offset = ulsch_id * NR_LDPC_MAX_NUM_CB * LDPC_MAX_CB_SIZE + i * LDPC_MAX_CB_SIZE;
if (bufs->hard_outputs != NULL)
ops[i]->ldpc_dec.hard_output = bufs->hard_outputs[start_idx + i];
if (bufs->inputs != NULL)
......@@ -558,17 +547,15 @@ set_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
}
}
// based on DPDK BBDEV copy_reference_ldpc_enc_op
static void set_ldpc_enc_op(struct rte_bbdev_enc_op **ops,
unsigned int n,
unsigned int start_idx,
struct rte_bbdev_op_data *inputs,
struct rte_bbdev_op_data *outputs,
struct rte_bbdev_enc_op *ref_op,
t_nrLDPCoffload_params *p_offloadParams)
{
// struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
for (int i = 0; i < n; ++i) {
ops[i]->ldpc_enc.cb_params.e = p_offloadParams->E;
for (int i = 0; i < p_offloadParams->C; ++i) {
ops[i]->ldpc_enc.cb_params.e = p_offloadParams->perCB[i].E_cb;
ops[i]->ldpc_enc.basegraph = p_offloadParams->BG;
ops[i]->ldpc_enc.z_c = p_offloadParams->Z;
ops[i]->ldpc_enc.q_m = p_offloadParams->Qm;
......@@ -582,47 +569,52 @@ static void set_ldpc_enc_op(struct rte_bbdev_enc_op **ops,
}
}
// DPDK BBDEV modified - in DPDK called validate_dec_op, int8_t* p_out added, remove code related to op_data_entries, turbo_dec
// replaced by ldpc_dec, removed coderelated to soft_output, memcpy(p_out, data+m->data_off, data_len)
static int retrieve_ldpc_dec_op(struct rte_bbdev_dec_op **ops,
const uint16_t n,
struct rte_bbdev_dec_op *ref_op,
const int vector_mask,
uint8_t *p_out)
static int retrieve_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, const int vector_mask, uint8_t *p_out)
{
struct rte_bbdev_op_data *hard_output;
uint16_t data_len = 0;
struct rte_mbuf *m;
unsigned int i;
char *data;
int offset = 0;
for (i = 0; i < n; ++i) {
hard_output = &ops[i]->ldpc_dec.hard_output;
m = hard_output->data;
uint16_t data_len = rte_pktmbuf_data_len(m) - hard_output->offset;
data_len = rte_pktmbuf_data_len(m) - hard_output->offset;
data = m->buf_addr;
memcpy(p_out, data + m->data_off, data_len);
memcpy(&p_out[offset], data + m->data_off, data_len);
offset += data_len;
rte_pktmbuf_free(ops[i]->ldpc_dec.hard_output.data);
rte_pktmbuf_free(ops[i]->ldpc_dec.input.data);
}
return 0;
}
static int retrieve_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, struct rte_bbdev_enc_op *ref_op, uint8_t *p_out)
static int retrieve_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, uint8_t *p_out, nrLDPC_params_per_cb_t *perCB)
{
struct rte_bbdev_op_data *output;
struct rte_mbuf *m;
unsigned int i;
char *data;
uint8_t *out;
int offset = 0;
for (i = 0; i < n; ++i) {
output = &ops[i]->ldpc_enc.output;
m = output->data;
uint16_t data_len = min((LDPC_MAX_CB_SIZE) / 8, rte_pktmbuf_data_len(m)); // fix me
uint16_t data_len = rte_pktmbuf_data_len(m) - output->offset;
out = &p_out[offset];
data = m->buf_addr;
for (int byte = 0; byte < data_len; byte++)
for (int bit = 0; bit < 8; bit++)
p_out[byte * 8 + bit] = (data[m->data_off + byte] >> (7 - bit)) & 1;
out[byte * 8 + bit] = (data[m->data_off + byte] >> (7 - bit)) & 1;
offset += perCB[i].E_cb;
rte_pktmbuf_free(ops[i]->ldpc_enc.output.data);
rte_pktmbuf_free(ops[i]->ldpc_enc.input.data);
}
return 0;
}
// DPDK BBDEV copy
// based on DPDK BBDEV init_test_op_params
static int init_test_op_params(struct test_op_params *op_params,
enum rte_bbdev_op_type op_type,
struct rte_mempool *ops_mp,
......@@ -632,10 +624,10 @@ static int init_test_op_params(struct test_op_params *op_params,
{
int ret = 0;
if (op_type == RTE_BBDEV_OP_LDPC_DEC) {
ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, &op_params->ref_dec_op, 1);
ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, &op_params->ref_dec_op, num_to_process);
op_params->mp_dec = ops_mp;
} else {
ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, &op_params->ref_enc_op, 1);
ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, &op_params->ref_enc_op, num_to_process);
op_params->mp_enc = ops_mp;
}
......@@ -647,179 +639,125 @@ static int init_test_op_params(struct test_op_params *op_params,
return 0;
}
// DPDK BBDEV modified - in DPDK called throughput_pmd_lcore_ldpc_dec, code related to extDdr removed
// based on DPDK BBDEV throughput_pmd_lcore_ldpc_dec
static int
pmd_lcore_ldpc_dec(void *arg)
{
struct thread_params *tp = arg;
uint16_t enq, deq;
int time_out = 0;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_ops = tp->op_params->num_to_process;
struct rte_bbdev_dec_op *ops_enq[num_ops];
struct rte_bbdev_dec_op *ops_deq[num_ops];
struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
uint8_t r = tp->r;
uint8_t harq_pid = tp->harq_pid;
const uint16_t num_segments = tp->p_offloadParams->C;
struct rte_bbdev_dec_op *ops_enq[num_segments];
struct rte_bbdev_dec_op *ops_deq[num_segments];
uint8_t ulsch_id = tp->ulsch_id;
struct test_buffers *bufs = NULL;
int i, j, ret;
struct data_buffers *bufs = NULL;
int i, ret;
struct rte_bbdev_info info;
uint16_t num_to_enq;
uint8_t *p_out = tp->p_out;
t_nrLDPCoffload_params *p_offloadParams = tp->p_offloadParams;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
TEST_ASSERT_SUCCESS((num_segments > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
rte_bbdev_info_get(tp->dev_id, &info);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_mempool_get_bulk(tp->op_params->mp_dec, (void **)ops_enq, num_ops);
// looks like a bbdev internal error for the free operation, workaround here
ops_enq[0]->mempool = tp->op_params->mp_dec;
// ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
set_ldpc_dec_op(ops_enq,
num_ops,
0,
bufs,
ref_op,
r,
harq_pid,
ulsch_id,
p_offloadParams);
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp_dec, ops_enq, num_segments);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_segments);
set_ldpc_dec_op(ops_enq, 0, bufs, ulsch_id, p_offloadParams);
for (enq = 0, deq = 0; enq < num_segments;) {
num_to_enq = num_segments;
if (unlikely(num_segments - enq < num_to_enq))
num_to_enq = num_segments - enq;
enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
}
/* dequeue the remaining */
int time_out = 0;
while (deq < enq) {
deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
time_out++;
DevAssert(time_out <= TIME_OUT_POLL);
}
// This if statement is not in DPDK
if (deq == enq) {
tp->iter_count = 0;
/* get the max of iter_count for all dequeued ops */
for (i = 0; i < num_ops; ++i) {
for (i = 0; i < num_segments; ++i) {
uint8_t *status = tp->p_offloadParams->perCB[i].p_status_cb;
tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, tp->iter_count);
*status = ops_enq[i]->status;
}
ret = retrieve_ldpc_dec_op(ops_deq, num_ops, ref_op, tp->op_params->vector_mask, p_out);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
ret = retrieve_ldpc_dec_op(ops_deq, num_segments, tp->op_params->vector_mask, p_out);
TEST_ASSERT_SUCCESS(ret, "LDPC offload decoder failed!");
}
if (num_ops > 0)
rte_mempool_put_bulk(ops_enq[0]->mempool, (void **)ops_enq, num_ops);
// Return the worst decoding number of iterations for all segments
rte_bbdev_dec_op_free_bulk(ops_enq, num_segments);
// Return the max number of iterations across all segments
return tp->iter_count;
}
// DPDK BBDEV copy - in DPDK called throughput_pmd_lcore_ldpc_enc
// based on DPDK BBDEV throughput_pmd_lcore_ldpc_enc
static int pmd_lcore_ldpc_enc(void *arg)
{
struct thread_params *tp = arg;
uint16_t enq, deq;
int time_out = 0;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_ops = tp->op_params->num_to_process;
struct rte_bbdev_enc_op *ops_enq[num_ops];
struct rte_bbdev_enc_op *ops_deq[num_ops];
struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
int j, ret;
const uint16_t num_segments = tp->p_offloadParams->C;
tp->op_params->num_to_process = num_segments;
struct rte_bbdev_enc_op *ops_enq[num_segments];
struct rte_bbdev_enc_op *ops_deq[num_segments];
struct rte_bbdev_info info;
int ret;
struct data_buffers *bufs = NULL;
uint16_t num_to_enq;
uint8_t *p_out = tp->p_out;
t_nrLDPCoffload_params *p_offloadParams = tp->p_offloadParams;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
TEST_ASSERT_SUCCESS((num_segments > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
struct rte_bbdev_info info;
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);
struct test_buffers *bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_mempool_get_bulk(tp->op_params->mp_enc, (void **)ops_enq, num_ops);
// ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
ops_enq[0]->mempool = tp->op_params->mp_enc;
set_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, bufs->hard_outputs, ref_op, p_offloadParams);
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (j = 0; j < num_ops; ++j)
mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp_enc, ops_enq, num_segments);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_segments);
set_ldpc_enc_op(ops_enq, 0, bufs->inputs, bufs->hard_outputs, p_offloadParams);
for (enq = 0, deq = 0; enq < num_segments;) {
num_to_enq = num_segments;
if (unlikely(num_segments - enq < num_to_enq))
num_to_enq = num_segments - enq;
enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
}
/* dequeue the remaining */
int time_out = 0;
while (deq < enq) {
deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
time_out++;
DevAssert(time_out <= TIME_OUT_POLL);
}
ret = retrieve_ldpc_enc_op(ops_deq, num_ops, ref_op, p_out);
ret = retrieve_ldpc_enc_op(ops_deq, num_segments, p_out, tp->p_offloadParams->perCB);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
// rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
if (num_ops > 0)
rte_mempool_put_bulk(ops_enq[0]->mempool, (void **)ops_enq, num_ops);
rte_bbdev_enc_op_free_bulk(ops_enq, num_segments);
return ret;
}
/*
* Test function that determines how long an enqueue + dequeue of a burst
* takes on available lcores.
*/
// OAI / DPDK BBDEV modified - in DPDK called throughput_test, here we pass more parameters to the function (t_nrLDPCoffload_params
// *p_offloadParams, uint8_t r, ...), many commented lines Removed code which specified which function to use based on the op_type,
// now we are using only pmd_lcore_ldpc_dec for RTE_BBDEV_OP_LDPC_DEC op type. Encoder is RTE_BBDEV_OP_LDPC_ENC op type,
// pmd_lcore_ldpc_enc to be implemented.
// based on DPDK BBDEV throughput_pmd_lcore_dec
int start_pmd_dec(struct active_device *ad,
struct test_op_params *op_params,
t_nrLDPCoffload_params *p_offloadParams,
uint8_t r,
uint8_t harq_pid,
uint8_t ulsch_id,
uint8_t *p_out)
{
int ret;
unsigned int lcore_id, used_cores = 0;
// struct rte_bbdev_info info;
uint16_t num_lcores;
// rte_bbdev_info_get(ad->dev_id, &info);
/* Set number of lcores */
num_lcores = (ad->nb_queues < (op_params->num_lcores)) ? ad->nb_queues : op_params->num_lcores;
/* Allocate memory for thread parameters structure */
......@@ -828,20 +766,16 @@ int start_pmd_dec(struct active_device *ad,
"Failed to alloc %zuB for t_params",
RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE));
rte_atomic16_set(&op_params->sync, SYNC_WAIT);
/* Master core is set at first entry */
t_params[0].dev_id = ad->dev_id;
t_params[0].lcore_id = 15;
t_params[0].op_params = op_params;
t_params[0].queue_id = ad->dec_queue;
used_cores++;
t_params[0].iter_count = 0;
t_params[0].p_out = p_out;
t_params[0].p_offloadParams = p_offloadParams;
t_params[0].r = r;
t_params[0].harq_pid = harq_pid;
t_params[0].ulsch_id = ulsch_id;
used_cores++;
// For now, we never enter here, we don't use the DPDK thread pool
RTE_LCORE_FOREACH_WORKER(lcore_id) {
if (used_cores >= num_lcores)
......@@ -853,58 +787,38 @@ int start_pmd_dec(struct active_device *ad,
t_params[used_cores].iter_count = 0;
t_params[used_cores].p_out = p_out;
t_params[used_cores].p_offloadParams = p_offloadParams;
t_params[used_cores].r = r;
t_params[used_cores].harq_pid = harq_pid;
t_params[used_cores].ulsch_id = ulsch_id;
rte_eal_remote_launch(pmd_lcore_ldpc_dec, &t_params[used_cores++], lcore_id);
}
rte_atomic16_set(&op_params->sync, SYNC_START);
ret = pmd_lcore_ldpc_dec(&t_params[0]);
/* Master core is always used */
// for (used_cores = 1; used_cores < num_lcores; used_cores++)
// ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
/* Return if test failed */
if (ret < 0) {
rte_free(t_params);
return ret;
}
rte_free(t_params);
return ret;
}
// based on DPDK BBDEV throughput_pmd_lcore_enc
int32_t start_pmd_enc(struct active_device *ad,
struct test_op_params *op_params,
t_nrLDPCoffload_params *p_offloadParams,
uint8_t *p_out)
{
int ret;
unsigned int lcore_id, used_cores = 0;
uint16_t num_lcores;
int ret;
num_lcores = (ad->nb_queues < (op_params->num_lcores)) ? ad->nb_queues : op_params->num_lcores;
/* Allocate memory for thread parameters structure */
struct thread_params *t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), RTE_CACHE_LINE_SIZE);
TEST_ASSERT_NOT_NULL(t_params,
"Failed to alloc %zuB for t_params",
RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE));
rte_atomic16_set(&op_params->sync, SYNC_WAIT);
/* Master core is set at first entry */
t_params[0].dev_id = ad->dev_id;
t_params[0].lcore_id = 14;
t_params[0].op_params = op_params;
// t_params[0].queue_id = ad->queue_ids[used_cores++];
used_cores++;
t_params[0].queue_id = ad->enc_queue;
t_params[0].iter_count = 0;
t_params[0].p_out = p_out;
t_params[0].p_offloadParams = p_offloadParams;
used_cores++;
// For now, we never enter here, we don't use the DPDK thread pool
RTE_LCORE_FOREACH_WORKER(lcore_id) {
if (used_cores >= num_lcores)
......@@ -916,14 +830,8 @@ int32_t start_pmd_enc(struct active_device *ad,
t_params[used_cores].iter_count = 0;
rte_eal_remote_launch(pmd_lcore_ldpc_enc, &t_params[used_cores++], lcore_id);
}
rte_atomic16_set(&op_params->sync, SYNC_START);
ret = pmd_lcore_ldpc_enc(&t_params[0]);
if (ret) {
rte_free(t_params);
return ret;
}
rte_free(t_params);
return ret;
}
......@@ -944,7 +852,7 @@ int32_t LDPCinit()
char *dpdk_dev = "d8:00.0"; //PCI address of the card
char *argv_re[] = {"bbdev", "-a", dpdk_dev, "-l", "14-15", "--file-prefix=b6", "--"};
// EAL initialization, if already initialized (init in xran lib) try to probe DPDK device
ret = rte_eal_init(5, argv_re);
ret = rte_eal_init(sizeofArray(argv_re), argv_re);
if (ret < 0) {
printf("EAL initialization failed, probing DPDK device %s\n", dpdk_dev);
if (rte_dev_probe(dpdk_dev) != 0) {
......@@ -969,50 +877,18 @@ int32_t LDPCinit()
int socket_id = GET_SOCKET(info.socket_id);
int out_max_sz = 8448; // max code block size (for BG1), 22 * 384
int in_max_sz = LDPC_MAX_CB_SIZE; // max number of encoded bits (for BG2 and MCS0)
int num_ops = 1;
int f_ret = create_mempools(ad, socket_id, num_ops, out_max_sz, in_max_sz);
int num_queues = 1;
int f_ret = create_mempools(ad, socket_id, num_queues, out_max_sz, in_max_sz);
if (f_ret != TEST_SUCCESS) {
printf("Couldn't create mempools");
return -1;
}
// get_num_lcores() hardcoded to 1: we use one core for decode, and another for encode
// this code from bbdev test example is not considering encode and decode test
// get_num_ops() replaced by 1: LDPC decode and ldpc encode (7th param)
f_ret = init_test_op_params(op_params, RTE_BBDEV_OP_LDPC_DEC, ad->bbdev_dec_op_pool, 1, 1, 1);
f_ret |= init_test_op_params(op_params, RTE_BBDEV_OP_LDPC_ENC, ad->bbdev_enc_op_pool, 1, 1, 1);
f_ret = init_test_op_params(op_params, RTE_BBDEV_OP_LDPC_DEC, ad->bbdev_dec_op_pool, num_queues, num_queues, 1);
f_ret = init_test_op_params(op_params, RTE_BBDEV_OP_LDPC_ENC, ad->bbdev_enc_op_pool, num_queues, num_queues, 1);
if (f_ret != TEST_SUCCESS) {
printf("Couldn't init test op params");
return -1;
}
// fill_queue_buffers -> allocate_buffers_on_socket
for (int i = 0; i < ad->nb_queues; ++i) {
const uint16_t n = op_params->num_to_process;
struct rte_mempool *in_mp = ad->in_mbuf_pool;
struct rte_mempool *hard_out_mp = ad->hard_out_mbuf_pool;
struct rte_mempool *soft_out_mp = ad->soft_out_mbuf_pool;
struct rte_mempool *harq_in_mp = ad->harq_in_mbuf_pool;
struct rte_mempool *harq_out_mp = ad->harq_out_mbuf_pool;
struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {in_mp, soft_out_mp, hard_out_mp, harq_in_mp, harq_out_mp};
uint8_t queue_id = ad->queue_ids[i];
struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {&op_params->q_bufs[socket_id][queue_id].inputs,
&op_params->q_bufs[socket_id][queue_id].soft_outputs,
&op_params->q_bufs[socket_id][queue_id].hard_outputs,
&op_params->q_bufs[socket_id][queue_id].harq_inputs,
&op_params->q_bufs[socket_id][queue_id].harq_outputs};
for (enum op_data_type type = DATA_INPUT; type < 3; type += 2) {
int ret = allocate_buffers_on_socket(queue_ops[type], n * sizeof(struct rte_bbdev_op_data), socket_id);
TEST_ASSERT_SUCCESS(ret, "Couldn't allocate memory for rte_bbdev_op_data structs");
m_head[type] = rte_pktmbuf_alloc(mbuf_pools[type]);
TEST_ASSERT_NOT_NULL(m_head[type],
"Not enough mbufs in %d data type mbuf pool (needed %d, available %u)",
type,
1,
mbuf_pools[type]->size);
}
}
return 0;
}
......@@ -1023,7 +899,6 @@ int32_t LDPCshutdown()
struct rte_bbdev_stats stats;
free_buffers(ad, op_params);
rte_free(op_params);
// Stop and close bbdev
rte_bbdev_stats_get(dev_id, &stats);
rte_bbdev_stop(dev_id);
rte_bbdev_close(dev_id);
......@@ -1043,17 +918,23 @@ int32_t LDPCdecoder(struct nrLDPC_dec_params *p_decParams,
{
pthread_mutex_lock(&decode_mutex);
// hardcoded we use first device
struct active_device *ad = active_devs;
t_nrLDPCoffload_params offloadParams = {.E = p_decParams->E,
.n_cb = (p_decParams->BG == 1) ? (66 * p_decParams->Z) : (50 * p_decParams->Z),
t_nrLDPCoffload_params offloadParams = {.n_cb = (p_decParams->BG == 1) ? (66 * p_decParams->Z) : (50 * p_decParams->Z),
.BG = p_decParams->BG,
.Z = p_decParams->Z,
.rv = p_decParams->rv,
.F = p_decParams->F,
.Qm = p_decParams->Qm,
.numMaxIter = p_decParams->numMaxIter,
.C = C,
.setCombIn = p_decParams->setCombIn};
for (int r = 0; r < C; r++) {
offloadParams.perCB[r].E_cb = p_decParams->perCB[r].E_cb;
offloadParams.perCB[r].p_status_cb = &(p_decParams->perCB[r].status_cb);
}
struct rte_bbdev_info info;
int ret;
rte_bbdev_info_get(ad->dev_id, &info);
int socket_id = GET_SOCKET(info.socket_id);
// fill_queue_buffers -> init_op_data_objs
......@@ -1069,24 +950,23 @@ int32_t LDPCdecoder(struct nrLDPC_dec_params *p_decParams,
&op_params->q_bufs[socket_id][queue_id].hard_outputs,
&op_params->q_bufs[socket_id][queue_id].harq_inputs,
&op_params->q_bufs[socket_id][queue_id].harq_outputs};
// this should be modified
// enum rte_bbdev_op_type op_type = RTE_BBDEV_OP_LDPC_DEC;
for (enum op_data_type type = DATA_INPUT; type < 3; type += 2) {
int ret = init_op_data_objs(*queue_ops[type],
ret = allocate_buffers_on_socket(queue_ops[type], C * sizeof(struct rte_bbdev_op_data), socket_id);
TEST_ASSERT_SUCCESS(ret, "Couldn't allocate memory for rte_bbdev_op_data structs");
ret = init_op_data_objs_dec(*queue_ops[type],
(uint8_t *)p_llr,
p_decParams->E,
m_head[type],
&offloadParams,
mbuf_pools[type],
1,
C,
type,
info.drv.min_alignment);
TEST_ASSERT_SUCCESS(ret, "Couldn't init rte_bbdev_op_data structs");
}
int ret = start_pmd_dec(ad, op_params, &offloadParams, C, harq_pid, ulsch_id, (uint8_t *)p_out);
ret = start_pmd_dec(ad, op_params, &offloadParams, ulsch_id, (uint8_t *)p_out);
if (ret < 0) {
printf("Couldn't start pmd dec\n");
pthread_mutex_unlock(&decode_mutex);
return (20); // Fix me: we should propoagate max_iterations properly in the call (impp struct)
return (p_decParams->numMaxIter);
}
pthread_mutex_unlock(&decode_mutex);
return ret;
......@@ -1097,16 +977,23 @@ int32_t LDPCencoder(unsigned char **input, unsigned char **output, encoder_imple
pthread_mutex_lock(&encode_mutex);
// hardcoded to use the first found board
struct active_device *ad = active_devs;
int Zc = impp->Zc;
int BG = impp->BG;
t_nrLDPCoffload_params offloadParams = {.E = impp->E,
.n_cb = (BG == 1) ? (66 * Zc) : (50 * Zc),
.BG = BG,
.Z = Zc,
int ret;
uint32_t Nref = 0;
t_nrLDPCoffload_params offloadParams = {.n_cb = (impp->BG == 1) ? (66 * impp->Zc) : (50 * impp->Zc),
.BG = impp->BG,
.Z = impp->Zc,
.rv = impp->rv,
.F = impp->F,
.Qm = impp->Qm,
.C = impp->n_segments,
.Kr = (impp->K - impp->F + 7) / 8};
for (int r = 0; r < impp->n_segments; r++) {
offloadParams.perCB[r].E_cb = impp->perCB[r].E_cb;
}
if (impp->Tbslbrm != 0) {
Nref = 3 * impp->Tbslbrm / (2 * impp->n_segments);
offloadParams.n_cb = min(offloadParams.n_cb, Nref);
}
struct rte_bbdev_info info;
rte_bbdev_info_get(ad->dev_id, &info);
int socket_id = GET_SOCKET(info.socket_id);
......@@ -1124,17 +1011,19 @@ int32_t LDPCencoder(unsigned char **input, unsigned char **output, encoder_imple
&op_params->q_bufs[socket_id][queue_id].harq_inputs,
&op_params->q_bufs[socket_id][queue_id].harq_outputs};
for (enum op_data_type type = DATA_INPUT; type < 3; type += 2) {
int ret = init_op_data_objs(*queue_ops[type],
*input,
offloadParams.Kr,
ret = allocate_buffers_on_socket(queue_ops[type], impp->n_segments * sizeof(struct rte_bbdev_op_data), socket_id);
TEST_ASSERT_SUCCESS(ret, "Couldn't allocate memory for rte_bbdev_op_data structs");
ret = init_op_data_objs_enc(*queue_ops[type],
input,
&offloadParams,
m_head[type],
mbuf_pools[type],
1,
impp->n_segments,
type,
info.drv.min_alignment);
TEST_ASSERT_SUCCESS(ret, "Couldn't init rte_bbdev_op_data structs");
}
int ret=start_pmd_enc(ad, op_params, &offloadParams, *output);
ret = start_pmd_enc(ad, op_params, &offloadParams, *output);
pthread_mutex_unlock(&encode_mutex);
return ret;
}
......@@ -78,6 +78,15 @@ typedef enum nrLDPC_outMode {
nrLDPC_outMode_LLRINT8 /**< Single LLR value per int8_t output */
} e_nrLDPC_outMode;
/**
Structure containing LDPC parameters per CB (code block).
One entry per code block is kept, as a perCB[NR_LDPC_MAX_NUM_CB] array, inside the
decoder, encoder and offload parameter structures.
*/
typedef struct nrLDPC_params_per_cb {
uint32_t E_cb; /**< Value of nr_get_E() for this code block; callers also use it as the rate-matching length and to advance the per-segment offset */
uint8_t status_cb; /**< Per-code-block decode status; decode_offload() treats 0 as a correctly decoded segment */
uint8_t* p_status_cb; /**< In the offload parameters LDPCdecoder() points this at the caller's status_cb; presumably filled in by the offload driver -- TODO confirm */
} nrLDPC_params_per_cb_t;
/**
Structure containing LDPC decoder parameters.
*/
......@@ -94,18 +103,24 @@ typedef struct nrLDPC_dec_params {
int crc_type;
int (*check_crc)(uint8_t* decoded_bytes, uint32_t n, uint8_t crc_type);
uint8_t setCombIn;
nrLDPC_params_per_cb_t perCB[NR_LDPC_MAX_NUM_CB];
} t_nrLDPC_dec_params;
/**
Structure containing LDPC offload parameters.
Filled by LDPCdecoder()/LDPCencoder() from the caller's decoder/encoder parameters
and handed to the offload processing for all code blocks of one call.
*/
typedef struct nrLDPCoffload_params {
uint8_t BG; /**< Base graph */
uint16_t Z; /**< Copied from Z (decoder) or Zc (encoder) of the caller's parameters; presumably the lifting size -- TODO confirm */
uint16_t Kr; /**< Set by LDPCencoder() to (K - F + 7) / 8; presumably the payload bytes per code block -- TODO confirm */
uint8_t rv; /**< Redundancy version index */
uint32_t E; /**< NOTE(review): per-code-block values are carried in perCB[].E_cb; confirm whether this field is still needed */
uint16_t n_cb; /**< 66 * Z for BG 1, otherwise 50 * Z; LDPCencoder() additionally caps it to Nref when Tbslbrm != 0 */
uint16_t F; /**< Filler bits */
uint8_t Qm; /**< Modulation */
uint8_t C; /**< Number of code blocks (segments) handled in this call */
uint8_t numMaxIter; /**< Copied from the decoder parameters' numMaxIter */
uint8_t setCombIn; /**< Copied from the decoder parameters; nr_ulsch_decoding() sets it to !harq_to_be_cleared */
nrLDPC_params_per_cb_t perCB[NR_LDPC_MAX_NUM_CB]; /**< Per-code-block parameters, entries [0, C) are filled */
} t_nrLDPCoffload_params;
/**
......
......@@ -99,6 +99,8 @@
/** Maximum number of possible input LLR = NR_LDPC_NCOL_BG1*NR_LDPC_ZMAX */
#define NR_LDPC_MAX_NUM_LLR 27000
/** Maximum number of code blocks (CB); sizes the perCB[] arrays of the LDPC parameter structures */
#define NR_LDPC_MAX_NUM_CB 72
// ==============================================================================
// GLOBAL CONSTANT VARIABLES
......
......@@ -59,8 +59,9 @@ typedef struct {
uint32_t F;
/// Modulation order
uint8_t Qm;
uint32_t E;
uint32_t Tbslbrm;
unsigned int G;
nrLDPC_params_per_cb_t perCB[NR_LDPC_MAX_NUM_CB];
// Redundancy version index
uint8_t rv;
} encoder_implemparams_t;
......
......@@ -137,6 +137,9 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB)
if (gNB->ldpc_offload_flag)
load_LDPClib("_t2", &ldpc_interface_offload);
else
load_LDPClib(NULL, &ldpc_interface);
gNB->max_nb_pdsch = MAX_MOBILES_PER_GNB;
init_delay_table(fp->ofdm_symbol_size, MAX_DELAY_COMP, NR_MAX_OFDM_SYMBOL_SIZE, fp->delay_table);
......
......@@ -360,6 +360,7 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB,
impp.harq = harq;
if (gNB->ldpc_offload_flag) {
impp.Qm = rel15->qamModOrder[0];
impp.Tbslbrm = rel15->maintenance_parms_v3.tbSizeLbrmBytes;
impp.rv = rel15->rvIndex[0];
int nb_re_dmrs =
(rel15->dmrsConfigType == NFAPI_NR_DMRS_TYPE1) ? (6 * rel15->numDmrsCdmGrpsNoData) : (4 * rel15->numDmrsCdmGrpsNoData);
......@@ -370,13 +371,10 @@ int nr_dlsch_encoding(PHY_VARS_gNB *gNB,
harq->unav_res,
rel15->qamModOrder[0],
rel15->nrOfLayers);
int r_offset = 0;
for (int r = 0; r < impp.n_segments; r++) {
impp.E = nr_get_E(impp.G, impp.n_segments, impp.Qm, rel15->nrOfLayers, r);
uint8_t *f = impp.output + r_offset;
ldpc_interface_offload.LDPCencoder(&harq->c[r], &f, &impp);
r_offset += impp.E;
impp.perCB[r].E_cb = nr_get_E(impp.G, impp.n_segments, impp.Qm, rel15->nrOfLayers, r);
}
ldpc_interface_offload.LDPCencoder(harq->c, &impp.output, &impp);
} else {
notifiedFIFO_t nf;
initNotifiedFIFO(&nf);
......
......@@ -232,64 +232,59 @@ int decode_offload(PHY_VARS_gNB *phy_vars_gNB,
{
NR_gNB_ULSCH_t *ulsch = &phy_vars_gNB->ulsch[ULSCH_id];
NR_UL_gNB_HARQ_t *harq_process = ulsch->harq_process;
int16_t z_ol[LDPC_MAX_CB_SIZE] __attribute__((aligned(16)));
int8_t l_ol[LDPC_MAX_CB_SIZE] __attribute__((aligned(16)));
uint8_t Qm = pusch_pdu->qam_mod_order;
uint8_t n_layers = pusch_pdu->nrOfLayers;
int16_t z_ol[NR_LDPC_MAX_NUM_CB * LDPC_MAX_CB_SIZE] __attribute__((aligned(16)));
int8_t l_ol[NR_LDPC_MAX_NUM_CB * LDPC_MAX_CB_SIZE] __attribute__((aligned(16)));
const int kc = decParams->BG == 2 ? 52 : 68;
uint32_t A = (harq_process->TBS) << 3;
const int Kr = harq_process->K;
const int Kr_bytes = Kr >> 3;
uint32_t A = (harq_process->TBS) << 3;
const int kc = decParams->BG == 2 ? 52 : 68;
ulsch->max_ldpc_iterations = 20;
int decodeIterations = 2;
int r_offset = 0, offset = 0;
int8_t decodeIterations = 0;
int r_offset = 0;
int offset = 0;
// new data received, set processedSegments to 0
if (!decParams->setCombIn)
harq_process->processedSegments = 0;
for (int r = 0; r < harq_process->C; r++) {
int E = nr_get_E(G, harq_process->C, Qm, n_layers, r);
memset(harq_process->c[r], 0, Kr_bytes);
decParams->R = nr_get_R_ldpc_decoder(pusch_pdu->pusch_data.rv_index,
E,
decParams->BG,
decParams->Z,
&harq_process->llrLen,
harq_process->round);
memcpy(z_ol, ulsch_llr + r_offset, E * sizeof(short));
simde__m128i *pv_ol128 = (simde__m128i *)&z_ol;
simde__m128i *pl_ol128 = (simde__m128i *)&l_ol;
decParams->perCB[r].E_cb = nr_get_E(G, harq_process->C, decParams->Qm, pusch_pdu->nrOfLayers, r);
memcpy(&z_ol[offset], ulsch_llr + r_offset, decParams->perCB[r].E_cb * sizeof(*z_ol));
simde__m128i *pv_ol128 = (simde__m128i *)&z_ol[offset];
simde__m128i *pl_ol128 = (simde__m128i *)&l_ol[offset];
for (int i = 0, j = 0; j < ((kc * harq_process->Z) >> 4) + 1; i += 2, j++) {
pl_ol128[j] = simde_mm_packs_epi16(pv_ol128[i], pv_ol128[i + 1]);
}
decParams->E = E;
decParams->rv = pusch_pdu->pusch_data.rv_index;
decParams->F = harq_process->F;
decParams->Qm = Qm;
decodeIterations =
ldpc_interface_offload
.LDPCdecoder(decParams, harq_pid, ULSCH_id, r, (int8_t *)&pl_ol128[0], (int8_t *)harq_process->c[r], NULL, NULL);
if (decodeIterations < 0) {
LOG_E(PHY, "ulsch_decoding.c: Problem in LDPC decoder offload\n");
return -1;
}
bool decodeSuccess = check_crc((uint8_t *)harq_process->c[r], lenWithCrc(harq_process->C, A), crcType(harq_process->C, A));
if (decodeSuccess) {
memcpy(harq_process->b + offset, harq_process->c[r], Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0));
offset += (Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0));
r_offset += decParams->perCB[r].E_cb;
offset += LDPC_MAX_CB_SIZE;
}
int8_t *p_outDec = calloc(harq_process->C * Kr_bytes, sizeof(int8_t));
decodeIterations =
ldpc_interface_offload.LDPCdecoder(decParams, harq_pid, ULSCH_id, harq_process->C, (int8_t *)l_ol, p_outDec, NULL, NULL);
if (decodeIterations < 0) {
LOG_E(PHY, "ulsch_decoding.c: Problem in LDPC decoder offload\n");
return -1;
}
int offset_b = 0;
for (int r = 0; r < harq_process->C; r++) {
if (decParams->perCB[r].status_cb == 0 || harq_process->C == 1) {
memcpy(harq_process->b + offset_b, &p_outDec[offset_b], Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0));
harq_process->processedSegments++;
} else {
LOG_D(PHY, "uplink segment error %d/%d\n", r, harq_process->C);
LOG_D(PHY, "ULSCH %d in error\n", ULSCH_id);
}
r_offset += E;
offset_b += (Kr_bytes - (harq_process->F >> 3) - ((harq_process->C > 1) ? 3 : 0));
}
bool crc_valid = false;
// CRC check made by the T2, no need to perform CRC check for a single code block twice
if (harq_process->processedSegments == harq_process->C) {
// When the number of code blocks is 1 (C = 1) and ulsch_harq->processedSegments = 1, we can assume a good TB because of the
// CRC check made by the LDPC for early termination, so, no need to perform CRC check twice for a single code block
crc_valid = true;
if (harq_process->C > 1) {
crc_valid = check_crc(harq_process->b, lenWithCrc(1, A), crcType(1, A));
crc_valid = check_crc(harq_process->b, lenWithCrc(1, A), crcType(1, A));
if (harq_process->C == 1 && !crc_valid) {
harq_process->processedSegments--;
}
}
if (crc_valid) {
LOG_D(PHY, "ULSCH: Setting ACK for slot %d TBS %d\n", ulsch->slot, harq_process->TBS);
nr_fill_indication(phy_vars_gNB, ulsch->frame, ulsch->slot, ULSCH_id, harq_pid, 0, 0);
......@@ -312,6 +307,7 @@ int decode_offload(PHY_VARS_gNB *phy_vars_gNB,
}
ulsch->last_iteration_cnt = decodeIterations;
free(p_outDec);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_gNB_ULSCH_DECODING,0);
return 0;
}
......@@ -349,7 +345,6 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
const uint8_t n_layers = pusch_pdu->nrOfLayers;
// ------------------------------------------------------------------
harq_process->processedSegments = 0;
harq_process->TBS = pusch_pdu->pusch_data.tb_size;
t_nrLDPC_dec_params decParams = {.check_crc = check_crc};
......@@ -418,6 +413,8 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
decParams.Z = harq_process->Z;
decParams.numMaxIter = ulsch->max_ldpc_iterations;
decParams.Qm = Qm;
decParams.rv = pusch_pdu->pusch_data.rv_index;
decParams.outMode = 0;
decParams.setCombIn = !harq_process->harq_to_be_cleared;
if (harq_process->harq_to_be_cleared) {
......@@ -428,7 +425,7 @@ int nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
if (phy_vars_gNB->ldpc_offload_flag)
return decode_offload(phy_vars_gNB, ULSCH_id, ulsch_llr, pusch_pdu, &decParams, harq_pid, G);
harq_process->processedSegments = 0;
uint32_t offset = 0, r_offset = 0;
set_abort(&harq_process->abort_decode, false);
for (int r = 0; r < harq_process->C; r++) {
......
......@@ -151,16 +151,13 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
start_meas(&ue->ulsch_ldpc_encoding_stats);
if (ldpc_interface_offload.LDPCencoder) {
for (int j = 0; j < impp.n_segments; j++) {
impp.E = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, j);
uint8_t *f = harq_process->f + r_offset;
ldpc_interface_offload.LDPCencoder(&harq_process->c[j], &f, &impp);
r_offset += impp.E;
impp.perCB[j].E_cb = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, j);
}
ldpc_interface_offload.LDPCencoder(harq_process->c, &harq_process->f, &impp);
} else {
if (ulsch->pusch_pdu.pusch_data.new_data_indicator) {
for (int j = 0; j < (impp.n_segments / 8 + 1); j++) {
impp.macro_num = j;
impp.E = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, j);
impp.Kr = impp.K;
ldpc_interface.LDPCencoder(harq_process->c, harq_process->d, &impp);
}
......@@ -191,7 +188,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
ulsch->pusch_pdu.pusch_data.rv_index);
///////////////////////// d---->| Rate matching bit selection |---->e /////////////////////////
impp.E = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, r);
impp.perCB[r].E_cb = nr_get_E(G, impp.n_segments, impp.Qm, ulsch->pusch_pdu.nrOfLayers, r);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RATE_MATCHING_LDPC, VCD_FUNCTION_IN);
start_meas(&ue->ulsch_rate_matching_stats);
......@@ -204,7 +201,8 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
impp.F,
impp.Kr - impp.F - 2 * impp.Zc,
impp.rv,
impp.E) == -1)
impp.perCB[r].E_cb)
== -1)
return -1;
stop_meas(&ue->ulsch_rate_matching_stats);
......@@ -218,10 +216,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
///////////////////////// e---->| Rate matching bit interleaving |---->f /////////////////////////
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_INTERLEAVING_LDPC, VCD_FUNCTION_IN);
start_meas(&ue->ulsch_interleaving_stats);
nr_interleaving_ldpc(impp.E,
impp.Qm,
harq_process->e + r_offset,
harq_process->f + r_offset);
nr_interleaving_ldpc(impp.perCB[r].E_cb, impp.Qm, harq_process->e + r_offset, harq_process->f + r_offset);
stop_meas(&ue->ulsch_interleaving_stats);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_NR_INTERLEAVING_LDPC, VCD_FUNCTION_OUT);
......@@ -231,7 +226,7 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
if (r == impp.n_segments - 1)
write_output("enc_output.m","enc", harq_process->f, G, 1, 4);
#endif
r_offset += impp.E;
r_offset += impp.perCB[r].E_cb;
}
}
///////////////////////////////////////////////////////////////////////////////////////////////
......
......@@ -504,6 +504,7 @@ typedef struct PHY_VARS_NR_UE_s {
uint8_t max_ldpc_iterations;
int ldpc_offload_enable;
/// SRS variables
nr_srs_info_t *nr_srs_info;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment