Commit 887f8a90 authored by Hongzhi Wang

code cleanup ldpc offload

parent a2c9a280
@@ -1930,11 +1930,11 @@ printf("segment %d offset %d length %d data length %d\n",i, offset,total_data_si
// TEST_ASSERT(orig_op->segments[i].length == data_len,
// "Length of segment differ in original (%u) and filled (%u) op",
// orig_op->segments[i].length, data_len);
- TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
+ /* TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
data_len,
"Output buffers (CB=%u) are not equal", i);
+ */
m = m->next;
}
@@ -2157,29 +2157,6 @@ validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
return TEST_SUCCESS;
}
/* Check Number of code blocks errors */
static int
validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
{
unsigned int i;
struct op_data_entries *hard_data_orig =
&test_vector.entries[DATA_HARD_OUTPUT];
struct rte_bbdev_op_ldpc_dec *ops_td;
struct rte_bbdev_op_data *hard_output;
int errors = 0;
struct rte_mbuf *m;
for (i = 0; i < n; ++i) {
ops_td = &ops[i]->ldpc_dec;
hard_output = &ops_td->hard_output;
m = hard_output->data;
if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
hard_data_orig->segments[0].addr,
hard_data_orig->segments[0].length))
errors++;
}
return errors;
}
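/*
 * Note: the helper above flags a block error whenever the first hard-output
 * segment of an op differs from DATA_HARD_OUTPUT segment 0 of the test
 * vector; additional segments are not compared. Its return value feeds the
 * per-thread BLER, as in bler_pmd_lcore_ldpc_dec further down:
 *	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
 */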
static int
validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
@@ -2939,2129 +2916,443 @@ printf("bufs len %d data %x addr orig %p addr %p\n",bufs->inputs[0].data->data_l
return TEST_SUCCESS;
}
static int
throughput_intr_lcore_dec(void *arg)
{
struct thread_params *tp = arg;
unsigned int enqueued;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_to_process = tp->op_params->num_to_process;
struct rte_bbdev_dec_op *ops[num_to_process];
struct test_buffers *bufs = NULL;
struct rte_bbdev_info info;
int ret, i, j;
uint16_t num_to_enq, enq;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
"Failed to enable interrupts for dev: %u, queue_id: %u",
tp->dev_id, queue_id);
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
"NUM_OPS cannot exceed %u for this device",
info.drv.queue_size_lim);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
rte_atomic16_clear(&tp->processing_status);
rte_atomic16_clear(&tp->nb_dequeued);
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
num_to_process);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
num_to_process);
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
bufs->hard_outputs, bufs->soft_outputs,
tp->op_params->ref_dec_op);
/* Set counter to validate the ordering */
for (j = 0; j < num_to_process; ++j)
ops[j]->opaque_data = (void *)(uintptr_t)j;
for (j = 0; j < TEST_REPETITIONS; ++j) {
for (i = 0; i < num_to_process; ++i)
rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
tp->start_time = rte_rdtsc_precise();
for (enqueued = 0; enqueued < num_to_process;) {
num_to_enq = burst_sz;
if (unlikely(num_to_process - enqueued < num_to_enq))
num_to_enq = num_to_process - enqueued;
enq = 0;
do {
enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
queue_id, &ops[enqueued],
num_to_enq);
} while (unlikely(num_to_enq != enq));
enqueued += enq;
/* Write to thread burst_sz current number of enqueued
* descriptors. It ensures that proper number of
* descriptors will be dequeued in callback
* function - needed for last batch in case where
* the number of operations is not a multiple of
* burst size.
*/
rte_atomic16_set(&tp->burst_sz, num_to_enq);
/* Wait until processing of previous batch is
* completed
*/
while (rte_atomic16_read(&tp->nb_dequeued) !=
(int16_t) enqueued)
rte_pause();
}
if (j != TEST_REPETITIONS - 1)
rte_atomic16_clear(&tp->nb_dequeued);
}
return TEST_SUCCESS;
}
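/*
 * Note on the interrupt-mode worker above: ops are enqueued in bursts, the
 * burst size is published through tp->burst_sz, and the lcore then spins
 * until tp->nb_dequeued catches up with the enqueue count; the dequeue
 * itself happens in the RTE_BBDEV_EVENT_DEQUEUE callback registered in
 * throughput_test() below.
 */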
static int
throughput_intr_lcore_enc(void *arg)
{
struct thread_params *tp = arg;
unsigned int enqueued;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_to_process = tp->op_params->num_to_process;
struct rte_bbdev_enc_op *ops[num_to_process];
struct test_buffers *bufs = NULL;
struct rte_bbdev_info info;
int ret, i, j;
uint16_t num_to_enq, enq;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
"Failed to enable interrupts for dev: %u, queue_id: %u",
tp->dev_id, queue_id);
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
"NUM_OPS cannot exceed %u for this device",
info.drv.queue_size_lim);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
rte_atomic16_clear(&tp->processing_status);
rte_atomic16_clear(&tp->nb_dequeued);
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
num_to_process);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
num_to_process);
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
bufs->hard_outputs, tp->op_params->ref_enc_op);
/* Set counter to validate the ordering */
for (j = 0; j < num_to_process; ++j)
ops[j]->opaque_data = (void *)(uintptr_t)j;
for (j = 0; j < TEST_REPETITIONS; ++j) {
for (i = 0; i < num_to_process; ++i)
rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
tp->start_time = rte_rdtsc_precise();
for (enqueued = 0; enqueued < num_to_process;) {
num_to_enq = burst_sz;
if (unlikely(num_to_process - enqueued < num_to_enq))
num_to_enq = num_to_process - enqueued;
enq = 0;
do {
enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
queue_id, &ops[enqueued],
num_to_enq);
} while (unlikely(enq != num_to_enq));
enqueued += enq;
/* Write to thread burst_sz current number of enqueued
* descriptors. It ensures that proper number of
* descriptors will be dequeued in callback
* function - needed for last batch in case where
* the number of operations is not a multiple of
* burst size.
*/
rte_atomic16_set(&tp->burst_sz, num_to_enq);
/* Wait until processing of previous batch is
* completed
*/
while (rte_atomic16_read(&tp->nb_dequeued) !=
(int16_t) enqueued)
rte_pause();
}
if (j != TEST_REPETITIONS - 1)
rte_atomic16_clear(&tp->nb_dequeued);
}
return TEST_SUCCESS;
}
static int
throughput_intr_lcore_ldpc_enc(void *arg)
{
struct thread_params *tp = arg;
unsigned int enqueued;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_to_process = tp->op_params->num_to_process;
struct rte_bbdev_enc_op *ops[num_to_process];
struct test_buffers *bufs = NULL;
struct rte_bbdev_info info;
int ret, i, j;
uint16_t num_to_enq, enq;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
"Failed to enable interrupts for dev: %u, queue_id: %u",
tp->dev_id, queue_id);
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
"NUM_OPS cannot exceed %u for this device",
info.drv.queue_size_lim);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
rte_atomic16_clear(&tp->processing_status);
rte_atomic16_clear(&tp->nb_dequeued);
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
num_to_process);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
num_to_process);
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_ldpc_enc_op(ops, num_to_process, 0,
bufs->inputs, bufs->hard_outputs,
tp->op_params->ref_enc_op);
/* Set counter to validate the ordering */
for (j = 0; j < num_to_process; ++j)
ops[j]->opaque_data = (void *)(uintptr_t)j;
for (j = 0; j < TEST_REPETITIONS; ++j) {
for (i = 0; i < num_to_process; ++i)
rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
tp->start_time = rte_rdtsc_precise();
for (enqueued = 0; enqueued < num_to_process;) {
num_to_enq = burst_sz;
if (unlikely(num_to_process - enqueued < num_to_enq))
num_to_enq = num_to_process - enqueued;
enq = 0;
do {
enq += rte_bbdev_enqueue_ldpc_enc_ops(
tp->dev_id,
queue_id, &ops[enqueued],
num_to_enq);
} while (unlikely(enq != num_to_enq));
enqueued += enq;
/* Write to thread burst_sz current number of enqueued
* descriptors. It ensures that proper number of
* descriptors will be dequeued in callback
* function - needed for last batch in case where
* the number of operations is not a multiple of
* burst size.
*/
rte_atomic16_set(&tp->burst_sz, num_to_enq);
/* Wait until processing of previous batch is
* completed
*/
while (rte_atomic16_read(&tp->nb_dequeued) !=
(int16_t) enqueued)
rte_pause();
}
if (j != TEST_REPETITIONS - 1)
rte_atomic16_clear(&tp->nb_dequeued);
}
return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_dec(void *arg)
{
struct thread_params *tp = arg;
uint16_t enq, deq;
uint64_t total_time = 0, start_time;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_ops = tp->op_params->num_to_process;
struct rte_bbdev_dec_op *ops_enq[num_ops];
struct rte_bbdev_dec_op *ops_deq[num_ops];
struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
struct test_buffers *bufs = NULL;
int i, j, ret;
struct rte_bbdev_info info;
uint16_t num_to_enq;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
"NUM_OPS cannot exceed %u for this device",
info.drv.queue_size_lim);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
bufs->hard_outputs, bufs->soft_outputs, ref_op);
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (i = 0; i < TEST_REPETITIONS; ++i) {
for (j = 0; j < num_ops; ++j)
mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
start_time = rte_rdtsc_precise();
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
/* dequeue the remaining */
while (deq < enq) {
deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
total_time += rte_rdtsc_precise() - start_time;
}
tp->iter_count = 0;
/* get the max of iter_count for all dequeued ops */
for (i = 0; i < num_ops; ++i) {
tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
tp->iter_count);
}
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_dec_op(ops_deq, num_ops, ref_op,
tp->op_params->vector_mask);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
}
rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
double tb_len_bits = calc_dec_TB_size(ref_op);
tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
((double)total_time / (double)rte_get_tsc_hz());
tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
1000000.0) / ((double)total_time /
(double)rte_get_tsc_hz());
return TEST_SUCCESS;
}
static int
bler_pmd_lcore_ldpc_dec(void *arg)
{
struct thread_params *tp = arg;
uint16_t enq, deq;
uint64_t total_time = 0, start_time;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_ops = tp->op_params->num_to_process;
struct rte_bbdev_dec_op *ops_enq[num_ops];
struct rte_bbdev_dec_op *ops_deq[num_ops];
struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
struct test_buffers *bufs = NULL;
int i, j, ret;
float parity_bler = 0;
struct rte_bbdev_info info;
uint16_t num_to_enq;
bool extDdr = check_bit(ldpc_cap_flags,
RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
"NUM_OPS cannot exceed %u for this device",
info.drv.queue_size_lim);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
/* For BLER tests we need to enable early termination */
if (!check_bit(ref_op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
ref_op->ldpc_dec.op_flags +=
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
ref_op->ldpc_dec.iter_max = get_iter_max();
ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
bufs->hard_outputs, bufs->soft_outputs,
bufs->harq_inputs, bufs->harq_outputs, ref_op);
generate_llr_input(num_ops, bufs->inputs, ref_op);
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (i = 0; i < 1; ++i) { /* Could add more iterations */
for (j = 0; j < num_ops; ++j) {
if (!loopback)
mbuf_reset(
ops_enq[j]->ldpc_dec.hard_output.data);
if (hc_out || loopback)
mbuf_reset(
ops_enq[j]->ldpc_dec.harq_combined_output.data);
}
if (extDdr) {
bool preload = i == (TEST_REPETITIONS - 1);
preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
num_ops, preload);
}
start_time = rte_rdtsc_precise();
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
/* dequeue the remaining */
while (deq < enq) {
deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
total_time += rte_rdtsc_precise() - start_time;
}
tp->iter_count = 0;
tp->iter_average = 0;
/* get the max of iter_count for all dequeued ops */
for (i = 0; i < num_ops; ++i) {
tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
tp->iter_count);
tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
parity_bler += 1.0;
}
parity_bler /= num_ops; /* This one is based on SYND */
tp->iter_average /= num_ops;
tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
if (test_vector.op_type != RTE_BBDEV_OP_NONE
&& tp->bler == 0
&& parity_bler == 0
&& !hc_out) {
ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
tp->op_params->vector_mask);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
}
rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
tp->ops_per_sec = ((double)num_ops * 1) /
((double)total_time / (double)rte_get_tsc_hz());
tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
1000000.0) / ((double)total_time /
(double)rte_get_tsc_hz());
return TEST_SUCCESS;
}
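/*
 * BLER accounting used above: tp->bler is the fraction of ops whose hard
 * output mismatches the reference (validate_ldpc_bler), parity_bler is the
 * fraction of ops reporting RTE_BBDEV_SYNDROME_ERROR in their status, and
 * tp->iter_average is the mean LDPC iteration count. Throughput here is
 * computed over a single pass (the "* 1" factors) rather than
 * TEST_REPETITIONS.
 */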
static int
throughput_pmd_lcore_ldpc_dec(void *arg)
{
struct thread_params *tp = arg;
uint16_t enq, deq;
uint64_t total_time = 0, start_time;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_ops = tp->op_params->num_to_process;
struct rte_bbdev_dec_op *ops_enq[num_ops];
struct rte_bbdev_dec_op *ops_deq[num_ops];
struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
struct test_buffers *bufs = NULL;
int i, j, ret;
struct rte_bbdev_info info;
uint16_t num_to_enq;
struct rte_bbdev_op_data *hard_output;
struct rte_bbdev_op_ldpc_dec *ops_td;
bool extDdr = check_bit(ldpc_cap_flags,
RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
"NUM_OPS cannot exceed %u for this device",
info.drv.queue_size_lim);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
//&op_params->q_bufs[socket_id][queue_id].inputs
//printf("bufs len %d\n",bufs->input.data->data_len);
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
/* For throughput tests we need to disable early termination */
if (check_bit(ref_op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
ref_op->ldpc_dec.op_flags -=
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
ref_op->ldpc_dec.iter_max = get_iter_max();
ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
bufs->hard_outputs, bufs->soft_outputs,
bufs->harq_inputs, bufs->harq_outputs, ref_op);
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (i = 0; i < TEST_REPETITIONS; ++i) {
for (j = 0; j < num_ops; ++j) {
if (!loopback)
mbuf_reset(
ops_enq[j]->ldpc_dec.hard_output.data);
if (hc_out || loopback)
mbuf_reset(
ops_enq[j]->ldpc_dec.harq_combined_output.data);
}
if (extDdr) {
bool preload = i == (TEST_REPETITIONS - 1);
preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
num_ops, preload);
}
start_time = rte_rdtsc_precise();
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
//printf("pmd lcore ldpc dec data %x\n", *ops_enq[enq]->ldpc_dec.input.addr);
enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
/* ops_td = &ops_deq[enq]->ldpc_dec;
hard_output = &ops_td->hard_output;
struct rte_mbuf *m = hard_output->data;
printf("deq nb segs %d\n", m->nb_segs);
*/
}
/* dequeue the remaining */
while (deq < enq) {
deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
total_time += rte_rdtsc_precise() - start_time;
}
tp->iter_count = 0;
/* get the max of iter_count for all dequeued ops */
for (i = 0; i < num_ops; ++i) {
tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
tp->iter_count);
}
if (extDdr) {
/* Read loopback is not thread safe */
retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
}
//if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
//printf("op type != OP NONE\n");
ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
tp->op_params->vector_mask);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
//}
rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
((double)total_time / (double)rte_get_tsc_hz());
tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
1000000.0) / ((double)total_time /
(double)rte_get_tsc_hz());
return TEST_SUCCESS;
}
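/*
 * Throughput formula used above:
 *	ops_per_sec = num_ops * TEST_REPETITIONS / (total_time / tsc_hz)
 *	mbps = num_ops * TEST_REPETITIONS * tb_len_bits / 1e6 / (total_time / tsc_hz)
 * Illustrative numbers only (not taken from a measurement): with
 * num_ops = 512, TEST_REPETITIONS = 1000 and an 8448-bit TB processed in
 * 0.5 s of total_time, mbps = 512 * 1000 * 8448 / 1e6 / 0.5 ~= 8650 Mbps.
 */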
static int
throughput_pmd_lcore_enc(void *arg)
{
struct thread_params *tp = arg;
uint16_t enq, deq;
uint64_t total_time = 0, start_time;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_ops = tp->op_params->num_to_process;
struct rte_bbdev_enc_op *ops_enq[num_ops];
struct rte_bbdev_enc_op *ops_deq[num_ops];
struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
struct test_buffers *bufs = NULL;
int i, j, ret;
struct rte_bbdev_info info;
uint16_t num_to_enq;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
"NUM_OPS cannot exceed %u for this device",
info.drv.queue_size_lim);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
num_ops);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
num_ops);
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
bufs->hard_outputs, ref_op);
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (i = 0; i < TEST_REPETITIONS; ++i) {
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
for (j = 0; j < num_ops; ++j)
mbuf_reset(ops_enq[j]->turbo_enc.output.data);
start_time = rte_rdtsc_precise();
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
/* dequeue the remaining */
while (deq < enq) {
deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
total_time += rte_rdtsc_precise() - start_time;
}
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_enc_op(ops_deq, num_ops, ref_op);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
}
rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
double tb_len_bits = calc_enc_TB_size(ref_op);
tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
((double)total_time / (double)rte_get_tsc_hz());
tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
/ 1000000.0) / ((double)total_time /
(double)rte_get_tsc_hz());
return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_ldpc_enc(void *arg)
{
struct thread_params *tp = arg;
uint16_t enq, deq;
uint64_t total_time = 0, start_time;
const uint16_t queue_id = tp->queue_id;
const uint16_t burst_sz = tp->op_params->burst_sz;
const uint16_t num_ops = tp->op_params->num_to_process;
struct rte_bbdev_enc_op *ops_enq[num_ops];
struct rte_bbdev_enc_op *ops_deq[num_ops];
struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
struct test_buffers *bufs = NULL;
int i, j, ret;
struct rte_bbdev_info info;
uint16_t num_to_enq;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
rte_bbdev_info_get(tp->dev_id, &info);
TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
"NUM_OPS cannot exceed %u for this device",
info.drv.queue_size_lim);
bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
rte_pause();
ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
num_ops);
TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
num_ops);
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
bufs->hard_outputs, ref_op);
/* Set counter to validate the ordering */
for (j = 0; j < num_ops; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
for (i = 0; i < TEST_REPETITIONS; ++i) {
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
for (j = 0; j < num_ops; ++j)
mbuf_reset(ops_enq[j]->turbo_enc.output.data);
start_time = rte_rdtsc_precise();
for (enq = 0, deq = 0; enq < num_ops;) {
num_to_enq = burst_sz;
if (unlikely(num_ops - enq < num_to_enq))
num_to_enq = num_ops - enq;
enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
queue_id, &ops_enq[enq], num_to_enq);
deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
/* dequeue the remaining */
while (deq < enq) {
deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
queue_id, &ops_deq[deq], enq - deq);
}
total_time += rte_rdtsc_precise() - start_time;
}
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
}
rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
((double)total_time / (double)rte_get_tsc_hz());
tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
/ 1000000.0) / ((double)total_time /
(double)rte_get_tsc_hz());
return TEST_SUCCESS;
}
static void
print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
{
unsigned int iter = 0;
double total_mops = 0, total_mbps = 0;
for (iter = 0; iter < used_cores; iter++) {
printf(
"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
t_params[iter].lcore_id, t_params[iter].ops_per_sec,
t_params[iter].mbps);
total_mops += t_params[iter].ops_per_sec;
total_mbps += t_params[iter].mbps;
}
printf(
"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
used_cores, total_mops, total_mbps);
}
/* Aggregate the performance results over the number of cores used */
static void
print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
{
unsigned int core_idx = 0;
double total_mops = 0, total_mbps = 0;
uint8_t iter_count = 0;
for (core_idx = 0; core_idx < used_cores; core_idx++) {
printf(
"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
t_params[core_idx].lcore_id,
t_params[core_idx].ops_per_sec,
t_params[core_idx].mbps,
t_params[core_idx].iter_count);
total_mops += t_params[core_idx].ops_per_sec;
total_mbps += t_params[core_idx].mbps;
iter_count = RTE_MAX(iter_count,
t_params[core_idx].iter_count);
}
printf(
"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
used_cores, total_mops, total_mbps, iter_count);
}
/* Aggregate the performance results over the number of cores used */
static void
print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
{
unsigned int core_idx = 0;
double total_mbps = 0, total_bler = 0, total_iter = 0;
double snr = get_snr();
for (core_idx = 0; core_idx < used_cores; core_idx++) {
printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
t_params[core_idx].lcore_id,
t_params[core_idx].bler * 100,
t_params[core_idx].iter_average,
t_params[core_idx].mbps,
get_vector_filename());
total_mbps += t_params[core_idx].mbps;
total_bler += t_params[core_idx].bler;
total_iter += t_params[core_idx].iter_average;
}
total_bler /= used_cores;
total_iter /= used_cores;
printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
snr, total_bler * 100, total_iter, get_iter_max(),
total_mbps, get_vector_filename());
}
/*
* Test function that determines BLER wireless performance
*/
static int
bler_test(struct active_device *ad,
struct test_op_params *op_params)
{
int ret;
unsigned int lcore_id, used_cores = 0;
struct thread_params *t_params;
struct rte_bbdev_info info;
lcore_function_t *bler_function;
uint16_t num_lcores;
const char *op_type_str;
rte_bbdev_info_get(ad->dev_id, &info);
op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
test_vector.op_type);
printf("+ ------------------------------------------------------- +\n");
printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
info.dev_name, ad->nb_queues, op_params->burst_sz,
op_params->num_to_process, op_params->num_lcores,
op_type_str,
intr_enabled ? "Interrupt mode" : "PMD mode",
(double)rte_get_tsc_hz() / 1000000000.0);
/* Set number of lcores */
num_lcores = (ad->nb_queues < (op_params->num_lcores))
? ad->nb_queues
: op_params->num_lcores;
/* Allocate memory for thread parameters structure */
t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
RTE_CACHE_LINE_SIZE);
TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
RTE_CACHE_LINE_SIZE));
if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
bler_function = bler_pmd_lcore_ldpc_dec;
else
return TEST_SKIPPED;
rte_atomic16_set(&op_params->sync, SYNC_WAIT);
/* Master core is set at first entry */
t_params[0].dev_id = ad->dev_id;
t_params[0].lcore_id = rte_lcore_id();
t_params[0].op_params = op_params;
t_params[0].queue_id = ad->queue_ids[used_cores++];
t_params[0].iter_count = 0;
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (used_cores >= num_lcores)
break;
t_params[used_cores].dev_id = ad->dev_id;
t_params[used_cores].lcore_id = lcore_id;
t_params[used_cores].op_params = op_params;
t_params[used_cores].queue_id = ad->queue_ids[used_cores];
t_params[used_cores].iter_count = 0;
rte_eal_remote_launch(bler_function,
&t_params[used_cores++], lcore_id);
}
rte_atomic16_set(&op_params->sync, SYNC_START);
ret = bler_function(&t_params[0]);
/* Master core is always used */
for (used_cores = 1; used_cores < num_lcores; used_cores++)
ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
print_dec_bler(t_params, num_lcores);
/* Return if test failed */
if (ret) {
rte_free(t_params);
return ret;
}
/* Function to print something here*/
rte_free(t_params);
return ret;
}
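/*
 * Launch pattern used by bler_test() above (and throughput_test() below):
 * entry 0 of t_params runs on the master lcore, the remaining workers are
 * started with rte_eal_remote_launch(), and all of them busy-wait on
 * op_params->sync until it is flipped from SYNC_WAIT to SYNC_START.
 */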
/*
* Test function that determines how long an enqueue + dequeue of a burst
* takes on available lcores.
*/
static int
throughput_test(struct active_device *ad,
struct test_op_params *op_params)
{
int ret;
unsigned int lcore_id, used_cores = 0;
struct thread_params *t_params, *tp;
struct rte_bbdev_info info;
lcore_function_t *throughput_function;
uint16_t num_lcores;
const char *op_type_str;
rte_bbdev_info_get(ad->dev_id, &info);
op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
test_vector.op_type);
printf("+ ------------------------------------------------------- +\n");
printf("== new test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
info.dev_name, ad->nb_queues, op_params->burst_sz,
op_params->num_to_process, op_params->num_lcores,
op_type_str,
intr_enabled ? "Interrupt mode" : "PMD mode",
(double)rte_get_tsc_hz() / 1000000000.0);
/* Set number of lcores */
num_lcores = (ad->nb_queues < (op_params->num_lcores))
? ad->nb_queues
: op_params->num_lcores;
/* Allocate memory for thread parameters structure */
t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
RTE_CACHE_LINE_SIZE);
TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
RTE_CACHE_LINE_SIZE));
if (intr_enabled) {
if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
throughput_function = throughput_intr_lcore_dec;
else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
throughput_function = throughput_intr_lcore_ldpc_dec;
else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
throughput_function = throughput_intr_lcore_enc;
else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
throughput_function = throughput_intr_lcore_ldpc_enc;
else
throughput_function = throughput_intr_lcore_enc;
/* Dequeue interrupt callback registration */
ret = rte_bbdev_callback_register(ad->dev_id,
RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
t_params);
if (ret < 0) {
rte_free(t_params);
return ret;
}
} else {
if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
throughput_function = throughput_pmd_lcore_dec;
else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
throughput_function = throughput_pmd_lcore_ldpc_dec;
else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
throughput_function = throughput_pmd_lcore_enc;
else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
throughput_function = throughput_pmd_lcore_ldpc_enc;
else
throughput_function = throughput_pmd_lcore_enc;
}
rte_atomic16_set(&op_params->sync, SYNC_WAIT);
/* Master core is set at first entry */
t_params[0].dev_id = ad->dev_id;
t_params[0].lcore_id = rte_lcore_id();
t_params[0].op_params = op_params;
t_params[0].queue_id = ad->queue_ids[used_cores++];
t_params[0].iter_count = 0;
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (used_cores >= num_lcores)
break;
t_params[used_cores].dev_id = ad->dev_id;
t_params[used_cores].lcore_id = lcore_id;
t_params[used_cores].op_params = op_params;
t_params[used_cores].queue_id = ad->queue_ids[used_cores];
t_params[used_cores].iter_count = 0;
rte_eal_remote_launch(throughput_pmd_lcore_ldpc_dec,
&t_params[used_cores++], lcore_id);
}
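/* Note: the slave lcores above are launched with
 * throughput_pmd_lcore_ldpc_dec directly rather than with the
 * throughput_function pointer selected earlier, so for other op types the
 * selected function would only run on the master lcore.
 */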
rte_atomic16_set(&op_params->sync, SYNC_START);
ret = throughput_function(&t_params[0]);
/* Master core is always used */
for (used_cores = 1; used_cores < num_lcores; used_cores++)
ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
/* Return if test failed */
if (ret) {
rte_free(t_params);
return ret;
}
/* Print throughput if interrupts are disabled and test passed */
if (!intr_enabled) {
if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
print_dec_throughput(t_params, num_lcores);
else
print_enc_throughput(t_params, num_lcores);
rte_free(t_params);
return ret;
}
/* In interrupt TC we need to wait for the interrupt callback to dequeue
* all pending operations. Skip waiting for queues which reported an
* error using processing_status variable.
* Wait for master lcore operations.
*/
tp = &t_params[0];
while ((rte_atomic16_read(&tp->nb_dequeued) <
op_params->num_to_process) &&
(rte_atomic16_read(&tp->processing_status) !=
TEST_FAILED))
rte_pause();
tp->ops_per_sec /= TEST_REPETITIONS;
tp->mbps /= TEST_REPETITIONS;
ret |= (int)rte_atomic16_read(&tp->processing_status);
/* Wait for slave lcores operations */
for (used_cores = 1; used_cores < num_lcores; used_cores++) {
tp = &t_params[used_cores];
while ((rte_atomic16_read(&tp->nb_dequeued) <
op_params->num_to_process) &&
(rte_atomic16_read(&tp->processing_status) !=
TEST_FAILED))
rte_pause();
tp->ops_per_sec /= TEST_REPETITIONS;
tp->mbps /= TEST_REPETITIONS;
ret |= (int)rte_atomic16_read(&tp->processing_status);
}
/* Print throughput if test passed */
if (!ret) {
if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
print_dec_throughput(t_params, num_lcores);
else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
print_enc_throughput(t_params, num_lcores);
}
rte_free(t_params);
return ret;
}
static int
latency_test_dec(struct rte_mempool *mempool,
struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
int vector_mask, uint16_t dev_id, uint16_t queue_id,
const uint16_t num_to_process, uint16_t burst_sz,
uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
int ret = TEST_SUCCESS;
uint16_t i, j, dequeued;
struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t start_time = 0, last_time = 0;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
bool first_time = true;
last_time = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
TEST_ASSERT_SUCCESS(ret,
"rte_bbdev_dec_op_alloc_bulk() failed");
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_dec_op(ops_enq, burst_sz, dequeued,
bufs->inputs,
bufs->hard_outputs,
bufs->soft_outputs,
ref_op);
/* Set counter to validate the ordering */
for (j = 0; j < burst_sz; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
start_time = rte_rdtsc_precise();
enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
burst_sz);
TEST_ASSERT(enq == burst_sz,
"Error enqueueing burst, expected %u, got %u",
burst_sz, enq);
/* Dequeue */
do {
deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
if (likely(first_time && (deq > 0))) {
last_time = rte_rdtsc_precise() - start_time;
first_time = false;
}
} while (unlikely(burst_sz != deq));
*max_time = RTE_MAX(*max_time, last_time);
*min_time = RTE_MIN(*min_time, last_time);
*total_time += last_time;
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_dec_op(ops_deq, burst_sz, ref_op,
vector_mask);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
}
rte_bbdev_dec_op_free_bulk(ops_enq, deq);
dequeued += deq;
}
return i;
}
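/*
 * Latency accounting used above: for each burst, last_time is the delay
 * from the enqueue call until the first successful dequeue (first_time
 * guard); min/max/total are accumulated across bursts and the function
 * returns the number of bursts measured.
 */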
static int
latency_test_ldpc_dec(struct rte_mempool *mempool,
struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
int vector_mask, uint16_t dev_id, uint16_t queue_id,
const uint16_t num_to_process, uint16_t burst_sz,
uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
int ret = TEST_SUCCESS;
uint16_t i, j, dequeued;
struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t start_time = 0, last_time = 0;
bool extDdr = ldpc_cap_flags &
RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
bool first_time = true;
last_time = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
TEST_ASSERT_SUCCESS(ret,
"rte_bbdev_dec_op_alloc_bulk() failed");
/* For latency tests we need to disable early termination */
if (check_bit(ref_op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
ref_op->ldpc_dec.op_flags -=
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
ref_op->ldpc_dec.iter_max = get_iter_max();
ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
bufs->inputs,
bufs->hard_outputs,
bufs->soft_outputs,
bufs->harq_inputs,
bufs->harq_outputs,
ref_op);
if (extDdr)
preload_harq_ddr(dev_id, queue_id, ops_enq,
burst_sz, true);
/* Set counter to validate the ordering */
for (j = 0; j < burst_sz; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
start_time = rte_rdtsc_precise();
enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
&ops_enq[enq], burst_sz);
TEST_ASSERT(enq == burst_sz,
"Error enqueueing burst, expected %u, got %u",
burst_sz, enq);
/* Dequeue */
do {
deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
if (likely(first_time && (deq > 0))) {
last_time = rte_rdtsc_precise() - start_time;
first_time = false;
}
} while (unlikely(burst_sz != deq));
*max_time = RTE_MAX(*max_time, last_time);
*min_time = RTE_MIN(*min_time, last_time);
*total_time += last_time;
if (extDdr)
retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
vector_mask);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
}
rte_bbdev_dec_op_free_bulk(ops_enq, deq);
dequeued += deq;
}
return i;
}
static int
latency_test_enc(struct rte_mempool *mempool,
struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
uint16_t dev_id, uint16_t queue_id,
const uint16_t num_to_process, uint16_t burst_sz,
uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
int ret = TEST_SUCCESS;
uint16_t i, j, dequeued;
struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t start_time = 0, last_time = 0;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
bool first_time = true;
last_time = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
TEST_ASSERT_SUCCESS(ret,
"rte_bbdev_enc_op_alloc_bulk() failed");
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_enc_op(ops_enq, burst_sz, dequeued,
bufs->inputs,
bufs->hard_outputs,
ref_op);
/* Set counter to validate the ordering */
for (j = 0; j < burst_sz; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
start_time = rte_rdtsc_precise();
enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
burst_sz);
TEST_ASSERT(enq == burst_sz,
"Error enqueueing burst, expected %u, got %u",
burst_sz, enq);
/* Dequeue */
do {
deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
if (likely(first_time && (deq > 0))) {
last_time += rte_rdtsc_precise() - start_time;
first_time = false;
}
} while (unlikely(burst_sz != deq));
*max_time = RTE_MAX(*max_time, last_time);
*min_time = RTE_MIN(*min_time, last_time);
*total_time += last_time;
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_enc_op(ops_deq, burst_sz, ref_op);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
}
rte_bbdev_enc_op_free_bulk(ops_enq, deq);
dequeued += deq;
}
return i;
}
static int
latency_test_ldpc_enc(struct rte_mempool *mempool,
struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
uint16_t dev_id, uint16_t queue_id,
const uint16_t num_to_process, uint16_t burst_sz,
uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
int ret = TEST_SUCCESS;
uint16_t i, j, dequeued;
struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t start_time = 0, last_time = 0;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
bool first_time = true;
last_time = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
TEST_ASSERT_SUCCESS(ret,
"rte_bbdev_enc_op_alloc_bulk() failed");
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
bufs->inputs,
bufs->hard_outputs,
ref_op);
/* Set counter to validate the ordering */
for (j = 0; j < burst_sz; ++j)
ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
start_time = rte_rdtsc_precise();
enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
&ops_enq[enq], burst_sz);
TEST_ASSERT(enq == burst_sz,
"Error enqueueing burst, expected %u, got %u",
burst_sz, enq);
/* Dequeue */
do {
deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
if (likely(first_time && (deq > 0))) {
last_time += rte_rdtsc_precise() - start_time;
first_time = false;
}
} while (unlikely(burst_sz != deq));
*max_time = RTE_MAX(*max_time, last_time);
*min_time = RTE_MIN(*min_time, last_time);
*total_time += last_time;
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_enc_op(ops_deq, burst_sz, ref_op);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
}
rte_bbdev_enc_op_free_bulk(ops_enq, deq);
dequeued += deq;
}
return i;
}
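/* Note: the LDPC encoder latency path above validates its output with
 * validate_enc_op(), not with the LDPC-specific validate_ldpc_enc_op()
 * used by throughput_pmd_lcore_ldpc_enc.
 */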
static int
latency_test(struct active_device *ad,
struct test_op_params *op_params)
{
int iter;
uint16_t burst_sz = op_params->burst_sz;
const uint16_t num_to_process = op_params->num_to_process;
const enum rte_bbdev_op_type op_type = test_vector.op_type;
const uint16_t queue_id = ad->queue_ids[0];
struct test_buffers *bufs = NULL;
struct rte_bbdev_info info;
uint64_t total_time, min_time, max_time;
const char *op_type_str;
total_time = max_time = 0;
min_time = UINT64_MAX;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
rte_bbdev_info_get(ad->dev_id, &info);
bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
op_type_str = rte_bbdev_op_type_str(op_type);
TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
printf("+ ------------------------------------------------------- +\n");
printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
info.dev_name, burst_sz, num_to_process, op_type_str);
if (op_type == RTE_BBDEV_OP_TURBO_DEC)
iter = latency_test_dec(op_params->mp, bufs,
op_params->ref_dec_op, op_params->vector_mask,
ad->dev_id, queue_id, num_to_process,
burst_sz, &total_time, &min_time, &max_time);
else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
iter = latency_test_enc(op_params->mp, bufs,
op_params->ref_enc_op, ad->dev_id, queue_id,
num_to_process, burst_sz, &total_time,
&min_time, &max_time);
else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
iter = latency_test_ldpc_enc(op_params->mp, bufs,
op_params->ref_enc_op, ad->dev_id, queue_id,
num_to_process, burst_sz, &total_time,
&min_time, &max_time);
else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
iter = latency_test_ldpc_dec(op_params->mp, bufs,
op_params->ref_dec_op, op_params->vector_mask,
ad->dev_id, queue_id, num_to_process,
burst_sz, &total_time, &min_time, &max_time);
else
iter = latency_test_enc(op_params->mp, bufs,
op_params->ref_enc_op,
ad->dev_id, queue_id,
num_to_process, burst_sz, &total_time,
&min_time, &max_time);
if (iter <= 0)
return TEST_FAILED;
printf("Operation latency:\n" tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
"\tavg: %lg cycles, %lg us\n" ((double)total_time / (double)rte_get_tsc_hz());
"\tmin: %lg cycles, %lg us\n" tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
"\tmax: %lg cycles, %lg us\n", / 1000000.0) / ((double)total_time /
(double)total_time / (double)iter,
(double)(total_time * 1000000) / (double)iter /
(double)rte_get_tsc_hz(), (double)min_time,
(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
(double)max_time, (double)(max_time * 1000000) /
(double)rte_get_tsc_hz()); (double)rte_get_tsc_hz());
return TEST_SUCCESS; return TEST_SUCCESS;
} }
#ifdef RTE_BBDEV_OFFLOAD_COST static void
static int print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
struct rte_bbdev_stats *stats)
{
struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
struct rte_bbdev_stats *q_stats;
if (queue_id >= dev->data->num_queues)
return -1;
q_stats = &dev->data->queues[queue_id].queue_stats;
stats->enqueued_count = q_stats->enqueued_count;
stats->dequeued_count = q_stats->dequeued_count;
stats->enqueue_err_count = q_stats->enqueue_err_count;
stats->dequeue_err_count = q_stats->dequeue_err_count;
stats->acc_offload_cycles = q_stats->acc_offload_cycles;
return 0;
}
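/*
 * get_bbdev_queue_stats() reads the per-queue counters straight from the
 * internal rte_bbdev_devices[] array; acc_offload_cycles is the
 * driver-reported time spent in the accelerator and is subtracted from the
 * measured enqueue time below to isolate the software (driver) overhead.
 */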
static int
offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
uint16_t queue_id, const uint16_t num_to_process,
uint16_t burst_sz, struct test_time_stats *time_st)
{
int i, dequeued, ret;
struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t enq_start_time, deq_start_time;
uint64_t enq_sw_last_time, deq_last_time;
struct rte_bbdev_stats stats;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_dec_op(ops_enq, burst_sz, dequeued,
bufs->inputs,
bufs->hard_outputs,
bufs->soft_outputs,
ref_op);
/* Start time meas for enqueue function offload latency */
enq_start_time = rte_rdtsc_precise();
do {
enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
&ops_enq[enq], burst_sz - enq);
} while (unlikely(burst_sz != enq));
ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
TEST_ASSERT_SUCCESS(ret,
"Failed to get stats for queue (%u) of device (%u)",
queue_id, dev_id);
enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
stats.acc_offload_cycles;
time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
enq_sw_last_time);
time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
enq_sw_last_time);
time_st->enq_sw_total_time += enq_sw_last_time;
time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
stats.acc_offload_cycles);
time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
stats.acc_offload_cycles);
time_st->enq_acc_total_time += stats.acc_offload_cycles;
/* give time for device to process ops */
rte_delay_us(200);
/* Start time meas for dequeue function offload latency */
deq_start_time = rte_rdtsc_precise();
/* Dequeue one operation */
do {
deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
&ops_deq[deq], 1);
} while (unlikely(deq != 1));
deq_last_time = rte_rdtsc_precise() - deq_start_time;
time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
deq_last_time);
time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
deq_last_time);
time_st->deq_total_time += deq_last_time;
/* Dequeue remaining operations if needed*/
while (burst_sz != deq)
deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
rte_bbdev_dec_op_free_bulk(ops_enq, deq);
dequeued += deq;
}
return i;
}
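/*
 * Offload-cost breakdown used above: enq_sw_* is the enqueue call time
 * minus the accelerator cycles reported in stats, enq_acc_* is the
 * accelerator time itself, and deq_* times the dequeue of a single op
 * after the 200 us settling delay; remaining ops are then drained without
 * being timed.
 */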
static int
offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
struct test_buffers *bufs,
struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
uint16_t queue_id, const uint16_t num_to_process,
uint16_t burst_sz, struct test_time_stats *time_st)
{
int i, dequeued, ret;
struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t enq_start_time, deq_start_time;
uint64_t enq_sw_last_time, deq_last_time;
struct rte_bbdev_stats stats;
bool extDdr = ldpc_cap_flags &
RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
bufs->inputs,
bufs->hard_outputs,
bufs->soft_outputs,
bufs->harq_inputs,
bufs->harq_outputs,
ref_op);
if (extDdr)
preload_harq_ddr(dev_id, queue_id, ops_enq,
burst_sz, true);
/* Start time meas for enqueue function offload latency */
enq_start_time = rte_rdtsc_precise();
do {
enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
&ops_enq[enq], burst_sz - enq);
} while (unlikely(burst_sz != enq));
enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
TEST_ASSERT_SUCCESS(ret,
"Failed to get stats for queue (%u) of device (%u)",
queue_id, dev_id);
enq_sw_last_time -= stats.acc_offload_cycles;
time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
enq_sw_last_time);
time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
enq_sw_last_time);
time_st->enq_sw_total_time += enq_sw_last_time;
time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
stats.acc_offload_cycles);
time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
stats.acc_offload_cycles);
time_st->enq_acc_total_time += stats.acc_offload_cycles;
/* give time for device to process ops */
rte_delay_us(200);
/* Start time meas for dequeue function offload latency */
deq_start_time = rte_rdtsc_precise();
/* Dequeue one operation */
do {
deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
&ops_deq[deq], 1);
} while (unlikely(deq != 1));
deq_last_time = rte_rdtsc_precise() - deq_start_time;
time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
deq_last_time);
time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
deq_last_time);
time_st->deq_total_time += deq_last_time;
/* Dequeue remaining operations if needed*/
while (burst_sz != deq)
deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
if (extDdr) {
/* Read loopback is not thread safe */
retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
}
rte_bbdev_dec_op_free_bulk(ops_enq, deq); for (core_idx = 0; core_idx < used_cores; core_idx++) {
dequeued += deq; printf(
"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
t_params[core_idx].lcore_id,
t_params[core_idx].ops_per_sec,
t_params[core_idx].mbps,
t_params[core_idx].iter_count);
total_mops += t_params[core_idx].ops_per_sec;
total_mbps += t_params[core_idx].mbps;
iter_count = RTE_MAX(iter_count,
t_params[core_idx].iter_count);
} }
printf(
return i; "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
used_cores, total_mops, total_mbps, iter_count);
} }
static int
offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
        struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
        uint16_t queue_id, const uint16_t num_to_process,
        uint16_t burst_sz, struct test_time_stats *time_st)
{
    int i, dequeued, ret;
    struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
    uint64_t enq_start_time, deq_start_time;
    uint64_t enq_sw_last_time, deq_last_time;
    struct rte_bbdev_stats stats;

    for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
        uint16_t enq = 0, deq = 0;

        if (unlikely(num_to_process - dequeued < burst_sz))
            burst_sz = num_to_process - dequeued;

        ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
        TEST_ASSERT_SUCCESS(ret,
                "rte_bbdev_enc_op_alloc_bulk() failed");
        if (test_vector.op_type != RTE_BBDEV_OP_NONE)
            copy_reference_enc_op(ops_enq, burst_sz, dequeued,
                    bufs->inputs,
                    bufs->hard_outputs,
                    ref_op);

        /* Start time meas for enqueue function offload latency */
        enq_start_time = rte_rdtsc_precise();
        do {
            enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
                    &ops_enq[enq], burst_sz - enq);
        } while (unlikely(burst_sz != enq));

        enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
        ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
        TEST_ASSERT_SUCCESS(ret,
                "Failed to get stats for queue (%u) of device (%u)",
                queue_id, dev_id);

        enq_sw_last_time -= stats.acc_offload_cycles;
        time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
                enq_sw_last_time);
        time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
                enq_sw_last_time);
        time_st->enq_sw_total_time += enq_sw_last_time;

        time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
                stats.acc_offload_cycles);
        time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
                stats.acc_offload_cycles);
        time_st->enq_acc_total_time += stats.acc_offload_cycles;

        /* give time for device to process ops */
        rte_delay_us(200);

        /* Start time meas for dequeue function offload latency */
        deq_start_time = rte_rdtsc_precise();
        /* Dequeue one operation */
        do {
            deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
                    &ops_deq[deq], 1);
        } while (unlikely(deq != 1));

        deq_last_time = rte_rdtsc_precise() - deq_start_time;
        time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
                deq_last_time);
        time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
                deq_last_time);
        time_st->deq_total_time += deq_last_time;

        while (burst_sz != deq)
            deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
                    &ops_deq[deq], burst_sz - deq);

        rte_bbdev_enc_op_free_bulk(ops_enq, deq);
        dequeued += deq;
    }

    return i;
}

/* Aggregate the performance results over the number of cores used */
static void
print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
{
    unsigned int core_idx = 0;
    double total_mbps = 0, total_bler = 0, total_iter = 0;
    double snr = get_snr();

    for (core_idx = 0; core_idx < used_cores; core_idx++) {
        printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
                t_params[core_idx].lcore_id,
                t_params[core_idx].bler * 100,
                t_params[core_idx].iter_average,
                t_params[core_idx].mbps,
                get_vector_filename());
        total_mbps += t_params[core_idx].mbps;
        total_bler += t_params[core_idx].bler;
        total_iter += t_params[core_idx].iter_average;
    }
    total_bler /= used_cores;
    total_iter /= used_cores;

    printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
            snr, total_bler * 100, total_iter, get_iter_max(),
            total_mbps, get_vector_filename());
}
static int
offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
struct test_buffers *bufs,
struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
uint16_t queue_id, const uint16_t num_to_process,
uint16_t burst_sz, struct test_time_stats *time_st)
{
int i, dequeued, ret;
struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t enq_start_time, deq_start_time;
uint64_t enq_sw_last_time, deq_last_time;
struct rte_bbdev_stats stats;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
TEST_ASSERT_SUCCESS(ret,
"rte_bbdev_enc_op_alloc_bulk() failed");
if (test_vector.op_type != RTE_BBDEV_OP_NONE)
copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
bufs->inputs,
bufs->hard_outputs,
ref_op);
/* Start time meas for enqueue function offload latency */
enq_start_time = rte_rdtsc_precise();
do {
enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
&ops_enq[enq], burst_sz - enq);
} while (unlikely(burst_sz != enq));
enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
TEST_ASSERT_SUCCESS(ret,
"Failed to get stats for queue (%u) of device (%u)",
queue_id, dev_id);
enq_sw_last_time -= stats.acc_offload_cycles;
time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
enq_sw_last_time);
time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
enq_sw_last_time);
time_st->enq_sw_total_time += enq_sw_last_time;
time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
stats.acc_offload_cycles);
time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
stats.acc_offload_cycles);
time_st->enq_acc_total_time += stats.acc_offload_cycles;
/* give time for device to process ops */
rte_delay_us(200);
/* Start time meas for dequeue function offload latency */
deq_start_time = rte_rdtsc_precise();
/* Dequeue one operation */
do {
deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
&ops_deq[deq], 1);
} while (unlikely(deq != 1));
deq_last_time = rte_rdtsc_precise() - deq_start_time;
time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
deq_last_time);
time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
deq_last_time);
time_st->deq_total_time += deq_last_time;
while (burst_sz != deq)
deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
rte_bbdev_enc_op_free_bulk(ops_enq, deq);
dequeued += deq;
}
return i;
}
#endif
/*
* Test function that determines how long an enqueue + dequeue of a burst
* takes on available lcores.
*/
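/* A minimal hedged sketch (the helper name below is hypothetical and not part
 * of this change): the latency report in the function that follows converts
 * raw TSC cycle counts to microseconds inline in its printf arguments; the
 * conversion it performs amounts to the following. */
static inline double
cycles_to_us(uint64_t cycles)
{
    /* Scale a cycle count by 1e6 / TSC frequency to obtain microseconds. */
    return (double)cycles * 1000000.0 / (double)rte_get_tsc_hz();
}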
static int
offload_cost_test(struct active_device *ad,
        struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
    RTE_SET_USED(ad);
    RTE_SET_USED(op_params);
    printf("Offload latency test is disabled.\n");
    printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
    return TEST_SKIPPED;
#else
    int iter;
    uint16_t burst_sz = op_params->burst_sz;
    const uint16_t num_to_process = op_params->num_to_process;
    const enum rte_bbdev_op_type op_type = test_vector.op_type;
    const uint16_t queue_id = ad->queue_ids[0];
    struct test_buffers *bufs = NULL;
    struct rte_bbdev_info info;
    const char *op_type_str;
    struct test_time_stats time_st;

    memset(&time_st, 0, sizeof(struct test_time_stats));
    time_st.enq_sw_min_time = UINT64_MAX;
    time_st.enq_acc_min_time = UINT64_MAX;
    time_st.deq_min_time = UINT64_MAX;

    TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
            "BURST_SIZE should be <= %u", MAX_BURST);

    rte_bbdev_info_get(ad->dev_id, &info);
    bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

    op_type_str = rte_bbdev_op_type_str(op_type);
    TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

    printf("+ ------------------------------------------------------- +\n");
    printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
            info.dev_name, burst_sz, num_to_process, op_type_str);

    if (op_type == RTE_BBDEV_OP_TURBO_DEC)
        iter = offload_latency_test_dec(op_params->mp, bufs,
                op_params->ref_dec_op, ad->dev_id, queue_id,
                num_to_process, burst_sz, &time_st);
    else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
        iter = offload_latency_test_enc(op_params->mp, bufs,
                op_params->ref_enc_op, ad->dev_id, queue_id,
                num_to_process, burst_sz, &time_st);
    else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
        iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
                op_params->ref_enc_op, ad->dev_id, queue_id,
                num_to_process, burst_sz, &time_st);
    else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
        iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
                op_params->ref_dec_op, ad->dev_id, queue_id,
                num_to_process, burst_sz, &time_st);
    else
        iter = offload_latency_test_enc(op_params->mp, bufs,
                op_params->ref_enc_op, ad->dev_id, queue_id,
                num_to_process, burst_sz, &time_st);

    if (iter <= 0)
        return TEST_FAILED;

    printf("Enqueue driver offload cost latency:\n"
            "\tavg: %lg cycles, %lg us\n"
            "\tmin: %lg cycles, %lg us\n"
            "\tmax: %lg cycles, %lg us\n"
            "Enqueue accelerator offload cost latency:\n"
            "\tavg: %lg cycles, %lg us\n"
            "\tmin: %lg cycles, %lg us\n"
            "\tmax: %lg cycles, %lg us\n",
            (double)time_st.enq_sw_total_time / (double)iter,
            (double)(time_st.enq_sw_total_time * 1000000) /
            (double)iter / (double)rte_get_tsc_hz(),
            (double)time_st.enq_sw_min_time,
            (double)(time_st.enq_sw_min_time * 1000000) /
            rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
            (double)(time_st.enq_sw_max_time * 1000000) /
            rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
            (double)iter,
            (double)(time_st.enq_acc_total_time * 1000000) /
            (double)iter / (double)rte_get_tsc_hz(),
            (double)time_st.enq_acc_min_time,
            (double)(time_st.enq_acc_min_time * 1000000) /
            rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
            (double)(time_st.enq_acc_max_time * 1000000) /
            rte_get_tsc_hz());

    printf("Dequeue offload cost latency - one op:\n"
            "\tavg: %lg cycles, %lg us\n"
            "\tmin: %lg cycles, %lg us\n"
            "\tmax: %lg cycles, %lg us\n",
            (double)time_st.deq_total_time / (double)iter,
            (double)(time_st.deq_total_time * 1000000) /
            (double)iter / (double)rte_get_tsc_hz(),
            (double)time_st.deq_min_time,
            (double)(time_st.deq_min_time * 1000000) /
            rte_get_tsc_hz(), (double)time_st.deq_max_time,
            (double)(time_st.deq_max_time * 1000000) /
            rte_get_tsc_hz());

    return TEST_SUCCESS;
#endif
}

#ifdef RTE_BBDEV_OFFLOAD_COST
static int
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
        const uint16_t num_to_process, uint16_t burst_sz,
        uint64_t *deq_total_time, uint64_t *deq_min_time,
        uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
    int i, deq_total;
    struct rte_bbdev_dec_op *ops[MAX_BURST];
    uint64_t deq_start_time, deq_last_time;

    /* Test deq offload latency from an empty queue */
    for (i = 0, deq_total = 0; deq_total < num_to_process;
            ++i, deq_total += burst_sz) {
        deq_start_time = rte_rdtsc_precise();

        if (unlikely(num_to_process - deq_total < burst_sz))
            burst_sz = num_to_process - deq_total;
        if (op_type == RTE_BBDEV_OP_LDPC_DEC)
            rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
                    burst_sz);
        else
            rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
                    burst_sz);

        deq_last_time = rte_rdtsc_precise() - deq_start_time;
        *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
        *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
        *deq_total_time += deq_last_time;
    }

    return i;
}

static int
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
        const uint16_t num_to_process, uint16_t burst_sz,
        uint64_t *deq_total_time, uint64_t *deq_min_time,
        uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
    int i, deq_total;
    struct rte_bbdev_enc_op *ops[MAX_BURST];
    uint64_t deq_start_time, deq_last_time;

    /* Test deq offload latency from an empty queue */
    for (i = 0, deq_total = 0; deq_total < num_to_process;
            ++i, deq_total += burst_sz) {
        deq_start_time = rte_rdtsc_precise();

        if (unlikely(num_to_process - deq_total < burst_sz))
            burst_sz = num_to_process - deq_total;
        if (op_type == RTE_BBDEV_OP_LDPC_ENC)
            rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
                    burst_sz);
        else
            rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
                    burst_sz);

        deq_last_time = rte_rdtsc_precise() - deq_start_time;
        *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
        *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
        *deq_total_time += deq_last_time;
    }

    return i;
}

#endif

static int
offload_latency_empty_q_test(struct active_device *ad,
        struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
    RTE_SET_USED(ad);
    RTE_SET_USED(op_params);
    printf("Offload latency empty dequeue test is disabled.\n");
    printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
    return TEST_SKIPPED;
#else
    int iter;
    uint64_t deq_total_time, deq_min_time, deq_max_time;
    uint16_t burst_sz = op_params->burst_sz;
    const uint16_t num_to_process = op_params->num_to_process;
    const enum rte_bbdev_op_type op_type = test_vector.op_type;
    const uint16_t queue_id = ad->queue_ids[0];
    struct rte_bbdev_info info;
    const char *op_type_str;

    deq_total_time = deq_max_time = 0;
    deq_min_time = UINT64_MAX;

    TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
            "BURST_SIZE should be <= %u", MAX_BURST);

    rte_bbdev_info_get(ad->dev_id, &info);

    op_type_str = rte_bbdev_op_type_str(op_type);
    TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

    printf("+ ------------------------------------------------------- +\n");
    printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
            info.dev_name, burst_sz, num_to_process, op_type_str);

    if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
            op_type == RTE_BBDEV_OP_LDPC_DEC)
        iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
                num_to_process, burst_sz, &deq_total_time,
                &deq_min_time, &deq_max_time, op_type);
    else
        iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
                num_to_process, burst_sz, &deq_total_time,
                &deq_min_time, &deq_max_time, op_type);

    if (iter <= 0)
        return TEST_FAILED;

    printf("Empty dequeue offload:\n"
            "\tavg: %lg cycles, %lg us\n"
            "\tmin: %lg cycles, %lg us\n"
            "\tmax: %lg cycles, %lg us\n",
            (double)deq_total_time / (double)iter,
            (double)(deq_total_time * 1000000) / (double)iter /
            (double)rte_get_tsc_hz(), (double)deq_min_time,
            (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
            (double)deq_max_time, (double)(deq_max_time * 1000000) /
            rte_get_tsc_hz());

    return TEST_SUCCESS;
#endif
}

static int
bler_tc(void)
{
    return run_test_case(bler_test);
}

static int
throughput_test(struct active_device *ad,
        struct test_op_params *op_params)
{
    int ret;
    unsigned int lcore_id, used_cores = 0;
    struct thread_params *t_params, *tp;
    struct rte_bbdev_info info;
    lcore_function_t *throughput_function;
    uint16_t num_lcores;
    const char *op_type_str;

    rte_bbdev_info_get(ad->dev_id, &info);

    op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
    TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
            test_vector.op_type);

    printf("+ ------------------------------------------------------- +\n");
    printf("== new test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
            info.dev_name, ad->nb_queues, op_params->burst_sz,
            op_params->num_to_process, op_params->num_lcores,
            op_type_str,
            intr_enabled ? "Interrupt mode" : "PMD mode",
            (double)rte_get_tsc_hz() / 1000000000.0);

    /* Set number of lcores */
    num_lcores = (ad->nb_queues < (op_params->num_lcores))
            ? ad->nb_queues
            : op_params->num_lcores;

    /* Allocate memory for thread parameters structure */
    t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
            RTE_CACHE_LINE_SIZE);
    TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
            RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
                RTE_CACHE_LINE_SIZE));

    rte_atomic16_set(&op_params->sync, SYNC_WAIT);

    /* Master core is set at first entry */
    t_params[0].dev_id = ad->dev_id;
    t_params[0].lcore_id = rte_lcore_id();
    t_params[0].op_params = op_params;
    t_params[0].queue_id = ad->queue_ids[used_cores++];
    t_params[0].iter_count = 0;

    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        if (used_cores >= num_lcores)
            break;

        t_params[used_cores].dev_id = ad->dev_id;
        t_params[used_cores].lcore_id = lcore_id;
        t_params[used_cores].op_params = op_params;
        t_params[used_cores].queue_id = ad->queue_ids[used_cores];
        t_params[used_cores].iter_count = 0;

        rte_eal_remote_launch(throughput_pmd_lcore_ldpc_dec,
                &t_params[used_cores++], lcore_id);
    }

    rte_atomic16_set(&op_params->sync, SYNC_START);
    ret = throughput_pmd_lcore_ldpc_dec(&t_params[0]);

    /* Master core is always used */
    for (used_cores = 1; used_cores < num_lcores; used_cores++)
        ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

    /* Return if test failed */
    if (ret) {
        rte_free(t_params);
        return ret;
    }

    /* Print throughput if interrupts are disabled and test passed */
    if (!intr_enabled) {
        if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
                test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
            print_dec_throughput(t_params, num_lcores);
        else
            print_enc_throughput(t_params, num_lcores);
        rte_free(t_params);
        return ret;
    }

    /* In interrupt TC we need to wait for the interrupt callback to dequeue
     * all pending operations. Skip waiting for queues which reported an
     * error using processing_status variable.
     * Wait for master lcore operations.
     */
    tp = &t_params[0];
    while ((rte_atomic16_read(&tp->nb_dequeued) <
            op_params->num_to_process) &&
            (rte_atomic16_read(&tp->processing_status) !=
            TEST_FAILED))
        rte_pause();

    tp->ops_per_sec /= TEST_REPETITIONS;
    tp->mbps /= TEST_REPETITIONS;
    ret |= (int)rte_atomic16_read(&tp->processing_status);

    /* Wait for slave lcores operations */
    for (used_cores = 1; used_cores < num_lcores; used_cores++) {
        tp = &t_params[used_cores];

        while ((rte_atomic16_read(&tp->nb_dequeued) <
                op_params->num_to_process) &&
                (rte_atomic16_read(&tp->processing_status) !=
                TEST_FAILED))
            rte_pause();

        tp->ops_per_sec /= TEST_REPETITIONS;
        tp->mbps /= TEST_REPETITIONS;
        ret |= (int)rte_atomic16_read(&tp->processing_status);
    }

    /* Print throughput if test passed */
    if (!ret) {
        if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
                test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
            print_dec_throughput(t_params, num_lcores);
        else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
                test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
            print_enc_throughput(t_params, num_lcores);
    }

    rte_free(t_params);
    return ret;
}
static int
@@ -5070,23 +3361,7 @@ throughput_tc(void)
    return run_test_case(throughput_test);
}
static int
offload_cost_tc(void)
{
return run_test_case(offload_cost_test);
}
static int
offload_latency_empty_q_tc(void)
{
return run_test_case(offload_latency_empty_q_test);
}
static int
latency_tc(void)
{
return run_test_case(latency_test);
}
static int
interrupt_tc(void)
@@ -5094,16 +3369,6 @@ interrupt_tc(void)
    return run_test_case(throughput_test);
}
static struct unit_test_suite bbdev_bler_testsuite = {
.suite_name = "BBdev BLER Tests",
.setup = testsuite_setup,
.teardown = testsuite_teardown,
.unit_test_cases = {
TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
TEST_CASES_END() /**< NULL terminate unit test array */
}
};
static struct unit_test_suite bbdev_throughput_testsuite = {
    .suite_name = "BBdev Throughput Tests",
    .setup = testsuite_setup,
@@ -5114,53 +3379,8 @@ static struct unit_test_suite bbdev_throughput_testsuite = {
    }
};
static struct unit_test_suite bbdev_validation_testsuite = {
.suite_name = "BBdev Validation Tests",
.setup = testsuite_setup,
.teardown = testsuite_teardown,
.unit_test_cases = {
TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
TEST_CASES_END() /**< NULL terminate unit test array */
}
};
static struct unit_test_suite bbdev_latency_testsuite = {
.suite_name = "BBdev Latency Tests",
.setup = testsuite_setup,
.teardown = testsuite_teardown,
.unit_test_cases = {
TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
TEST_CASES_END() /**< NULL terminate unit test array */
}
};
static struct unit_test_suite bbdev_offload_cost_testsuite = {
.suite_name = "BBdev Offload Cost Tests",
.setup = testsuite_setup,
.teardown = testsuite_teardown,
.unit_test_cases = {
TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
TEST_CASES_END() /**< NULL terminate unit test array */
}
};
static struct unit_test_suite bbdev_interrupt_testsuite = {
.suite_name = "BBdev Interrupt Tests",
.setup = interrupt_testsuite_setup,
.teardown = testsuite_teardown,
.unit_test_cases = {
TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
TEST_CASES_END() /**< NULL terminate unit test array */
}
};
//REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
//REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
//REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
//REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
//REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
#define MAX_QUEUES RTE_MAX_LCORE
@@ -5331,162 +3551,6 @@ print_usage(const char *prog_name)
    printf("\n");
}
static int
parse_args(int argc, char **argv, struct test_params *tp)
{
int opt, option_index;
unsigned int num_tests = 0;
bool test_cases_present = false;
bool test_vector_present = false;
struct test_command *t;
char *tokens[MAX_CMDLINE_TESTCASES];
int tc, ret;
static struct option lgopts[] = {
{ "num-ops", 1, 0, 'n' },
{ "burst-size", 1, 0, 'b' },
{ "test-cases", 1, 0, 'c' },
{ "test-vector", 1, 0, 'v' },
{ "lcores", 1, 0, 'l' },
{ "snr", 1, 0, 's' },
{ "iter_max", 6, 0, 't' },
{ "init-device", 0, 0, 'i'},
{ "help", 0, 0, 'h' },
{ NULL, 0, 0, 0 }
};
tp->iter_max = DEFAULT_ITER;
while ((opt = getopt_long(argc, argv, "hin:b:c:v:l:s:t:", lgopts,
&option_index)) != EOF)
switch (opt) {
case 'n':
TEST_ASSERT(strlen(optarg) > 0,
"Num of operations is not provided");
tp->num_ops = strtol(optarg, NULL, 10);
break;
case 'b':
TEST_ASSERT(strlen(optarg) > 0,
"Burst size is not provided");
tp->burst_sz = strtol(optarg, NULL, 10);
TEST_ASSERT(tp->burst_sz <= MAX_BURST,
"Burst size mustn't be greater than %u",
MAX_BURST);
break;
case 'c':
TEST_ASSERT(test_cases_present == false,
"Test cases provided more than once");
test_cases_present = true;
ret = rte_strsplit(optarg, strlen(optarg),
tokens, MAX_CMDLINE_TESTCASES, tc_sep);
TEST_ASSERT(ret <= MAX_CMDLINE_TESTCASES,
"Too many test cases (max=%d)",
MAX_CMDLINE_TESTCASES);
for (tc = 0; tc < ret; ++tc) {
/* Find matching test case */
TAILQ_FOREACH(t, &commands_list, next)
if (!strcmp(tokens[tc], t->command))
tp->test_to_run[num_tests] = t;
TEST_ASSERT(tp->test_to_run[num_tests] != NULL,
"Unknown test case: %s",
tokens[tc]);
++num_tests;
}
break;
case 'v':
TEST_ASSERT(test_vector_present == false,
"Test vector provided more than once");
test_vector_present = true;
TEST_ASSERT(strlen(optarg) > 0,
"Config file name is null");
snprintf(tp->test_vector_filename,
sizeof(tp->test_vector_filename),
"%s", optarg);
break;
case 's':
TEST_ASSERT(strlen(optarg) > 0,
"SNR is not provided");
tp->snr = strtod(optarg, NULL);
break;
case 't':
TEST_ASSERT(strlen(optarg) > 0,
"Iter_max is not provided");
tp->iter_max = strtol(optarg, NULL, 10);
break;
case 'l':
TEST_ASSERT(strlen(optarg) > 0,
"Num of lcores is not provided");
tp->num_lcores = strtol(optarg, NULL, 10);
TEST_ASSERT(tp->num_lcores <= RTE_MAX_LCORE,
"Num of lcores mustn't be greater than %u",
RTE_MAX_LCORE);
break;
case 'i':
/* indicate fpga fec config required */
tp->init_device = true;
break;
case 'h':
print_usage(argv[0]);
return 0;
default:
printf("ERROR: Unknown option: -%c\n", opt);
return -1;
}
if (tp->num_ops == 0) {
printf(
"WARNING: Num of operations was not provided or was set 0. Set to default (%u)\n",
DEFAULT_OPS);
tp->num_ops = DEFAULT_OPS;
}
if (tp->burst_sz == 0) {
printf(
"WARNING: Burst size was not provided or was set 0. Set to default (%u)\n",
DEFAULT_BURST);
tp->burst_sz = DEFAULT_BURST;
}
if (tp->num_lcores == 0) {
printf(
"WARNING: Num of lcores was not provided or was set 0. Set to value from RTE config (%u)\n",
rte_lcore_count());
tp->num_lcores = rte_lcore_count();
}
TEST_ASSERT(tp->burst_sz <= tp->num_ops,
"Burst size (%u) mustn't be greater than num ops (%u)",
tp->burst_sz, tp->num_ops);
tp->num_tests = num_tests;
return 0;
}
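/* A hedged usage sketch, not part of this commit: the wrapper and the command
 * line below are assumed purely for illustration of how parse_args() maps
 * options onto struct test_params. */
static void
parse_args_example(void)
{
    char *argv_example[] = {
        "test-bbdev", "-n", "64", "-b", "32", "-l", "2",
        "-v", "ldpc_dec_v8480.data"
    };
    struct test_params tp_example;

    memset(&tp_example, 0, sizeof(tp_example));
    if (parse_args(RTE_DIM(argv_example), argv_example, &tp_example) == 0) {
        /* Expected after parsing: num_ops == 64, burst_sz == 32,
         * num_lcores == 2, and the vector file name copied into
         * test_vector_filename. */
    }
}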
static int
run_all_tests(void)
{
int ret = TEST_SUCCESS;
struct test_command *t;
TAILQ_FOREACH(t, &commands_list, next)
ret |= (int) t->callback();
return ret;
}
static int
run_parsed_tests(struct test_params *tp)
{
int ret = TEST_SUCCESS;
unsigned int i;
for (i = 0; i < tp->num_tests; ++i)
ret |= (int) tp->test_to_run[i]->callback();
return ret;
}
static int
init_input(uint32_t **data, uint32_t data_length)
@@ -5652,13 +3716,6 @@ argv_re[1] = "-v";
argv_re[2] = "../../../targets/ARCH/test-bbdev/test_vectors/ldpc_dec_v8480.data";
//printf("after ......ret %d argc %d argv %s %s %s %s\n", ret,argc, argv[0], argv[1], argv[2], argv[3],argv[4]);
/* Parse application arguments (after the EAL ones) */
/* ret = parse_args(argc_re, argv_re, &test_params);
if (ret < 0) {
print_usage(argv_re[0]);
return 1;
}
*/
memset(&test_vector_dec, 0, sizeof(struct test_bbdev_vector));
@@ -5707,11 +3764,19 @@ test_params.num_ops=2;
test_params.burst_sz=2;
test_params.num_lcores=1;
test_params.num_tests = 1;
run_all_tests();
//run_all_tests();
testsuite_setup();
ut_setup();
throughput_tc();
char *data = ldpc_output;
data_len = (p_decParams->BG==1)?(22*p_decParams->Z):(10*p_decParams->Z);
memcpy(&p_out[0], data, C*data_len);
//p_out = ldpc_output;
ut_teardown();
//for (i=0;i<8;i++)
//printf("p_out[%d] = %x addr %p ldpcout addr %p\n",i,p_out[i],p_out+i,ldpc_output+i);
@@ -675,12 +675,12 @@ uint32_t nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
#endif
    no_iteration_ldpc = ulsch->max_ldpc_iterations + 1;
}
for (int k=0;k<8;k++)
/*for (int k=0;k<8;k++)
{
    printf("output decoder [%d] = 0x%02x \n", k, harq_process->c[r][k]);
    printf("llrprocbuf [%d] = %x adr %p\n", k, llrProcBuf[k], llrProcBuf+k);
}
*/
memcpy(harq_process->b+offset,
    harq_process->c[r],