wangjie / OpenXG-RAN / Commits / 887f8a90

Commit 887f8a90 authored Jul 27, 2021 by Hongzhi Wang

    code cleanup ldpc offload

parent a2c9a280

Showing 2 changed files with 324 additions and 2259 deletions:

  openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload.c   +322  -2257
  openair1/PHY/NR_TRANSPORT/nr_ulsch_decoding.c                  +2    -2

openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload.c
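For orientation, the changed functions in this file all follow the same DPDK bbdev offload pattern: allocate operation descriptors from a mempool, fill them from reference buffers, enqueue the burst to the accelerator queue, poll for completions, validate, and free. The following is only a minimal sketch of that flow, assuming a bbdev device, queue and operation mempool have already been configured; the names dev_id, queue_id, mp and burst are illustrative and not taken from the diff below.

#include <rte_bbdev.h>
#include <rte_bbdev_op.h>

/* Minimal sketch of the bbdev LDPC-decode offload loop used throughout the
 * functions below. Assumes dev_id/queue_id refer to a configured bbdev queue
 * and mp is an operation mempool; burst is an illustrative burst size. */
static int ldpc_dec_offload_sketch(uint16_t dev_id, uint16_t queue_id,
    struct rte_mempool *mp, uint16_t burst)
{
  struct rte_bbdev_dec_op *ops_enq[burst], *ops_deq[burst];
  uint16_t enq = 0, deq = 0;

  /* Take operation descriptors from the mempool. */
  if (rte_bbdev_dec_op_alloc_bulk(mp, ops_enq, burst) != 0)
    return -1;

  /* ... fill ops_enq[i]->ldpc_dec input/output mbufs here ... */

  /* Hand the burst to the accelerator, retrying until all are accepted. */
  while (enq < burst)
    enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
        &ops_enq[enq], burst - enq);

  /* Poll until every operation has come back. */
  while (deq < enq)
    deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
        &ops_deq[deq], enq - deq);

  /* ... validate ops_deq[i]->ldpc_dec.hard_output here ... */

  rte_bbdev_dec_op_free_bulk(ops_enq, deq);
  return 0;
}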
...
@@ -1930,11 +1930,11 @@ printf("segment %d offset %d length %d data length %d\n",i, offset,total_data_si
      // TEST_ASSERT(orig_op->segments[i].length == data_len,
      //     "Length of segment differ in original (%u) and filled (%u) op",
      //     orig_op->segments[i].length, data_len);
      TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
      /*
      TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
          rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
          data_len,
          "Output buffers (CB=%u) are not equal", i);
      */
      m = m->next;
    }
...
@@ -2157,29 +2157,6 @@ validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
  return TEST_SUCCESS;
}

/* Check Number of code blocks errors */
static int
validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
{
  unsigned int i;
  struct op_data_entries *hard_data_orig =
      &test_vector.entries[DATA_HARD_OUTPUT];
  struct rte_bbdev_op_ldpc_dec *ops_td;
  struct rte_bbdev_op_data *hard_output;
  int errors = 0;
  struct rte_mbuf *m;

  for (i = 0; i < n; ++i) {
    ops_td = &ops[i]->ldpc_dec;
    hard_output = &ops_td->hard_output;
    m = hard_output->data;
    if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
        hard_data_orig->segments[0].addr,
        hard_data_orig->segments[0].length))
      errors++;
  }
  return errors;
}

static int
validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
...
@@ -2939,2129 +2916,443 @@ printf("bufs len %d data %x addr orig %p addr %p\n",bufs->inputs[0].data->data_l
  return TEST_SUCCESS;
}

static int
throughput_intr_lcore_dec(void *arg)
throughput_pmd_lcore_ldpc_dec(void *arg)
{
  struct thread_params *tp = arg;
  unsigned int enqueued;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_to_process = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops[num_to_process];
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops_enq[num_ops];
  struct rte_bbdev_dec_op *ops_deq[num_ops];
  struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  int ret, i, j;
  uint16_t num_to_enq, enq;
  uint16_t num_to_enq;
  struct rte_bbdev_op_data *hard_output;
  struct rte_bbdev_op_ldpc_dec *ops_td;
  bool extDdr = check_bit(ldpc_cap_flags,
      RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
  bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
  bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
      "Failed to enable interrupts for dev: %u, queue_id: %u",
      tp->dev_id, queue_id);

  rte_bbdev_info_get(tp->dev_id, &info);

  TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device",
      info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  rte_atomic16_clear(&tp->processing_status);
  rte_atomic16_clear(&tp->nb_dequeued);
  //&op_params->q_bufs[socket_id][queue_id].inputs
  //printf("bufs len %d\n",bufs->input.data->data_len);

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, num_to_process);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process);
  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  /* For throughput tests we need to disable early termination */
  if (check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
    ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
  ref_op->ldpc_dec.iter_max = get_iter_max();
  ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
    copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, bufs->soft_outputs,
        tp->op_params->ref_dec_op);
        bufs->harq_inputs, bufs->harq_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_to_process; ++j)
    ops[j]->opaque_data = (void *)(uintptr_t)j;
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (j = 0; j < TEST_REPETITIONS; ++j) {
    for (i = 0; i < num_to_process; ++i)
      rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
  for (i = 0; i < TEST_REPETITIONS; ++i) {
    for (j = 0; j < num_ops; ++j) {
      if (!loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
      if (hc_out || loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
    }
    if (extDdr) {
      bool preload = i == (TEST_REPETITIONS - 1);
      preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
          num_ops, preload);
    }
    start_time = rte_rdtsc_precise();
    tp->start_time = rte_rdtsc_precise();
    for (enqueued = 0; enqueued < num_to_process;) {
    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;

      if (unlikely(num_to_process - enqueued < num_to_enq))
        num_to_enq = num_to_process - enqueued;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;
      //printf("pmd lcore ldpc dec data %x\n", *ops_enq[enq]->ldpc_dec.input.addr);

      enq = 0;
      do {
        enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
            queue_id, &ops[enqueued], num_to_enq);
      } while (unlikely(num_to_enq != enq));
      enqueued += enq;
      enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_enq[enq], num_to_enq);

      /* Write to thread burst_sz current number of enqueued
       * descriptors. It ensures that proper number of
       * descriptors will be dequeued in callback
       * function - needed for last batch in case where
       * the number of operations is not a multiple of
       * burst size.
       */
      rte_atomic16_set(&tp->burst_sz, num_to_enq);

      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
      /* Wait until processing of previous batch is
       * completed
       */
      while (rte_atomic16_read(&tp->nb_dequeued) != (int16_t)enqueued)
        rte_pause();
      /* ops_td = &ops_deq[enq]->ldpc_dec;
      hard_output = &ops_td->hard_output;
      struct rte_mbuf *m = hard_output->data;
      printf("deq nb segs %d\n", m->nb_segs);
      */
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    if (j != TEST_REPETITIONS - 1)
      rte_atomic16_clear(&tp->nb_dequeued);

    total_time += rte_rdtsc_precise() - start_time;
  }

  tp->iter_count = 0;
  /* get the max of iter_count for all dequeued ops */
  for (i = 0; i < num_ops; ++i) {
    tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
        tp->iter_count);
  }
  if (extDdr) {
    /* Read loopback is not thread safe */
    retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
  }

  //if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
  //printf("op type != OP NONE\n");
  ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
      tp->op_params->vector_mask);
  TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  //}

  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);

  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
      1000000.0) / ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static int
throughput_intr_lcore_enc(void *arg)
throughput_pmd_lcore_ldpc_enc(void *arg)
{
  struct thread_params *tp = arg;
  unsigned int enqueued;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_to_process = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops[num_to_process];
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops_enq[num_ops];
  struct rte_bbdev_enc_op *ops_deq[num_ops];
  struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  int ret, i, j;
  uint16_t num_to_enq, enq;
  uint16_t num_to_enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
      "Failed to enable interrupts for dev: %u, queue_id: %u",
      tp->dev_id, queue_id);

  rte_bbdev_info_get(tp->dev_id, &info);

  TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device",
      info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  rte_atomic16_clear(&tp->processing_status);
  rte_atomic16_clear(&tp->nb_dequeued);

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, num_to_process);
  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
      num_to_process);
      num_ops);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
        bufs->hard_outputs, tp->op_params->ref_enc_op);
    copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_to_process; ++j)
    ops[j]->opaque_data = (void *)(uintptr_t)j;
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (j = 0; j < TEST_REPETITIONS; ++j) {
    for (i = 0; i < num_to_process; ++i)
      rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
  for (i = 0; i < TEST_REPETITIONS; ++i) {
    tp->start_time = rte_rdtsc_precise();
    for (enqueued = 0; enqueued < num_to_process;) {
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      for (j = 0; j < num_ops; ++j)
        mbuf_reset(ops_enq[j]->turbo_enc.output.data);

    start_time = rte_rdtsc_precise();

    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;

      if (unlikely(num_to_process - enqueued < num_to_enq))
        num_to_enq = num_to_process - enqueued;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq = 0;
      do {
        enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
            queue_id, &ops[enqueued], num_to_enq);
      } while (unlikely(enq != num_to_enq));
      enqueued += enq;
      enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
          queue_id, &ops_enq[enq], num_to_enq);

      /* Write to thread burst_sz current number of enqueued
       * descriptors. It ensures that proper number of
       * descriptors will be dequeued in callback
       * function - needed for last batch in case where
       * the number of operations is not a multiple of
       * burst size.
       */
      rte_atomic16_set(&tp->burst_sz, num_to_enq);

      deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    /* Wait until processing of previous batch is
     * completed
     */
    while (rte_atomic16_read(&tp->nb_dequeued) != (int16_t)enqueued)
      rte_pause();

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    if (j != TEST_REPETITIONS - 1)
      rte_atomic16_clear(&tp->nb_dequeued);

    total_time += rte_rdtsc_precise() - start_time;
  }

  return TEST_SUCCESS;
}

  if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
    ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
static int
throughput_intr_lcore_ldpc_enc(void *arg)
{
  struct thread_params *tp = arg;
  unsigned int enqueued;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_to_process = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops[num_to_process];
  struct test_buffers *bufs = NULL;
  struct rte_bbdev_info info;
  int ret, i, j;
  uint16_t num_to_enq, enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
      "Failed to enable interrupts for dev: %u, queue_id: %u",
      tp->dev_id, queue_id);

  rte_bbdev_info_get(tp->dev_id, &info);

  TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device",
      info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  rte_atomic16_clear(&tp->processing_status);
  rte_atomic16_clear(&tp->nb_dequeued);

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, num_to_process);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_ldpc_enc_op(ops, num_to_process, 0, bufs->inputs,
        bufs->hard_outputs, tp->op_params->ref_enc_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_to_process; ++j)
    ops[j]->opaque_data = (void *)(uintptr_t)j;

  for (j = 0; j < TEST_REPETITIONS; ++j) {
    for (i = 0; i < num_to_process; ++i)
      rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);

    tp->start_time = rte_rdtsc_precise();
    for (enqueued = 0; enqueued < num_to_process;) {
      num_to_enq = burst_sz;

      if (unlikely(num_to_process - enqueued < num_to_enq))
        num_to_enq = num_to_process - enqueued;

      enq = 0;
      do {
        enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
            queue_id, &ops[enqueued], num_to_enq);
      } while (unlikely(enq != num_to_enq));
      enqueued += enq;

      /* Write to thread burst_sz current number of enqueued
       * descriptors. It ensures that proper number of
       * descriptors will be dequeued in callback
       * function - needed for last batch in case where
       * the number of operations is not a multiple of
       * burst size.
       */
      rte_atomic16_set(&tp->burst_sz, num_to_enq);

      /* Wait until processing of previous batch is
       * completed
       */
      while (rte_atomic16_read(&tp->nb_dequeued) != (int16_t)enqueued)
        rte_pause();
    }
    if (j != TEST_REPETITIONS - 1)
      rte_atomic16_clear(&tp->nb_dequeued);
  }

  return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_dec(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops_enq[num_ops];
  struct rte_bbdev_dec_op *ops_deq[num_ops];
  struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(tp->dev_id, &info);

  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device",
      info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, bufs->soft_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < TEST_REPETITIONS; ++i) {

    for (j = 0; j < num_ops; ++j)
      mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);

    start_time = rte_rdtsc_precise();

    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;

      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
          queue_id, &ops_enq[enq], num_to_enq);

      deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  tp->iter_count = 0;
  /* get the max of iter_count for all dequeued ops */
  for (i = 0; i < num_ops; ++i) {
    tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
        tp->iter_count);
  }

  if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
    ret = validate_dec_op(ops_deq, num_ops, ref_op,
        tp->op_params->vector_mask);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_dec_TB_size(ref_op);

  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
      1000000.0) / ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static int
bler_pmd_lcore_ldpc_dec(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops_enq[num_ops];
  struct rte_bbdev_dec_op *ops_deq[num_ops];
  struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  float parity_bler = 0;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;
  bool extDdr = check_bit(ldpc_cap_flags,
      RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
  bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
  bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(tp->dev_id, &info);

  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device",
      info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  /* For BLER tests we need to enable early termination */
  if (!check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
    ref_op->ldpc_dec.op_flags += RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
  ref_op->ldpc_dec.iter_max = get_iter_max();
  ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, bufs->soft_outputs,
        bufs->harq_inputs, bufs->harq_outputs, ref_op);
  generate_llr_input(num_ops, bufs->inputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < 1; ++i) { /* Could add more iterations */
    for (j = 0; j < num_ops; ++j) {
      if (!loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
      if (hc_out || loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
    }
    if (extDdr) {
      bool preload = i == (TEST_REPETITIONS - 1);
      preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
          num_ops, preload);
    }
    start_time = rte_rdtsc_precise();

    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;

      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_enq[enq], num_to_enq);

      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  tp->iter_count = 0;
  tp->iter_average = 0;
  /* get the max of iter_count for all dequeued ops */
  for (i = 0; i < num_ops; ++i) {
    tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
        tp->iter_count);
    tp->iter_average += (double)ops_enq[i]->ldpc_dec.iter_count;
    if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
      parity_bler += 1.0;
  }

  parity_bler /= num_ops; /* This one is based on SYND */
  tp->iter_average /= num_ops;
  tp->bler = (double)validate_ldpc_bler(ops_deq, num_ops) / num_ops;

  if (test_vector.op_type != RTE_BBDEV_OP_NONE
      && tp->bler == 0
      && parity_bler == 0
      && !hc_out) {
    ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
        tp->op_params->vector_mask);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
  tp->ops_per_sec = ((double)num_ops * 1) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
      1000000.0) / ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_ldpc_dec(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops_enq[num_ops];
  struct rte_bbdev_dec_op *ops_deq[num_ops];
  struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;
  struct rte_bbdev_op_data *hard_output;
  struct rte_bbdev_op_ldpc_dec *ops_td;
  bool extDdr = check_bit(ldpc_cap_flags,
      RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
  bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
  bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(tp->dev_id, &info);

  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device",
      info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  //&op_params->q_bufs[socket_id][queue_id].inputs
  //printf("bufs len %d\n",bufs->input.data->data_len);

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  /* For throughput tests we need to disable early termination */
  if (check_bit(ref_op->ldpc_dec.op_flags,
      RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
    ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
  ref_op->ldpc_dec.iter_max = get_iter_max();
  ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, bufs->soft_outputs,
        bufs->harq_inputs, bufs->harq_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < TEST_REPETITIONS; ++i) {
    for (j = 0; j < num_ops; ++j) {
      if (!loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
      if (hc_out || loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
    }
    if (extDdr) {
      bool preload = i == (TEST_REPETITIONS - 1);
      preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
          num_ops, preload);
    }
    start_time = rte_rdtsc_precise();

    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;

      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;
      //printf("pmd lcore ldpc dec data %x\n", *ops_enq[enq]->ldpc_dec.input.addr);

      enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_enq[enq], num_to_enq);

      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
      /* ops_td = &ops_deq[enq]->ldpc_dec;
      hard_output = &ops_td->hard_output;
      struct rte_mbuf *m = hard_output->data;
      printf("deq nb segs %d\n", m->nb_segs);
      */
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  tp->iter_count = 0;
  /* get the max of iter_count for all dequeued ops */
  for (i = 0; i < num_ops; ++i) {
    tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
        tp->iter_count);
  }
  if (extDdr) {
    /* Read loopback is not thread safe */
    retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
  }

  //if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
  //printf("op type != OP NONE\n");
  ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
      tp->op_params->vector_mask);
  TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  //}

  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);

  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
      1000000.0) / ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_enc(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops_enq[num_ops];
  struct rte_bbdev_enc_op *ops_deq[num_ops];
  struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(tp->dev_id, &info);

  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device",
      info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < TEST_REPETITIONS; ++i) {

    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      for (j = 0; j < num_ops; ++j)
        mbuf_reset(ops_enq[j]->turbo_enc.output.data);

    start_time = rte_rdtsc_precise();

    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;

      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
          queue_id, &ops_enq[enq], num_to_enq);

      deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
    ret = validate_enc_op(ops_deq, num_ops, ref_op);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_enc_TB_size(ref_op);

  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
      1000000.0) / ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_ldpc_enc(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops_enq[num_ops];
  struct rte_bbdev_enc_op *ops_deq[num_ops];
  struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(tp->dev_id, &info);

  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device",
      info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < TEST_REPETITIONS; ++i) {

    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      for (j = 0; j < num_ops; ++j)
        mbuf_reset(ops_enq[j]->turbo_enc.output.data);

    start_time = rte_rdtsc_precise();

    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;

      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
          queue_id, &ops_enq[enq], num_to_enq);

      deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
          queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
    ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);

  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
      1000000.0) / ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static void
print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int iter = 0;
  double total_mops = 0, total_mbps = 0;

  for (iter = 0; iter < used_cores; iter++) {
    printf("Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
        t_params[iter].lcore_id, t_params[iter].ops_per_sec,
        t_params[iter].mbps);
    total_mops += t_params[iter].ops_per_sec;
    total_mbps += t_params[iter].mbps;
  }
  printf("\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
      used_cores, total_mops, total_mbps);
}
/* Aggregate the performance results over the number of cores used */
static void
print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int core_idx = 0;
  double total_mops = 0, total_mbps = 0;
  uint8_t iter_count = 0;

  for (core_idx = 0; core_idx < used_cores; core_idx++) {
    printf("Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
        t_params[core_idx].lcore_id, t_params[core_idx].ops_per_sec,
        t_params[core_idx].mbps, t_params[core_idx].iter_count);
    total_mops += t_params[core_idx].ops_per_sec;
    total_mbps += t_params[core_idx].mbps;
    iter_count = RTE_MAX(iter_count, t_params[core_idx].iter_count);
  }
  printf("\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
      used_cores, total_mops, total_mbps, iter_count);
}
/* Aggregate the performance results over the number of cores used */
static void
print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int core_idx = 0;
  double total_mbps = 0, total_bler = 0, total_iter = 0;
  double snr = get_snr();

  for (core_idx = 0; core_idx < used_cores; core_idx++) {
    printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
        t_params[core_idx].lcore_id,
        t_params[core_idx].bler * 100,
        t_params[core_idx].iter_average,
        t_params[core_idx].mbps,
        get_vector_filename());
    total_mbps += t_params[core_idx].mbps;
    total_bler += t_params[core_idx].bler;
    total_iter += t_params[core_idx].iter_average;
  }
  total_bler /= used_cores;
  total_iter /= used_cores;

  printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
      snr, total_bler * 100, total_iter, get_iter_max(),
      total_mbps, get_vector_filename());
}
/*
 * Test function that determines BLER wireless performance
 */
static int
bler_test(struct active_device *ad, struct test_op_params *op_params)
{
  int ret;
  unsigned int lcore_id, used_cores = 0;
  struct thread_params *t_params;
  struct rte_bbdev_info info;
  lcore_function_t *bler_function;
  uint16_t num_lcores;
  const char *op_type_str;

  rte_bbdev_info_get(ad->dev_id, &info);

  op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", test_vector.op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
      info.dev_name, ad->nb_queues, op_params->burst_sz,
      op_params->num_to_process, op_params->num_lcores,
      op_type_str,
      intr_enabled ? "Interrupt mode" : "PMD mode",
      (double)rte_get_tsc_hz() / 1000000000.0);

  /* Set number of lcores */
  num_lcores = (ad->nb_queues < (op_params->num_lcores))
      ? ad->nb_queues
      : op_params->num_lcores;

  /* Allocate memory for thread parameters structure */
  t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
      RTE_CACHE_LINE_SIZE);
  TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
      RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
        RTE_CACHE_LINE_SIZE));

  if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
    bler_function = bler_pmd_lcore_ldpc_dec;
  else
    return TEST_SKIPPED;

  rte_atomic16_set(&op_params->sync, SYNC_WAIT);

  /* Master core is set at first entry */
  t_params[0].dev_id = ad->dev_id;
  t_params[0].lcore_id = rte_lcore_id();
  t_params[0].op_params = op_params;
  t_params[0].queue_id = ad->queue_ids[used_cores++];
  t_params[0].iter_count = 0;

  RTE_LCORE_FOREACH_SLAVE(lcore_id) {
    if (used_cores >= num_lcores)
      break;

    t_params[used_cores].dev_id = ad->dev_id;
    t_params[used_cores].lcore_id = lcore_id;
    t_params[used_cores].op_params = op_params;
    t_params[used_cores].queue_id = ad->queue_ids[used_cores];
    t_params[used_cores].iter_count = 0;

    rte_eal_remote_launch(bler_function,
        &t_params[used_cores++], lcore_id);
  }

  rte_atomic16_set(&op_params->sync, SYNC_START);
  ret = bler_function(&t_params[0]);

  /* Master core is always used */
  for (used_cores = 1; used_cores < num_lcores; used_cores++)
    ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

  print_dec_bler(t_params, num_lcores);

  /* Return if test failed */
  if (ret) {
    rte_free(t_params);
    return ret;
  }

  /* Function to print something here*/
  rte_free(t_params);
  return ret;
}
/*
 * Test function that determines how long an enqueue + dequeue of a burst
 * takes on available lcores.
 */
static int
throughput_test(struct active_device *ad, struct test_op_params *op_params)
{
  int ret;
  unsigned int lcore_id, used_cores = 0;
  struct thread_params *t_params, *tp;
  struct rte_bbdev_info info;
  lcore_function_t *throughput_function;
  uint16_t num_lcores;
  const char *op_type_str;

  rte_bbdev_info_get(ad->dev_id, &info);

  op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", test_vector.op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== new test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
      info.dev_name, ad->nb_queues, op_params->burst_sz,
      op_params->num_to_process, op_params->num_lcores,
      op_type_str,
      intr_enabled ? "Interrupt mode" : "PMD mode",
      (double)rte_get_tsc_hz() / 1000000000.0);

  /* Set number of lcores */
  num_lcores = (ad->nb_queues < (op_params->num_lcores))
      ? ad->nb_queues
      : op_params->num_lcores;

  /* Allocate memory for thread parameters structure */
  t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
      RTE_CACHE_LINE_SIZE);
  TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
      RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
        RTE_CACHE_LINE_SIZE));

  if (intr_enabled) {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
      throughput_function = throughput_intr_lcore_dec;
    else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      throughput_function = throughput_intr_lcore_ldpc_dec;
    else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
      throughput_function = throughput_intr_lcore_enc;
    else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
      throughput_function = throughput_intr_lcore_ldpc_enc;
    else
      throughput_function = throughput_intr_lcore_enc;

    /* Dequeue interrupt callback registration */
    ret = rte_bbdev_callback_register(ad->dev_id,
        RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
        t_params);
    if (ret < 0) {
      rte_free(t_params);
      return ret;
    }
  } else {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
      throughput_function = throughput_pmd_lcore_dec;
    else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      throughput_function = throughput_pmd_lcore_ldpc_dec;
    else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
      throughput_function = throughput_pmd_lcore_enc;
    else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
      throughput_function = throughput_pmd_lcore_ldpc_enc;
    else
      throughput_function = throughput_pmd_lcore_enc;
  }

  rte_atomic16_set(&op_params->sync, SYNC_WAIT);

  /* Master core is set at first entry */
  t_params[0].dev_id = ad->dev_id;
  t_params[0].lcore_id = rte_lcore_id();
  t_params[0].op_params = op_params;
  t_params[0].queue_id = ad->queue_ids[used_cores++];
  t_params[0].iter_count = 0;

  RTE_LCORE_FOREACH_SLAVE(lcore_id) {
    if (used_cores >= num_lcores)
      break;

    t_params[used_cores].dev_id = ad->dev_id;
    t_params[used_cores].lcore_id = lcore_id;
    t_params[used_cores].op_params = op_params;
    t_params[used_cores].queue_id = ad->queue_ids[used_cores];
    t_params[used_cores].iter_count = 0;

    rte_eal_remote_launch(throughput_pmd_lcore_ldpc_dec,
        &t_params[used_cores++], lcore_id);
  }

  rte_atomic16_set(&op_params->sync, SYNC_START);
  ret = throughput_function(&t_params[0]);

  /* Master core is always used */
  for (used_cores = 1; used_cores < num_lcores; used_cores++)
    ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

  /* Return if test failed */
  if (ret) {
    rte_free(t_params);
    return ret;
  }

  /* Print throughput if interrupts are disabled and test passed */
  if (!intr_enabled) {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      print_dec_throughput(t_params, num_lcores);
    else
      print_enc_throughput(t_params, num_lcores);
    rte_free(t_params);
    return ret;
  }

  /* In interrupt TC we need to wait for the interrupt callback to deqeue
   * all pending operations. Skip waiting for queues which reported an
   * error using processing_status variable.
   * Wait for master lcore operations.
   */
  tp = &t_params[0];
  while ((rte_atomic16_read(&tp->nb_dequeued) <
      op_params->num_to_process) &&
      (rte_atomic16_read(&tp->processing_status) !=
      TEST_FAILED))
    rte_pause();

  tp->ops_per_sec /= TEST_REPETITIONS;
  tp->mbps /= TEST_REPETITIONS;
  ret |= (int)rte_atomic16_read(&tp->processing_status);

  /* Wait for slave lcores operations */
  for (used_cores = 1; used_cores < num_lcores; used_cores++) {
    tp = &t_params[used_cores];

    while ((rte_atomic16_read(&tp->nb_dequeued) <
        op_params->num_to_process) &&
        (rte_atomic16_read(&tp->processing_status) !=
        TEST_FAILED))
      rte_pause();

    tp->ops_per_sec /= TEST_REPETITIONS;
    tp->mbps /= TEST_REPETITIONS;
    ret |= (int)rte_atomic16_read(&tp->processing_status);
  }

  /* Print throughput if test passed */
  if (!ret) {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      print_dec_throughput(t_params, num_lcores);
    else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
      print_enc_throughput(t_params, num_lcores);
  }

  rte_free(t_params);
  return ret;
}
static int
latency_test_dec(struct rte_mempool *mempool,
    struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
    int vector_mask, uint16_t dev_id, uint16_t queue_id,
    const uint16_t num_to_process, uint16_t burst_sz,
    uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
  int ret = TEST_SUCCESS;
  uint16_t i, j, dequeued;
  struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t start_time = 0, last_time = 0;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;
    bool first_time = true;
    last_time = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_dec_op_alloc_bulk() failed");
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_dec_op(ops_enq, burst_sz, dequeued,
          bufs->inputs, bufs->hard_outputs,
          bufs->soft_outputs, ref_op);

    /* Set counter to validate the ordering */
    for (j = 0; j < burst_sz; ++j)
      ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

    start_time = rte_rdtsc_precise();

    enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
        burst_sz);
    TEST_ASSERT(enq == burst_sz,
        "Error enqueueing burst, expected %u, got %u", burst_sz, enq);

    /* Dequeue */
    do {
      deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
          &ops_deq[deq], burst_sz - deq);
      if (likely(first_time && (deq > 0))) {
        last_time = rte_rdtsc_precise() - start_time;
        first_time = false;
      }
    } while (unlikely(burst_sz != deq));

    *max_time = RTE_MAX(*max_time, last_time);
    *min_time = RTE_MIN(*min_time, last_time);
    *total_time += last_time;

    if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
      ret = validate_dec_op(ops_deq, burst_sz, ref_op,
          vector_mask);
      TEST_ASSERT_SUCCESS(ret, "Validation failed!");
    }

    rte_bbdev_dec_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}
static int
latency_test_ldpc_dec(struct rte_mempool *mempool,
    struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
    int vector_mask, uint16_t dev_id, uint16_t queue_id,
    const uint16_t num_to_process, uint16_t burst_sz,
    uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
  int ret = TEST_SUCCESS;
  uint16_t i, j, dequeued;
  struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t start_time = 0, last_time = 0;
  bool extDdr = ldpc_cap_flags &
      RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;
    bool first_time = true;
    last_time = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_dec_op_alloc_bulk() failed");

    /* For latency tests we need to disable early termination */
    if (check_bit(ref_op->ldpc_dec.op_flags,
        RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
      ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
    ref_op->ldpc_dec.iter_max = get_iter_max();
    ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
          bufs->inputs, bufs->hard_outputs,
          bufs->soft_outputs, bufs->harq_inputs,
          bufs->harq_outputs, ref_op);

    if (extDdr)
      preload_harq_ddr(dev_id, queue_id, ops_enq,
          burst_sz, true);

    /* Set counter to validate the ordering */
    for (j = 0; j < burst_sz; ++j)
      ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

    start_time = rte_rdtsc_precise();

    enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
        &ops_enq[enq], burst_sz);
    TEST_ASSERT(enq == burst_sz,
        "Error enqueueing burst, expected %u, got %u", burst_sz, enq);

    /* Dequeue */
    do {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
          &ops_deq[deq], burst_sz - deq);
      if (likely(first_time && (deq > 0))) {
        last_time = rte_rdtsc_precise() - start_time;
        first_time = false;
      }
    } while (unlikely(burst_sz != deq));

    *max_time = RTE_MAX(*max_time, last_time);
    *min_time = RTE_MIN(*min_time, last_time);
    *total_time += last_time;

    if (extDdr)
      retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);

    if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
      ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
          vector_mask);
      TEST_ASSERT_SUCCESS(ret, "Validation failed!");
    }

    rte_bbdev_dec_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }
  return i;
}
static int
latency_test_enc(struct rte_mempool *mempool,
    struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
    uint16_t dev_id, uint16_t queue_id,
    const uint16_t num_to_process, uint16_t burst_sz,
    uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
  int ret = TEST_SUCCESS;
  uint16_t i, j, dequeued;
  struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t start_time = 0, last_time = 0;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;
    bool first_time = true;
    last_time = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_enc_op(ops_enq, burst_sz, dequeued,
          bufs->inputs, bufs->hard_outputs, ref_op);

    /* Set counter to validate the ordering */
    for (j = 0; j < burst_sz; ++j)
      ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

    start_time = rte_rdtsc_precise();

    enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
        burst_sz);
    TEST_ASSERT(enq == burst_sz,
        "Error enqueueing burst, expected %u, got %u", burst_sz, enq);

    /* Dequeue */
    do {
      deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
          &ops_deq[deq], burst_sz - deq);
      if (likely(first_time && (deq > 0))) {
        last_time += rte_rdtsc_precise() - start_time;
        first_time = false;
      }
    } while (unlikely(burst_sz != deq));

    *max_time = RTE_MAX(*max_time, last_time);
    *min_time = RTE_MIN(*min_time, last_time);
    *total_time += last_time;

    if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
      ret = validate_enc_op(ops_deq, burst_sz, ref_op);
      TEST_ASSERT_SUCCESS(ret, "Validation failed!");
    }

    rte_bbdev_enc_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}
static int
latency_test_ldpc_enc(struct rte_mempool *mempool,
    struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
    uint16_t dev_id, uint16_t queue_id,
    const uint16_t num_to_process, uint16_t burst_sz,
    uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
  int ret = TEST_SUCCESS;
  uint16_t i, j, dequeued;
  struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t start_time = 0, last_time = 0;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;
    bool first_time = true;
    last_time = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
          bufs->inputs, bufs->hard_outputs, ref_op);

    /* Set counter to validate the ordering */
    for (j = 0; j < burst_sz; ++j)
      ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

    start_time = rte_rdtsc_precise();

    enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
        &ops_enq[enq], burst_sz);
    TEST_ASSERT(enq == burst_sz,
        "Error enqueueing burst, expected %u, got %u", burst_sz, enq);

    /* Dequeue */
    do {
      deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
          &ops_deq[deq], burst_sz - deq);
      if (likely(first_time && (deq > 0))) {
        last_time += rte_rdtsc_precise() - start_time;
        first_time = false;
      }
    } while (unlikely(burst_sz != deq));

    *max_time = RTE_MAX(*max_time, last_time);
    *min_time = RTE_MIN(*min_time, last_time);
    *total_time += last_time;

    if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
      ret = validate_enc_op(ops_deq, burst_sz, ref_op);
      TEST_ASSERT_SUCCESS(ret, "Validation failed!");
    }

    rte_bbdev_enc_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}
static int
latency_test(struct active_device *ad, struct test_op_params *op_params)
{
  int iter;
  uint16_t burst_sz = op_params->burst_sz;
  const uint16_t num_to_process = op_params->num_to_process;
  const enum rte_bbdev_op_type op_type = test_vector.op_type;
  const uint16_t queue_id = ad->queue_ids[0];
  struct test_buffers *bufs = NULL;
  struct rte_bbdev_info info;
  uint64_t total_time, min_time, max_time;
  const char *op_type_str;

  total_time = max_time = 0;
  min_time = UINT64_MAX;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(ad->dev_id, &info);
  bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  op_type_str = rte_bbdev_op_type_str(op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
      info.dev_name, burst_sz, num_to_process, op_type_str);

  if (op_type == RTE_BBDEV_OP_TURBO_DEC)
    iter = latency_test_dec(op_params->mp, bufs,
        op_params->ref_dec_op, op_params->vector_mask,
        ad->dev_id, queue_id, num_to_process,
        burst_sz, &total_time, &min_time, &max_time);
  else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
    iter = latency_test_enc(op_params->mp, bufs,
        op_params->ref_enc_op, ad->dev_id, queue_id,
        num_to_process, burst_sz, &total_time,
        &min_time, &max_time);
  else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
    iter = latency_test_ldpc_enc(op_params->mp, bufs,
        op_params->ref_enc_op, ad->dev_id, queue_id,
        num_to_process, burst_sz, &total_time,
        &min_time, &max_time);
  else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
    iter = latency_test_ldpc_dec(op_params->mp, bufs,
        op_params->ref_dec_op, op_params->vector_mask,
        ad->dev_id, queue_id, num_to_process,
        burst_sz, &total_time, &min_time, &max_time);
  else
    iter = latency_test_enc(op_params->mp, bufs,
        op_params->ref_enc_op, ad->dev_id, queue_id,
        num_to_process, burst_sz, &total_time,
        &min_time, &max_time);

  if (iter <= 0)
    return TEST_FAILED;

  double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);

  printf("Operation latency:\n"
      "\tavg: %lg cycles, %lg us\n"
      "\tmin: %lg cycles, %lg us\n"
      "\tmax: %lg cycles, %lg us\n",
      (double)total_time / (double)iter,
      (double)(total_time * 1000000) / (double)iter /
      (double)rte_get_tsc_hz(),
      (double)min_time,
      (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
      (double)max_time,
      (double)(max_time * 1000000) /
  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
      1000000.0) / ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
#ifdef RTE_BBDEV_OFFLOAD_COST
static int
get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
    struct rte_bbdev_stats *stats)
{
  struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
  struct rte_bbdev_stats *q_stats;

  if (queue_id >= dev->data->num_queues)
    return -1;

  q_stats = &dev->data->queues[queue_id].queue_stats;

  stats->enqueued_count = q_stats->enqueued_count;
  stats->dequeued_count = q_stats->dequeued_count;
  stats->enqueue_err_count = q_stats->enqueue_err_count;
  stats->dequeue_err_count = q_stats->dequeue_err_count;
  stats->acc_offload_cycles = q_stats->acc_offload_cycles;

  return 0;
}
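/*
 * Note: the offload-latency helpers below use these per-queue stats to split
 * the measured enqueue time into a software (driver) part and an accelerator
 * part: acc_offload_cycles is subtracted from the wall-clock enqueue time, so
 * enq_sw_* tracks driver overhead while enq_acc_* tracks device processing.
 */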
static int
offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
    struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, uint16_t queue_id,
    const uint16_t num_to_process, uint16_t burst_sz,
    struct test_time_stats *time_st)
{
  int i, dequeued, ret;
  struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t enq_start_time, deq_start_time;
  uint64_t enq_sw_last_time, deq_last_time;
  struct rte_bbdev_stats stats;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_dec_op(ops_enq, burst_sz, dequeued,
          bufs->inputs, bufs->hard_outputs,
          bufs->soft_outputs, ref_op);

    /* Start time meas for enqueue function offload latency */
    enq_start_time = rte_rdtsc_precise();
    do {
      enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
          &ops_enq[enq], burst_sz - enq);
    } while (unlikely(burst_sz != enq));

    ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
    TEST_ASSERT_SUCCESS(ret,
        "Failed to get stats for queue (%u) of device (%u)",
        queue_id, dev_id);

    enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
        stats.acc_offload_cycles;
    time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, enq_sw_last_time);
    time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, enq_sw_last_time);
    time_st->enq_sw_total_time += enq_sw_last_time;

    time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, stats.acc_offload_cycles);
    time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, stats.acc_offload_cycles);
    time_st->enq_acc_total_time += stats.acc_offload_cycles;

    /* give time for device to process ops */
    rte_delay_us(200);

    /* Start time meas for dequeue function offload latency */
    deq_start_time = rte_rdtsc_precise();
    /* Dequeue one operation */
    do {
      deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, &ops_deq[deq], 1);
    } while (unlikely(deq != 1));

    deq_last_time = rte_rdtsc_precise() - deq_start_time;
    time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, deq_last_time);
    time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, deq_last_time);
    time_st->deq_total_time += deq_last_time;

    /* Dequeue remaining operations if needed */
    while (burst_sz != deq)
      deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
          &ops_deq[deq], burst_sz - deq);

    rte_bbdev_dec_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}
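/*
 * The LDPC variant below follows the same enqueue/dequeue timing pattern but
 * additionally passes HARQ input/output buffers and, when the device exposes
 * RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE, preloads and reads back the
 * external HARQ DDR around each burst.
 */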
static int
offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
    struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
    uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process,
    uint16_t burst_sz, struct test_time_stats *time_st)
{
  int i, dequeued, ret;
  struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t enq_start_time, deq_start_time;
  uint64_t enq_sw_last_time, deq_last_time;
  struct rte_bbdev_stats stats;
  bool extDdr = ldpc_cap_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
          bufs->inputs, bufs->hard_outputs,
          bufs->soft_outputs, bufs->harq_inputs,
          bufs->harq_outputs, ref_op);

    if (extDdr)
      preload_harq_ddr(dev_id, queue_id, ops_enq, burst_sz, true);

    /* Start time meas for enqueue function offload latency */
    enq_start_time = rte_rdtsc_precise();
    do {
      enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
          &ops_enq[enq], burst_sz - enq);
    } while (unlikely(burst_sz != enq));

    enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
    ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
    TEST_ASSERT_SUCCESS(ret,
        "Failed to get stats for queue (%u) of device (%u)",
        queue_id, dev_id);

    enq_sw_last_time -= stats.acc_offload_cycles;
    time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, enq_sw_last_time);
    time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, enq_sw_last_time);
    time_st->enq_sw_total_time += enq_sw_last_time;

    time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, stats.acc_offload_cycles);
    time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, stats.acc_offload_cycles);
    time_st->enq_acc_total_time += stats.acc_offload_cycles;

    /* give time for device to process ops */
    rte_delay_us(200);

    /* Start time meas for dequeue function offload latency */
    deq_start_time = rte_rdtsc_precise();
    /* Dequeue one operation */
    do {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, &ops_deq[deq], 1);
    } while (unlikely(deq != 1));

    deq_last_time = rte_rdtsc_precise() - deq_start_time;
    time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, deq_last_time);
    time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, deq_last_time);
    time_st->deq_total_time += deq_last_time;

    /* Dequeue remaining operations if needed */
    while (burst_sz != deq)
      deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
          &ops_deq[deq], burst_sz - deq);

    if (extDdr) {
      /* Read loopback is not thread safe */
      retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
    }

    rte_bbdev_dec_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}

static void
print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int iter = 0;
  double total_mops = 0, total_mbps = 0;

  for (iter = 0; iter < used_cores; iter++) {
    printf("Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
        t_params[iter].lcore_id, t_params[iter].ops_per_sec,
        t_params[iter].mbps);
    total_mops += t_params[iter].ops_per_sec;
    total_mbps += t_params[iter].mbps;
  }
  printf("\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
      used_cores, total_mops, total_mbps);
}
static int
offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
    struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, uint16_t queue_id,
    const uint16_t num_to_process, uint16_t burst_sz,
    struct test_time_stats *time_st)
{
  int i, dequeued, ret;
  struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t enq_start_time, deq_start_time;
  uint64_t enq_sw_last_time, deq_last_time;
  struct rte_bbdev_stats stats;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_enc_op(ops_enq, burst_sz, dequeued,
          bufs->inputs, bufs->hard_outputs, ref_op);

    /* Start time meas for enqueue function offload latency */
    enq_start_time = rte_rdtsc_precise();
    do {
      enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
          &ops_enq[enq], burst_sz - enq);
    } while (unlikely(burst_sz != enq));

    enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
    ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
    TEST_ASSERT_SUCCESS(ret,
        "Failed to get stats for queue (%u) of device (%u)",
        queue_id, dev_id);

    enq_sw_last_time -= stats.acc_offload_cycles;
    time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, enq_sw_last_time);
    time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, enq_sw_last_time);
    time_st->enq_sw_total_time += enq_sw_last_time;

    time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, stats.acc_offload_cycles);
    time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, stats.acc_offload_cycles);
    time_st->enq_acc_total_time += stats.acc_offload_cycles;

    /* give time for device to process ops */
    rte_delay_us(200);

    /* Start time meas for dequeue function offload latency */
    deq_start_time = rte_rdtsc_precise();
    /* Dequeue one operation */
    do {
      deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, &ops_deq[deq], 1);
    } while (unlikely(deq != 1));

    deq_last_time = rte_rdtsc_precise() - deq_start_time;
    time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, deq_last_time);
    time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, deq_last_time);
    time_st->deq_total_time += deq_last_time;

    while (burst_sz != deq)
      deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
          &ops_deq[deq], burst_sz - deq);

    rte_bbdev_enc_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}

/* Aggregate the performance results over the number of cores used */
static void
print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int core_idx = 0;
  double total_mops = 0, total_mbps = 0;
  uint8_t iter_count = 0;

  for (core_idx = 0; core_idx < used_cores; core_idx++) {
    printf("Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
        t_params[core_idx].lcore_id,
        t_params[core_idx].ops_per_sec,
        t_params[core_idx].mbps,
        t_params[core_idx].iter_count);
    total_mops += t_params[core_idx].ops_per_sec;
    total_mbps += t_params[core_idx].mbps;
    iter_count = RTE_MAX(iter_count, t_params[core_idx].iter_count);
  }
  printf("\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
      used_cores, total_mops, total_mbps, iter_count);
}
static int
offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
    struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
    uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process,
    uint16_t burst_sz, struct test_time_stats *time_st)
{
  int i, dequeued, ret;
  struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t enq_start_time, deq_start_time;
  uint64_t enq_sw_last_time, deq_last_time;
  struct rte_bbdev_stats stats;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
          bufs->inputs, bufs->hard_outputs, ref_op);

    /* Start time meas for enqueue function offload latency */
    enq_start_time = rte_rdtsc_precise();
    do {
      enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
          &ops_enq[enq], burst_sz - enq);
    } while (unlikely(burst_sz != enq));

    enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
    ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
    TEST_ASSERT_SUCCESS(ret,
        "Failed to get stats for queue (%u) of device (%u)",
        queue_id, dev_id);

    enq_sw_last_time -= stats.acc_offload_cycles;
    time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, enq_sw_last_time);
    time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, enq_sw_last_time);
    time_st->enq_sw_total_time += enq_sw_last_time;

    time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, stats.acc_offload_cycles);
    time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, stats.acc_offload_cycles);
    time_st->enq_acc_total_time += stats.acc_offload_cycles;

    /* give time for device to process ops */
    rte_delay_us(200);

    /* Start time meas for dequeue function offload latency */
    deq_start_time = rte_rdtsc_precise();
    /* Dequeue one operation */
    do {
      deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, &ops_deq[deq], 1);
    } while (unlikely(deq != 1));

    deq_last_time = rte_rdtsc_precise() - deq_start_time;
    time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, deq_last_time);
    time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, deq_last_time);
    time_st->deq_total_time += deq_last_time;

    while (burst_sz != deq)
      deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
          &ops_deq[deq], burst_sz - deq);

    rte_bbdev_enc_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}

/* Aggregate the performance results over the number of cores used */
static void
print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int core_idx = 0;
  double total_mbps = 0, total_bler = 0, total_iter = 0;
  double snr = get_snr();

  for (core_idx = 0; core_idx < used_cores; core_idx++) {
    printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
        t_params[core_idx].lcore_id,
        t_params[core_idx].bler * 100,
        t_params[core_idx].iter_average,
        t_params[core_idx].mbps,
        get_vector_filename());
    total_mbps += t_params[core_idx].mbps;
    total_bler += t_params[core_idx].bler;
    total_iter += t_params[core_idx].iter_average;
  }
  total_bler /= used_cores;
  total_iter /= used_cores;

  printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
      snr, total_bler * 100, total_iter, get_iter_max(),
      total_mbps, get_vector_filename());
}
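/*
 * print_dec_bler averages the per-core BLER and decoder iteration counts over
 * the cores used and prints one aggregate line for the current SNR point, in
 * addition to the per-core results above it.
 */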
#endif

static int
offload_cost_test(struct active_device *ad, struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
  RTE_SET_USED(ad);
  RTE_SET_USED(op_params);
  printf("Offload latency test is disabled.\n");
  printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
  return TEST_SKIPPED;
#else
  int iter;
  uint16_t burst_sz = op_params->burst_sz;
  const uint16_t num_to_process = op_params->num_to_process;
  const enum rte_bbdev_op_type op_type = test_vector.op_type;
  const uint16_t queue_id = ad->queue_ids[0];
  struct test_buffers *bufs = NULL;
  struct rte_bbdev_info info;
  const char *op_type_str;
  struct test_time_stats time_st;

  memset(&time_st, 0, sizeof(struct test_time_stats));
  time_st.enq_sw_min_time = UINT64_MAX;
  time_st.enq_acc_min_time = UINT64_MAX;
  time_st.deq_min_time = UINT64_MAX;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(ad->dev_id, &info);
  bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  op_type_str = rte_bbdev_op_type_str(op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
      info.dev_name, burst_sz, num_to_process, op_type_str);

  if (op_type == RTE_BBDEV_OP_TURBO_DEC)
    iter = offload_latency_test_dec(op_params->mp, bufs,
        op_params->ref_dec_op, ad->dev_id, queue_id,
        num_to_process, burst_sz, &time_st);
  else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
    iter = offload_latency_test_enc(op_params->mp, bufs,
        op_params->ref_enc_op, ad->dev_id, queue_id,
        num_to_process, burst_sz, &time_st);
  else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
    iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
        op_params->ref_enc_op, ad->dev_id, queue_id,
        num_to_process, burst_sz, &time_st);
  else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
    iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
        op_params->ref_dec_op, ad->dev_id, queue_id,
        num_to_process, burst_sz, &time_st);
  else
    iter = offload_latency_test_enc(op_params->mp, bufs,
        op_params->ref_enc_op, ad->dev_id, queue_id,
        num_to_process, burst_sz, &time_st);

  if (iter <= 0)
    return TEST_FAILED;

  printf("Enqueue driver offload cost latency:\n"
      "\tavg: %lg cycles, %lg us\n"
      "\tmin: %lg cycles, %lg us\n"
      "\tmax: %lg cycles, %lg us\n"
      "Enqueue accelerator offload cost latency:\n"
      "\tavg: %lg cycles, %lg us\n"
      "\tmin: %lg cycles, %lg us\n"
      "\tmax: %lg cycles, %lg us\n",
      (double)time_st.enq_sw_total_time / (double)iter,
      (double)(time_st.enq_sw_total_time * 1000000) / (double)iter / (double)rte_get_tsc_hz(),
      (double)time_st.enq_sw_min_time,
      (double)(time_st.enq_sw_min_time * 1000000) / rte_get_tsc_hz(),
      (double)time_st.enq_sw_max_time,
      (double)(time_st.enq_sw_max_time * 1000000) / rte_get_tsc_hz(),
      (double)time_st.enq_acc_total_time / (double)iter,
      (double)(time_st.enq_acc_total_time * 1000000) / (double)iter / (double)rte_get_tsc_hz(),
      (double)time_st.enq_acc_min_time,
      (double)(time_st.enq_acc_min_time * 1000000) / rte_get_tsc_hz(),
      (double)time_st.enq_acc_max_time,
      (double)(time_st.enq_acc_max_time * 1000000) / rte_get_tsc_hz());

  printf("Dequeue offload cost latency - one op:\n"
      "\tavg: %lg cycles, %lg us\n"
      "\tmin: %lg cycles, %lg us\n"
      "\tmax: %lg cycles, %lg us\n",
      (double)time_st.deq_total_time / (double)iter,
      (double)(time_st.deq_total_time * 1000000) / (double)iter / (double)rte_get_tsc_hz(),
      (double)time_st.deq_min_time,
      (double)(time_st.deq_min_time * 1000000) / rte_get_tsc_hz(),
      (double)time_st.deq_max_time,
      (double)(time_st.deq_max_time * 1000000) / rte_get_tsc_hz());

  return TEST_SUCCESS;
#endif
}
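/*
 * Summary of the offload cost test above: for each burst it reports the
 * enqueue cost split into driver and accelerator cycles, plus the cost of a
 * single-op dequeue, each as avg/min/max in cycles and microseconds.
 */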
#ifdef RTE_BBDEV_OFFLOAD_COST
static int
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
    const uint16_t num_to_process, uint16_t burst_sz,
    uint64_t *deq_total_time, uint64_t *deq_min_time,
    uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
  int i, deq_total;
  struct rte_bbdev_dec_op *ops[MAX_BURST];
  uint64_t deq_start_time, deq_last_time;

  /* Test deq offload latency from an empty queue */
  for (i = 0, deq_total = 0; deq_total < num_to_process;
      ++i, deq_total += burst_sz) {
    deq_start_time = rte_rdtsc_precise();

    if (unlikely(num_to_process - deq_total < burst_sz))
      burst_sz = num_to_process - deq_total;
    if (op_type == RTE_BBDEV_OP_LDPC_DEC)
      rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops, burst_sz);
    else
      rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);

    deq_last_time = rte_rdtsc_precise() - deq_start_time;
    *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
    *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
    *deq_total_time += deq_last_time;
  }

  return i;
}

static int
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
    const uint16_t num_to_process, uint16_t burst_sz,
    uint64_t *deq_total_time, uint64_t *deq_min_time,
    uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
  int i, deq_total;
  struct rte_bbdev_enc_op *ops[MAX_BURST];
  uint64_t deq_start_time, deq_last_time;

  /* Test deq offload latency from an empty queue */
  for (i = 0, deq_total = 0; deq_total < num_to_process;
      ++i, deq_total += burst_sz) {
    deq_start_time = rte_rdtsc_precise();

    if (unlikely(num_to_process - deq_total < burst_sz))
      burst_sz = num_to_process - deq_total;
    if (op_type == RTE_BBDEV_OP_LDPC_ENC)
      rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops, burst_sz);
    else
      rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);

    deq_last_time = rte_rdtsc_precise() - deq_start_time;
    *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
    *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
    *deq_total_time += deq_last_time;
  }

  return i;
}
#endif

static int
offload_latency_empty_q_test(struct active_device *ad,
    struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
  RTE_SET_USED(ad);
  RTE_SET_USED(op_params);
  printf("Offload latency empty dequeue test is disabled.\n");
  printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
  return TEST_SKIPPED;
#else
  int iter;
  uint64_t deq_total_time, deq_min_time, deq_max_time;
  uint16_t burst_sz = op_params->burst_sz;
  const uint16_t num_to_process = op_params->num_to_process;
  const enum rte_bbdev_op_type op_type = test_vector.op_type;
  const uint16_t queue_id = ad->queue_ids[0];
  struct rte_bbdev_info info;
  const char *op_type_str;

  deq_total_time = deq_max_time = 0;
  deq_min_time = UINT64_MAX;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
      "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(ad->dev_id, &info);

  op_type_str = rte_bbdev_op_type_str(op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
      info.dev_name, burst_sz, num_to_process, op_type_str);

  if (op_type == RTE_BBDEV_OP_TURBO_DEC || op_type == RTE_BBDEV_OP_LDPC_DEC)
    iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
        num_to_process, burst_sz, &deq_total_time,
        &deq_min_time, &deq_max_time, op_type);
  else
    iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
        num_to_process, burst_sz, &deq_total_time,
        &deq_min_time, &deq_max_time, op_type);

  if (iter <= 0)
    return TEST_FAILED;

  printf("Empty dequeue offload:\n"
      "\tavg: %lg cycles, %lg us\n"
      "\tmin: %lg cycles, %lg us\n"
      "\tmax: %lg cycles, %lg us\n",
      (double)deq_total_time / (double)iter,
      (double)(deq_total_time * 1000000) / (double)iter / (double)rte_get_tsc_hz(),
      (double)deq_min_time,
      (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
      (double)deq_max_time,
      (double)(deq_max_time * 1000000) / rte_get_tsc_hz());

  return TEST_SUCCESS;
#endif
}

/*
 * Test function that determines how long an enqueue + dequeue of a burst
 * takes on available lcores.
 */
static int
throughput_test(struct active_device *ad, struct test_op_params *op_params)
{
  int ret;
  unsigned int lcore_id, used_cores = 0;
  struct thread_params *t_params, *tp;
  struct rte_bbdev_info info;
  lcore_function_t *throughput_function;
  uint16_t num_lcores;
  const char *op_type_str;

  rte_bbdev_info_get(ad->dev_id, &info);

  op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", test_vector.op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== new test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
      info.dev_name, ad->nb_queues, op_params->burst_sz,
      op_params->num_to_process, op_params->num_lcores,
      op_type_str,
      intr_enabled ? "Interrupt mode" : "PMD mode",
      (double)rte_get_tsc_hz() / 1000000000.0);

  /* Set number of lcores */
  num_lcores = (ad->nb_queues < (op_params->num_lcores)) ?
      ad->nb_queues : op_params->num_lcores;

  /* Allocate memory for thread parameters structure */
  t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
      RTE_CACHE_LINE_SIZE);
  TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
      RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
        RTE_CACHE_LINE_SIZE));

  rte_atomic16_set(&op_params->sync, SYNC_WAIT);

  /* Master core is set at first entry */
  t_params[0].dev_id = ad->dev_id;
  t_params[0].lcore_id = rte_lcore_id();
  t_params[0].op_params = op_params;
  t_params[0].queue_id = ad->queue_ids[used_cores++];
  t_params[0].iter_count = 0;

  RTE_LCORE_FOREACH_SLAVE(lcore_id) {
    if (used_cores >= num_lcores)
      break;

    t_params[used_cores].dev_id = ad->dev_id;
    t_params[used_cores].lcore_id = lcore_id;
    t_params[used_cores].op_params = op_params;
    t_params[used_cores].queue_id = ad->queue_ids[used_cores];
    t_params[used_cores].iter_count = 0;

    rte_eal_remote_launch(throughput_pmd_lcore_ldpc_dec,
        &t_params[used_cores++], lcore_id);
  }

  rte_atomic16_set(&op_params->sync, SYNC_START);
  ret = throughput_pmd_lcore_ldpc_dec(&t_params[0]);

  /* Master core is always used */
  for (used_cores = 1; used_cores < num_lcores; used_cores++)
    ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

  /* Return if test failed */
  if (ret) {
    rte_free(t_params);
    return ret;
  }

  /* Print throughput if interrupts are disabled and test passed */
  if (!intr_enabled) {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      print_dec_throughput(t_params, num_lcores);
    else
      print_enc_throughput(t_params, num_lcores);
    rte_free(t_params);
    return ret;
  }

  /* In interrupt TC we need to wait for the interrupt callback to deqeue
   * all pending operations. Skip waiting for queues which reported an
   * error using processing_status variable.
   * Wait for master lcore operations.
   */
  tp = &t_params[0];
  while ((rte_atomic16_read(&tp->nb_dequeued) < op_params->num_to_process) &&
      (rte_atomic16_read(&tp->processing_status) != TEST_FAILED))
    rte_pause();

  tp->ops_per_sec /= TEST_REPETITIONS;
  tp->mbps /= TEST_REPETITIONS;
  ret |= (int)rte_atomic16_read(&tp->processing_status);

  /* Wait for slave lcores operations */
  for (used_cores = 1; used_cores < num_lcores; used_cores++) {
    tp = &t_params[used_cores];

    while ((rte_atomic16_read(&tp->nb_dequeued) < op_params->num_to_process) &&
        (rte_atomic16_read(&tp->processing_status) != TEST_FAILED))
      rte_pause();

    tp->ops_per_sec /= TEST_REPETITIONS;
    tp->mbps /= TEST_REPETITIONS;
    ret |= (int)rte_atomic16_read(&tp->processing_status);
  }

  /* Print throughput if test passed */
  if (!ret) {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      print_dec_throughput(t_params, num_lcores);
    else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
      print_enc_throughput(t_params, num_lcores);
  }

  rte_free(t_params);
  return ret;
}

static int
bler_tc(void)
{
  return run_test_case(bler_test);
}
static int
...
...
@@ -5070,23 +3361,7 @@ throughput_tc(void)
  return run_test_case(throughput_test);
}
static int
offload_cost_tc(void)
{
  return run_test_case(offload_cost_test);
}

static int
offload_latency_empty_q_tc(void)
{
  return run_test_case(offload_latency_empty_q_test);
}

static int
latency_tc(void)
{
  return run_test_case(latency_test);
}

static int
interrupt_tc(void)
...
...
@@ -5094,16 +3369,6 @@ interrupt_tc(void)
  return run_test_case(throughput_test);
}
static struct unit_test_suite bbdev_bler_testsuite = {
  .suite_name = "BBdev BLER Tests",
  .setup = testsuite_setup,
  .teardown = testsuite_teardown,
  .unit_test_cases = {
    TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
    TEST_CASES_END() /**< NULL terminate unit test array */
  }
};

static struct unit_test_suite bbdev_throughput_testsuite = {
  .suite_name = "BBdev Throughput Tests",
  .setup = testsuite_setup,
...
...
@@ -5114,53 +3379,8 @@ static struct unit_test_suite bbdev_throughput_testsuite = {
  }
};
static struct unit_test_suite bbdev_validation_testsuite = {
  .suite_name = "BBdev Validation Tests",
  .setup = testsuite_setup,
  .teardown = testsuite_teardown,
  .unit_test_cases = {
    TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
    TEST_CASES_END() /**< NULL terminate unit test array */
  }
};

static struct unit_test_suite bbdev_latency_testsuite = {
  .suite_name = "BBdev Latency Tests",
  .setup = testsuite_setup,
  .teardown = testsuite_teardown,
  .unit_test_cases = {
    TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
    TEST_CASES_END() /**< NULL terminate unit test array */
  }
};

static struct unit_test_suite bbdev_offload_cost_testsuite = {
  .suite_name = "BBdev Offload Cost Tests",
  .setup = testsuite_setup,
  .teardown = testsuite_teardown,
  .unit_test_cases = {
    TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
    TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
    TEST_CASES_END() /**< NULL terminate unit test array */
  }
};

static struct unit_test_suite bbdev_interrupt_testsuite = {
  .suite_name = "BBdev Interrupt Tests",
  .setup = interrupt_testsuite_setup,
  .teardown = testsuite_teardown,
  .unit_test_cases = {
    TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
    TEST_CASES_END() /**< NULL terminate unit test array */
  }
};
//REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
//REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
//REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
//REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
//REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
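/*
 * Only the throughput suite is registered in this offload build; the BLER,
 * validation, latency, offload-cost and interrupt suites above are kept in
 * the source but their registration is commented out.
 */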
#define MAX_QUEUES RTE_MAX_LCORE
...
...
@@ -5331,162 +3551,6 @@ print_usage(const char *prog_name)
  printf("\n");
}
static int
parse_args(int argc, char **argv, struct test_params *tp)
{
  int opt, option_index;
  unsigned int num_tests = 0;
  bool test_cases_present = false;
  bool test_vector_present = false;
  struct test_command *t;
  char *tokens[MAX_CMDLINE_TESTCASES];
  int tc, ret;

  static struct option lgopts[] = {
    { "num-ops", 1, 0, 'n' },
    { "burst-size", 1, 0, 'b' },
    { "test-cases", 1, 0, 'c' },
    { "test-vector", 1, 0, 'v' },
    { "lcores", 1, 0, 'l' },
    { "snr", 1, 0, 's' },
    { "iter_max", 6, 0, 't' },
    { "init-device", 0, 0, 'i' },
    { "help", 0, 0, 'h' },
    { NULL, 0, 0, 0 }
  };

  tp->iter_max = DEFAULT_ITER;

  while ((opt = getopt_long(argc, argv, "hin:b:c:v:l:s:t:", lgopts,
      &option_index)) != EOF)
    switch (opt) {
    case 'n':
      TEST_ASSERT(strlen(optarg) > 0, "Num of operations is not provided");
      tp->num_ops = strtol(optarg, NULL, 10);
      break;
    case 'b':
      TEST_ASSERT(strlen(optarg) > 0, "Burst size is not provided");
      tp->burst_sz = strtol(optarg, NULL, 10);
      TEST_ASSERT(tp->burst_sz <= MAX_BURST,
          "Burst size mustn't be greater than %u", MAX_BURST);
      break;
    case 'c':
      TEST_ASSERT(test_cases_present == false,
          "Test cases provided more than once");
      test_cases_present = true;

      ret = rte_strsplit(optarg, strlen(optarg), tokens,
          MAX_CMDLINE_TESTCASES, tc_sep);
      TEST_ASSERT(ret <= MAX_CMDLINE_TESTCASES,
          "Too many test cases (max=%d)", MAX_CMDLINE_TESTCASES);

      for (tc = 0; tc < ret; ++tc) {
        /* Find matching test case */
        TAILQ_FOREACH(t, &commands_list, next)
          if (!strcmp(tokens[tc], t->command))
            tp->test_to_run[num_tests] = t;

        TEST_ASSERT(tp->test_to_run[num_tests] != NULL,
            "Unknown test case: %s", tokens[tc]);
        ++num_tests;
      }
      break;
    case 'v':
      TEST_ASSERT(test_vector_present == false,
          "Test vector provided more than once");
      test_vector_present = true;
      TEST_ASSERT(strlen(optarg) > 0, "Config file name is null");
      snprintf(tp->test_vector_filename,
          sizeof(tp->test_vector_filename), "%s", optarg);
      break;
    case 's':
      TEST_ASSERT(strlen(optarg) > 0, "SNR is not provided");
      tp->snr = strtod(optarg, NULL);
      break;
    case 't':
      TEST_ASSERT(strlen(optarg) > 0, "Iter_max is not provided");
      tp->iter_max = strtol(optarg, NULL, 10);
      break;
    case 'l':
      TEST_ASSERT(strlen(optarg) > 0, "Num of lcores is not provided");
      tp->num_lcores = strtol(optarg, NULL, 10);
      TEST_ASSERT(tp->num_lcores <= RTE_MAX_LCORE,
          "Num of lcores mustn't be greater than %u", RTE_MAX_LCORE);
      break;
    case 'i':
      /* indicate fpga fec config required */
      tp->init_device = true;
      break;
    case 'h':
      print_usage(argv[0]);
      return 0;
    default:
      printf("ERROR: Unknown option: -%c\n", opt);
      return -1;
    }

  if (tp->num_ops == 0) {
    printf("WARNING: Num of operations was not provided or was set 0. Set to default (%u)\n",
        DEFAULT_OPS);
    tp->num_ops = DEFAULT_OPS;
  }
  if (tp->burst_sz == 0) {
    printf("WARNING: Burst size was not provided or was set 0. Set to default (%u)\n",
        DEFAULT_BURST);
    tp->burst_sz = DEFAULT_BURST;
  }
  if (tp->num_lcores == 0) {
    printf("WARNING: Num of lcores was not provided or was set 0. Set to value from RTE config (%u)\n",
        rte_lcore_count());
    tp->num_lcores = rte_lcore_count();
  }

  TEST_ASSERT(tp->burst_sz <= tp->num_ops,
      "Burst size (%u) mustn't be greater than num ops (%u)",
      tp->burst_sz, tp->num_ops);

  tp->num_tests = num_tests;
  return 0;
}
static int
run_all_tests(void)
{
  int ret = TEST_SUCCESS;
  struct test_command *t;

  TAILQ_FOREACH(t, &commands_list, next)
    ret |= (int) t->callback();

  return ret;
}
static int
run_parsed_tests(struct test_params *tp)
{
  int ret = TEST_SUCCESS;
  unsigned int i;

  for (i = 0; i < tp->num_tests; ++i)
    ret |= (int) tp->test_to_run[i]->callback();

  return ret;
}
static int
init_input(uint32_t **data, uint32_t data_length)
...
...
@@ -5652,13 +3716,6 @@ argv_re[1] = "-v";
argv_re[2] = "../../../targets/ARCH/test-bbdev/test_vectors/ldpc_dec_v8480.data";
//printf("after ......ret %d argc %d argv %s %s %s %s\n", ret,argc, argv[0], argv[1], argv[2], argv[3],argv[4]);
/* Parse application arguments (after the EAL ones) */
/* ret = parse_args(argc_re, argv_re, &test_params);
if (ret < 0) {
print_usage(argv_re[0]);
return 1;
}
*/
memset(&test_vector_dec, 0, sizeof(struct test_bbdev_vector));
...
...
@@ -5707,11 +3764,19 @@ test_params.num_ops=2;
test_params.burst_sz = 2;
test_params.num_lcores = 1;
test_params.num_tests = 1;
run_all_tests();
//run_all_tests();
testsuite_setup();
ut_setup();
throughput_tc();

char *data = ldpc_output;
data_len = (p_decParams->BG == 1) ? (22 * p_decParams->Z) : (10 * p_decParams->Z);
memcpy(&p_out[0], data, C * data_len);
//p_out = ldpc_output;
ut_teardown();
//for (i=0;i<8;i++)
//printf("p_out[%d] = %x addr %p ldpcout addr %p\n",i,p_out[i],p_out+i,ldpc_output+i);
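/*
 * After the bbdev throughput test case runs, the decoded output is copied
 * back to the caller: data_len is 22*Z for BG1 and 10*Z for BG2 (the
 * per-code-block payload size used by the memcpy above), and C code blocks
 * are copied into p_out.
 */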
...
...
openair1/PHY/NR_TRANSPORT/nr_ulsch_decoding.c
View file @ 887f8a90
...
...
@@ -675,12 +675,12 @@ uint32_t nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
#endif
        no_iteration_ldpc = ulsch->max_ldpc_iterations + 1;
      }

      for (int k = 0; k < 8; k++)
/*
for (int k=0;k<8;k++)
{
printf("output decoder [%d] = 0x%02x \n", k, harq_process->c[r][k]);
printf("llrprocbuf [%d] = %x adr %p\n", k, llrProcBuf[k], llrProcBuf+k);
}
*/
      memcpy(harq_process->b + offset, harq_process->c[r],
...
...