OpenXG-RAN (spbro) · Commits

Commit 52450c44
Authored Jul 27, 2021 by Hongzhi Wang; committed by Hongzhi Wang on Mar 09, 2022
Parent: 4238ff04

    code cleanup ldpc offload

Showing 2 changed files with 324 additions and 2259 deletions:
  openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload.c   +322 -2257
  openair1/PHY/NR_TRANSPORT/nr_ulsch_decoding.c                  +2 -2
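Both hunks below touch OAI's copy of the DPDK bbdev test harness that drives the LDPC offload. Every test function in the file follows the same accelerator pattern: allocate operation descriptors from a mempool, enqueue them on a bbdev queue, poll the same queue until everything has been dequeued, then validate and free. For orientation, a minimal sketch of that loop, assuming a configured device/queue (dev_id, queue_id), an op mempool mp, and ops filled elsewhere exactly as the harness does; error handling is trimmed and the helper name is illustrative:

/* Minimal sketch of the bbdev LDPC-decode offload loop used throughout this
 * file (illustrative only; dev_id/queue_id/mp are assumed to be set up
 * elsewhere, and the ldpc_dec fields are assumed to be filled before use). */
#include <rte_bbdev.h>
#include <rte_bbdev_op.h>

static int ldpc_dec_offload_sketch(uint16_t dev_id, uint16_t queue_id,
    struct rte_mempool *mp, uint16_t num_ops)
{
  struct rte_bbdev_dec_op *ops_enq[num_ops], *ops_deq[num_ops];
  uint16_t enq = 0, deq = 0;

  /* Allocate operation descriptors from the mempool. */
  if (rte_bbdev_dec_op_alloc_bulk(mp, ops_enq, num_ops) != 0)
    return -1;

  /* ... fill ops_enq[i]->ldpc_dec (input mbufs, hard_output, op_flags) ... */

  /* Enqueue to the accelerator, then poll until everything is dequeued. */
  while (enq < num_ops)
    enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops_enq[enq], num_ops - enq);
  while (deq < enq)
    deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, &ops_deq[deq], enq - deq);

  /* ... read hard_output mbufs / iter_count, then release the descriptors ... */
  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
  return 0;
}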
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_offload.c  (view file @ 52450c44)
...
@@ -1930,11 +1930,11 @@ printf("segment %d offset %d length %d data length %d\n",i, offset,total_data_si
//      TEST_ASSERT(orig_op->segments[i].length == data_len,
//          "Length of segment differ in original (%u) and filled (%u) op",
//          orig_op->segments[i].length, data_len);
      TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
/*
      TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
          rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
          data_len,
          "Output buffers (CB=%u) are not equal", i);
*/
      m = m->next;
    }
...
@@ -2157,29 +2157,6 @@ validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
  return TEST_SUCCESS;
}

/* Check Number of code blocks errors */
static int
validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
{
  unsigned int i;
  struct op_data_entries *hard_data_orig = &test_vector.entries[DATA_HARD_OUTPUT];
  struct rte_bbdev_op_ldpc_dec *ops_td;
  struct rte_bbdev_op_data *hard_output;
  int errors = 0;
  struct rte_mbuf *m;

  for (i = 0; i < n; ++i) {
    ops_td = &ops[i]->ldpc_dec;
    hard_output = &ops_td->hard_output;
    m = hard_output->data;
    if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
        hard_data_orig->segments[0].addr,
        hard_data_orig->segments[0].length))
      errors++;
  }
  return errors;
}

static int
validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
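The validate_ldpc_bler() shown in this hunk only counts code blocks whose hard output no longer matches the reference segment; the BLER figure reported later (in bler_pmd_lcore_ldpc_dec and print_dec_bler) is that count divided by the number of operations in the run. A worked example of the arithmetic, with illustrative numbers:

/* Illustrative numbers only: turning the error count from a
 * validate_ldpc_bler()-style check into the reported BLER. */
int errors = 3;          /* code blocks failing the memcmp against the reference */
uint16_t num_ops = 128;  /* operations processed in the measurement run */
double bler = (double)errors / num_ops;   /* 3 / 128 = 0.0234..., printed as "BLER 2.3 %" */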
...
@@ -2939,2129 +2916,443 @@ printf("bufs len %d data %x addr orig %p addr %p\n",bufs->inputs[0].data->data_l
  return TEST_SUCCESS;
}
static int
throughput_intr_lcore_dec(void *arg)
throughput_pmd_lcore_ldpc_dec(void *arg)
{
  struct thread_params *tp = arg;
  unsigned int enqueued;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_to_process = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops[num_to_process];
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops_enq[num_ops];
  struct rte_bbdev_dec_op *ops_deq[num_ops];
  struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  int ret, i, j;
  uint16_t num_to_enq, enq;
  uint16_t num_to_enq;
  struct rte_bbdev_op_data *hard_output;
  struct rte_bbdev_op_ldpc_dec *ops_td;
  bool extDdr = check_bit(ldpc_cap_flags, RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
  bool loopback = check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
  bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
  TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
      "Failed to enable interrupts for dev: %u, queue_id: %u", tp->dev_id, queue_id);
  rte_bbdev_info_get(tp->dev_id, &info);
  TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  rte_atomic16_clear(&tp->processing_status);
  rte_atomic16_clear(&tp->nb_dequeued);
  //&op_params->q_bufs[socket_id][queue_id].inputs
  //printf("bufs len %d\n",bufs->input.data->data_len);

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, num_to_process);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process);
  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  /* For throughput tests we need to disable early termination */
  if (check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
    ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
  ref_op->ldpc_dec.iter_max = get_iter_max();
  ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
    copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, bufs->soft_outputs, tp->op_params->ref_dec_op);
        bufs->harq_inputs, bufs->harq_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_to_process; ++j)
    ops[j]->opaque_data = (void *)(uintptr_t)j;
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (j = 0; j < TEST_REPETITIONS; ++j) {
    for (i = 0; i < num_to_process; ++i)
      rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
  for (i = 0; i < TEST_REPETITIONS; ++i) {
    for (j = 0; j < num_ops; ++j) {
      if (!loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
      if (hc_out || loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
    }
    if (extDdr) {
      bool preload = i == (TEST_REPETITIONS - 1);
      preload_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops, preload);
    }

    start_time = rte_rdtsc_precise();
    tp->start_time = rte_rdtsc_precise();
    for (enqueued = 0; enqueued < num_to_process;) {
    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;
      if (unlikely(num_to_process - enqueued < num_to_enq))
        num_to_enq = num_to_process - enqueued;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;
      //printf("pmd lcore ldpc dec data %x\n", *ops_enq[enq]->ldpc_dec.input.addr);

      enq = 0;
      do {
        enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, queue_id, &ops[enqueued], num_to_enq);
      } while (unlikely(num_to_enq != enq));
      enqueued += enq;

      enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);

      /* Write to thread burst_sz current number of enqueued
       * descriptors. It ensures that proper number of
       * descriptors will be dequeued in callback
       * function - needed for last batch in case where
       * the number of operations is not a multiple of
       * burst size.
       */
      rte_atomic16_set(&tp->burst_sz, num_to_enq);

      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);

      /* Wait until processing of previous batch is
       * completed
       */
      while (rte_atomic16_read(&tp->nb_dequeued) != (int16_t)enqueued)
        rte_pause();
      /* ops_td = &ops_deq[enq]->ldpc_dec;
      hard_output = &ops_td->hard_output;
      struct rte_mbuf *m = hard_output->data;
      printf("deq nb segs %d\n", m->nb_segs);
      */
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    if (j != TEST_REPETITIONS - 1)
      rte_atomic16_clear(&tp->nb_dequeued);
    total_time += rte_rdtsc_precise() - start_time;
  }

  tp->iter_count = 0;
  /* get the max of iter_count for all dequeued ops */
  for (i = 0; i < num_ops; ++i) {
    tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, tp->iter_count);
  }
  if (extDdr) {
    /* Read loopback is not thread safe */
    retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
  }

  //if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
  //printf("op type != OP NONE\n");
  ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, tp->op_params->vector_mask);
  TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  //}

  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 1000000.0) /
      ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
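The two throughput figures written into thread_params at the end of the function above are plain ratios over the measured TSC cycles: operations per second is (num_ops × TEST_REPETITIONS) divided by elapsed seconds, and Mbps is the same ratio weighted by the transport-block size in bits and scaled by 10^6. A worked example with illustrative values:

/* Illustrative values only: num_ops = 512, TEST_REPETITIONS = 1000,
 * tb_len_bits = 8448, and total_time equivalent to 0.5 s of TSC cycles. */
double seconds     = 0.5;                                   /* total_time / rte_get_tsc_hz() */
double ops_per_sec = (512.0 * 1000) / seconds;              /* 1.024e6 ops/s */
double mbps        = (512.0 * 1000 * 8448 / 1000000.0) / seconds;   /* 8650.752 Mbps */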
static int
throughput_intr_lcore_enc(void *arg)
throughput_pmd_lcore_ldpc_enc(void *arg)
{
  struct thread_params *tp = arg;
  unsigned int enqueued;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_to_process = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops[num_to_process];
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops_enq[num_ops];
  struct rte_bbdev_enc_op *ops_deq[num_ops];
  struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  int ret, i, j;
  uint16_t num_to_enq, enq;
  uint16_t num_to_enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
  TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
      "Failed to enable interrupts for dev: %u, queue_id: %u", tp->dev_id, queue_id);
  rte_bbdev_info_get(tp->dev_id, &info);
  TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  rte_atomic16_clear(&tp->processing_status);
  rte_atomic16_clear(&tp->nb_dequeued);

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, num_to_process);
  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process);
      num_ops);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
        bufs->hard_outputs, tp->op_params->ref_enc_op);
    copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_to_process; ++j)
    ops[j]->opaque_data = (void *)(uintptr_t)j;
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (j = 0; j < TEST_REPETITIONS; ++j) {
    for (i = 0; i < num_to_process; ++i)
      rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
  for (i = 0; i < TEST_REPETITIONS; ++i) {
    tp->start_time = rte_rdtsc_precise();
    for (enqueued = 0; enqueued < num_to_process;) {
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      for (j = 0; j < num_ops; ++j)
        mbuf_reset(ops_enq[j]->turbo_enc.output.data);

    start_time = rte_rdtsc_precise();
    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;
      if (unlikely(num_to_process - enqueued < num_to_enq))
        num_to_enq = num_to_process - enqueued;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq = 0;
      do {
        enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, queue_id, &ops[enqueued], num_to_enq);
      } while (unlikely(enq != num_to_enq));
      enqueued += enq;

      enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);

      /* Write to thread burst_sz current number of enqueued
       * descriptors. It ensures that proper number of
       * descriptors will be dequeued in callback
       * function - needed for last batch in case where
       * the number of operations is not a multiple of
       * burst size.
       */
      rte_atomic16_set(&tp->burst_sz, num_to_enq);

      deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }
    /* Wait until processing of previous batch is
     * completed
     */
    while (rte_atomic16_read(&tp->nb_dequeued) != (int16_t)enqueued)
      rte_pause();

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    if (j != TEST_REPETITIONS - 1)
      rte_atomic16_clear(&tp->nb_dequeued);
    total_time += rte_rdtsc_precise() - start_time;
  }
  return TEST_SUCCESS;
}

  if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
    ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
static int
throughput_intr_lcore_ldpc_enc(void *arg)
{
  struct thread_params *tp = arg;
  unsigned int enqueued;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_to_process = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops[num_to_process];
  struct test_buffers *bufs = NULL;
  struct rte_bbdev_info info;
  int ret, i, j;
  uint16_t num_to_enq, enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
  TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
      "Failed to enable interrupts for dev: %u, queue_id: %u", tp->dev_id, queue_id);
  rte_bbdev_info_get(tp->dev_id, &info);
  TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  rte_atomic16_clear(&tp->processing_status);
  rte_atomic16_clear(&tp->nb_dequeued);

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, num_to_process);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_ldpc_enc_op(ops, num_to_process, 0, bufs->inputs,
        bufs->hard_outputs, tp->op_params->ref_enc_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_to_process; ++j)
    ops[j]->opaque_data = (void *)(uintptr_t)j;

  for (j = 0; j < TEST_REPETITIONS; ++j) {
    for (i = 0; i < num_to_process; ++i)
      rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);

    tp->start_time = rte_rdtsc_precise();
    for (enqueued = 0; enqueued < num_to_process;) {
      num_to_enq = burst_sz;
      if (unlikely(num_to_process - enqueued < num_to_enq))
        num_to_enq = num_to_process - enqueued;

      enq = 0;
      do {
        enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, queue_id, &ops[enqueued], num_to_enq);
      } while (unlikely(enq != num_to_enq));
      enqueued += enq;

      /* Write to thread burst_sz current number of enqueued
       * descriptors. It ensures that proper number of
       * descriptors will be dequeued in callback
       * function - needed for last batch in case where
       * the number of operations is not a multiple of
       * burst size.
       */
      rte_atomic16_set(&tp->burst_sz, num_to_enq);

      /* Wait until processing of previous batch is
       * completed
       */
      while (rte_atomic16_read(&tp->nb_dequeued) != (int16_t)enqueued)
        rte_pause();
    }
    if (j != TEST_REPETITIONS - 1)
      rte_atomic16_clear(&tp->nb_dequeued);
  }

  return TEST_SUCCESS;
}
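The throughput_intr_lcore_* variants in this hunk all synchronize with the dequeue interrupt through two 16-bit atomics in thread_params: the enqueuing lcore publishes the size of the batch it just enqueued in tp->burst_sz, the RTE_BBDEV_EVENT_DEQUEUE callback registered in throughput_test() drains the queue and advances tp->nb_dequeued, and the lcore spins until nb_dequeued catches up with its own enqueued counter. A reduced sketch of the two sides of that handshake; the callback body here only shows the counter update, not the real dequeue/validation done by dequeue_event_callback, and the callback signature shown is assumed from the rte_bbdev_cb_fn type:

/* Producer side (per-lcore test body), as in the functions above. */
rte_atomic16_set(&tp->burst_sz, num_to_enq);        /* tell the callback the batch size */
while (rte_atomic16_read(&tp->nb_dequeued) != (int16_t)enqueued)
  rte_pause();                                      /* wait until the interrupt side catches up */

/* Consumer side: shape of the RTE_BBDEV_EVENT_DEQUEUE callback (sketch only). */
static void dequeue_cb_sketch(uint16_t dev_id, enum rte_bbdev_event_type event,
    void *cb_arg, void *ret_param)
{
  struct thread_params *tp = cb_arg;
  uint16_t burst = (uint16_t)rte_atomic16_read(&tp->burst_sz);
  /* ... rte_bbdev_dequeue_*_ops(dev_id, tp->queue_id, ops, burst) ... */
  rte_atomic16_add(&tp->nb_dequeued, burst);
  (void)event; (void)ret_param;
}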
static int
throughput_pmd_lcore_dec(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops_enq[num_ops];
  struct rte_bbdev_dec_op *ops_deq[num_ops];
  struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
  rte_bbdev_info_get(tp->dev_id, &info);
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, bufs->soft_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < TEST_REPETITIONS; ++i) {
    for (j = 0; j < num_ops; ++j)
      mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);

    start_time = rte_rdtsc_precise();
    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
      deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  tp->iter_count = 0;
  /* get the max of iter_count for all dequeued ops */
  for (i = 0; i < num_ops; ++i) {
    tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, tp->iter_count);
  }

  if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
    ret = validate_dec_op(ops_deq, num_ops, ref_op, tp->op_params->vector_mask);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_dec_TB_size(ref_op);
  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 1000000.0) /
      ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static int
bler_pmd_lcore_ldpc_dec(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops_enq[num_ops];
  struct rte_bbdev_dec_op *ops_deq[num_ops];
  struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  float parity_bler = 0;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;
  bool extDdr = check_bit(ldpc_cap_flags, RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
  bool loopback = check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
  bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
  rte_bbdev_info_get(tp->dev_id, &info);
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  /* For BLER tests we need to enable early termination */
  if (!check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
    ref_op->ldpc_dec.op_flags += RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
  ref_op->ldpc_dec.iter_max = get_iter_max();
  ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, bufs->soft_outputs,
        bufs->harq_inputs, bufs->harq_outputs, ref_op);
  generate_llr_input(num_ops, bufs->inputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < 1; ++i) { /* Could add more iterations */
    for (j = 0; j < num_ops; ++j) {
      if (!loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
      if (hc_out || loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
    }
    if (extDdr) {
      bool preload = i == (TEST_REPETITIONS - 1);
      preload_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops, preload);
    }

    start_time = rte_rdtsc_precise();
    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  tp->iter_count = 0;
  tp->iter_average = 0;
  /* get the max of iter_count for all dequeued ops */
  for (i = 0; i < num_ops; ++i) {
    tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, tp->iter_count);
    tp->iter_average += (double)ops_enq[i]->ldpc_dec.iter_count;
    if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
      parity_bler += 1.0;
  }

  parity_bler /= num_ops; /* This one is based on SYND */
  tp->iter_average /= num_ops;
  tp->bler = (double)validate_ldpc_bler(ops_deq, num_ops) / num_ops;

  if (test_vector.op_type != RTE_BBDEV_OP_NONE && tp->bler == 0
      && parity_bler == 0 && !hc_out) {
    ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, tp->op_params->vector_mask);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
  tp->ops_per_sec = ((double)num_ops * 1) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) / 1000000.0) /
      ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
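The BLER path above feeds the decoder through generate_llr_input(), which degrades the reference input to LLRs at the SNR returned by get_snr(). The exact OAI implementation is not shown in this diff; purely as an illustration of the standard scaling such a harness applies to a received BPSK sample before handing int8 LLRs to bbdev:

/* Illustrative only: the usual LLR scaling for a received BPSK sample y at
 * noise variance sigma^2, saturated to the int8 range bbdev expects; the real
 * generate_llr_input() in this file may scale and clip differently. */
static int8_t llr_from_sample_sketch(double y, double sigma2)
{
  double llr = 2.0 * y / sigma2;      /* LLR(y) = 2y / sigma^2 for unit-energy BPSK */
  if (llr > 127.0)  llr = 127.0;
  if (llr < -127.0) llr = -127.0;     /* int8 saturation */
  return (int8_t)llr;
}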
static int
throughput_pmd_lcore_ldpc_dec(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_dec_op *ops_enq[num_ops];
  struct rte_bbdev_dec_op *ops_deq[num_ops];
  struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;
  struct rte_bbdev_op_data *hard_output;
  struct rte_bbdev_op_ldpc_dec *ops_td;
  bool extDdr = check_bit(ldpc_cap_flags, RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
  bool loopback = check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
  bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
  rte_bbdev_info_get(tp->dev_id, &info);
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  //&op_params->q_bufs[socket_id][queue_id].inputs
  //printf("bufs len %d\n",bufs->input.data->data_len);

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  /* For throughput tests we need to disable early termination */
  if (check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
    ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
  ref_op->ldpc_dec.iter_max = get_iter_max();
  ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, bufs->soft_outputs,
        bufs->harq_inputs, bufs->harq_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < TEST_REPETITIONS; ++i) {
    for (j = 0; j < num_ops; ++j) {
      if (!loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
      if (hc_out || loopback)
        mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
    }
    if (extDdr) {
      bool preload = i == (TEST_REPETITIONS - 1);
      preload_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops, preload);
    }

    start_time = rte_rdtsc_precise();
    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;
      //printf("pmd lcore ldpc dec data %x\n", *ops_enq[enq]->ldpc_dec.input.addr);

      enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
      /* ops_td = &ops_deq[enq]->ldpc_dec;
      hard_output = &ops_td->hard_output;
      struct rte_mbuf *m = hard_output->data;
      printf("deq nb segs %d\n", m->nb_segs);
      */
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  tp->iter_count = 0;
  /* get the max of iter_count for all dequeued ops */
  for (i = 0; i < num_ops; ++i) {
    tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, tp->iter_count);
  }
  if (extDdr) {
    /* Read loopback is not thread safe */
    retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
  }

  //if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
  //printf("op type != OP NONE\n");
  ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, tp->op_params->vector_mask);
  TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  //}

  rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 1000000.0) /
      ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_enc(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops_enq[num_ops];
  struct rte_bbdev_enc_op *ops_deq[num_ops];
  struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
  rte_bbdev_info_get(tp->dev_id, &info);
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < TEST_REPETITIONS; ++i) {
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      for (j = 0; j < num_ops; ++j)
        mbuf_reset(ops_enq[j]->turbo_enc.output.data);

    start_time = rte_rdtsc_precise();
    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
      deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
    ret = validate_enc_op(ops_deq, num_ops, ref_op);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_enc_TB_size(ref_op);
  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 1000000.0) /
      ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_ldpc_enc(void *arg)
{
  struct thread_params *tp = arg;
  uint16_t enq, deq;
  uint64_t total_time = 0, start_time;
  const uint16_t queue_id = tp->queue_id;
  const uint16_t burst_sz = tp->op_params->burst_sz;
  const uint16_t num_ops = tp->op_params->num_to_process;
  struct rte_bbdev_enc_op *ops_enq[num_ops];
  struct rte_bbdev_enc_op *ops_deq[num_ops];
  struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
  struct test_buffers *bufs = NULL;
  int i, j, ret;
  struct rte_bbdev_info info;
  uint16_t num_to_enq;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);
  rte_bbdev_info_get(tp->dev_id, &info);
  TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
      "NUM_OPS cannot exceed %u for this device", info.drv.queue_size_lim);

  bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
    rte_pause();

  ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
  TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

  if (test_vector.op_type != RTE_BBDEV_OP_NONE)
    copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
        bufs->hard_outputs, ref_op);

  /* Set counter to validate the ordering */
  for (j = 0; j < num_ops; ++j)
    ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

  for (i = 0; i < TEST_REPETITIONS; ++i) {
    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      for (j = 0; j < num_ops; ++j)
        mbuf_reset(ops_enq[j]->turbo_enc.output.data);

    start_time = rte_rdtsc_precise();
    for (enq = 0, deq = 0; enq < num_ops;) {
      num_to_enq = burst_sz;
      if (unlikely(num_ops - enq < num_to_enq))
        num_to_enq = num_ops - enq;

      enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_enq[enq], num_to_enq);
      deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    /* dequeue the remaining */
    while (deq < enq) {
      deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, queue_id, &ops_deq[deq], enq - deq);
    }

    total_time += rte_rdtsc_precise() - start_time;
  }

  if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
    ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
    TEST_ASSERT_SUCCESS(ret, "Validation failed!");
  }

  rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);

  double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 1000000.0) /
      ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
static void
print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int iter = 0;
  double total_mops = 0, total_mbps = 0;

  for (iter = 0; iter < used_cores; iter++) {
    printf("Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
        t_params[iter].lcore_id, t_params[iter].ops_per_sec, t_params[iter].mbps);
    total_mops += t_params[iter].ops_per_sec;
    total_mbps += t_params[iter].mbps;
  }
  printf("\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
      used_cores, total_mops, total_mbps);
}
/* Aggregate the performance results over the number of cores used */
static void
print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int core_idx = 0;
  double total_mops = 0, total_mbps = 0;
  uint8_t iter_count = 0;

  for (core_idx = 0; core_idx < used_cores; core_idx++) {
    printf("Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
        t_params[core_idx].lcore_id, t_params[core_idx].ops_per_sec,
        t_params[core_idx].mbps, t_params[core_idx].iter_count);
    total_mops += t_params[core_idx].ops_per_sec;
    total_mbps += t_params[core_idx].mbps;
    iter_count = RTE_MAX(iter_count, t_params[core_idx].iter_count);
  }
  printf("\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
      used_cores, total_mops, total_mbps, iter_count);
}
/* Aggregate the performance results over the number of cores used */
static void
print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
{
  unsigned int core_idx = 0;
  double total_mbps = 0, total_bler = 0, total_iter = 0;
  double snr = get_snr();

  for (core_idx = 0; core_idx < used_cores; core_idx++) {
    printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
        t_params[core_idx].lcore_id, t_params[core_idx].bler * 100,
        t_params[core_idx].iter_average, t_params[core_idx].mbps,
        get_vector_filename());
    total_mbps += t_params[core_idx].mbps;
    total_bler += t_params[core_idx].bler;
    total_iter += t_params[core_idx].iter_average;
  }
  total_bler /= used_cores;
  total_iter /= used_cores;

  printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
      snr, total_bler * 100, total_iter, get_iter_max(),
      total_mbps, get_vector_filename());
}
/*
 * Test function that determines BLER wireless performance
 */
static int
bler_test(struct active_device *ad, struct test_op_params *op_params)
{
  int ret;
  unsigned int lcore_id, used_cores = 0;
  struct thread_params *t_params;
  struct rte_bbdev_info info;
  lcore_function_t *bler_function;
  uint16_t num_lcores;
  const char *op_type_str;

  rte_bbdev_info_get(ad->dev_id, &info);

  op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", test_vector.op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
      info.dev_name, ad->nb_queues, op_params->burst_sz,
      op_params->num_to_process, op_params->num_lcores,
      op_type_str, intr_enabled ? "Interrupt mode" : "PMD mode",
      (double)rte_get_tsc_hz() / 1000000000.0);

  /* Set number of lcores */
  num_lcores = (ad->nb_queues < (op_params->num_lcores))
      ? ad->nb_queues : op_params->num_lcores;

  /* Allocate memory for thread parameters structure */
  t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), RTE_CACHE_LINE_SIZE);
  TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
      RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE));

  if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
    bler_function = bler_pmd_lcore_ldpc_dec;
  else
    return TEST_SKIPPED;

  rte_atomic16_set(&op_params->sync, SYNC_WAIT);

  /* Master core is set at first entry */
  t_params[0].dev_id = ad->dev_id;
  t_params[0].lcore_id = rte_lcore_id();
  t_params[0].op_params = op_params;
  t_params[0].queue_id = ad->queue_ids[used_cores++];
  t_params[0].iter_count = 0;

  RTE_LCORE_FOREACH_SLAVE(lcore_id) {
    if (used_cores >= num_lcores)
      break;

    t_params[used_cores].dev_id = ad->dev_id;
    t_params[used_cores].lcore_id = lcore_id;
    t_params[used_cores].op_params = op_params;
    t_params[used_cores].queue_id = ad->queue_ids[used_cores];
    t_params[used_cores].iter_count = 0;

    rte_eal_remote_launch(bler_function, &t_params[used_cores++], lcore_id);
  }

  rte_atomic16_set(&op_params->sync, SYNC_START);
  ret = bler_function(&t_params[0]);

  /* Master core is always used */
  for (used_cores = 1; used_cores < num_lcores; used_cores++)
    ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

  print_dec_bler(t_params, num_lcores);

  /* Return if test failed */
  if (ret) {
    rte_free(t_params);
    return ret;
  }

  /* Function to print something here*/
  rte_free(t_params);
  return ret;
}
/*
 * Test function that determines how long an enqueue + dequeue of a burst
 * takes on available lcores.
 */
static int
throughput_test(struct active_device *ad, struct test_op_params *op_params)
{
  int ret;
  unsigned int lcore_id, used_cores = 0;
  struct thread_params *t_params, *tp;
  struct rte_bbdev_info info;
  lcore_function_t *throughput_function;
  uint16_t num_lcores;
  const char *op_type_str;

  rte_bbdev_info_get(ad->dev_id, &info);

  op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", test_vector.op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== new test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
      info.dev_name, ad->nb_queues, op_params->burst_sz,
      op_params->num_to_process, op_params->num_lcores,
      op_type_str, intr_enabled ? "Interrupt mode" : "PMD mode",
      (double)rte_get_tsc_hz() / 1000000000.0);

  /* Set number of lcores */
  num_lcores = (ad->nb_queues < (op_params->num_lcores))
      ? ad->nb_queues : op_params->num_lcores;

  /* Allocate memory for thread parameters structure */
  t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), RTE_CACHE_LINE_SIZE);
  TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
      RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE));

  if (intr_enabled) {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
      throughput_function = throughput_intr_lcore_dec;
    else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      throughput_function = throughput_intr_lcore_ldpc_dec;
    else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
      throughput_function = throughput_intr_lcore_enc;
    else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
      throughput_function = throughput_intr_lcore_ldpc_enc;
    else
      throughput_function = throughput_intr_lcore_enc;

    /* Dequeue interrupt callback registration */
    ret = rte_bbdev_callback_register(ad->dev_id, RTE_BBDEV_EVENT_DEQUEUE,
        dequeue_event_callback, t_params);
    if (ret < 0) {
      rte_free(t_params);
      return ret;
    }
  } else {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
      throughput_function = throughput_pmd_lcore_dec;
    else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      throughput_function = throughput_pmd_lcore_ldpc_dec;
    else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
      throughput_function = throughput_pmd_lcore_enc;
    else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
      throughput_function = throughput_pmd_lcore_ldpc_enc;
    else
      throughput_function = throughput_pmd_lcore_enc;
  }

  rte_atomic16_set(&op_params->sync, SYNC_WAIT);

  /* Master core is set at first entry */
  t_params[0].dev_id = ad->dev_id;
  t_params[0].lcore_id = rte_lcore_id();
  t_params[0].op_params = op_params;
  t_params[0].queue_id = ad->queue_ids[used_cores++];
  t_params[0].iter_count = 0;

  RTE_LCORE_FOREACH_SLAVE(lcore_id) {
    if (used_cores >= num_lcores)
      break;

    t_params[used_cores].dev_id = ad->dev_id;
    t_params[used_cores].lcore_id = lcore_id;
    t_params[used_cores].op_params = op_params;
    t_params[used_cores].queue_id = ad->queue_ids[used_cores];
    t_params[used_cores].iter_count = 0;

    rte_eal_remote_launch(throughput_pmd_lcore_ldpc_dec, &t_params[used_cores++], lcore_id);
  }

  rte_atomic16_set(&op_params->sync, SYNC_START);
  ret = throughput_function(&t_params[0]);

  /* Master core is always used */
  for (used_cores = 1; used_cores < num_lcores; used_cores++)
    ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

  /* Return if test failed */
  if (ret) {
    rte_free(t_params);
    return ret;
  }

  /* Print throughput if interrupts are disabled and test passed */
  if (!intr_enabled) {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      print_dec_throughput(t_params, num_lcores);
    else
      print_enc_throughput(t_params, num_lcores);
    rte_free(t_params);
    return ret;
  }

  /* In interrupt TC we need to wait for the interrupt callback to deqeue
   * all pending operations. Skip waiting for queues which reported an
   * error using processing_status variable.
   * Wait for master lcore operations.
   */
  tp = &t_params[0];
  while ((rte_atomic16_read(&tp->nb_dequeued) < op_params->num_to_process) &&
      (rte_atomic16_read(&tp->processing_status) != TEST_FAILED))
    rte_pause();

  tp->ops_per_sec /= TEST_REPETITIONS;
  tp->mbps /= TEST_REPETITIONS;
  ret |= (int)rte_atomic16_read(&tp->processing_status);

  /* Wait for slave lcores operations */
  for (used_cores = 1; used_cores < num_lcores; used_cores++) {
    tp = &t_params[used_cores];

    while ((rte_atomic16_read(&tp->nb_dequeued) < op_params->num_to_process) &&
        (rte_atomic16_read(&tp->processing_status) != TEST_FAILED))
      rte_pause();

    tp->ops_per_sec /= TEST_REPETITIONS;
    tp->mbps /= TEST_REPETITIONS;
    ret |= (int)rte_atomic16_read(&tp->processing_status);
  }

  /* Print throughput if test passed */
  if (!ret) {
    if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
      print_dec_throughput(t_params, num_lcores);
    else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
        test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
      print_enc_throughput(t_params, num_lcores);
  }

  rte_free(t_params);
  return ret;
}
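throughput_test() and bler_test() fan the per-lcore worker out with the classic DPDK launch/join pattern: the master core fills one thread_params slot per queue, launches the worker on each additional lcore with rte_eal_remote_launch(), runs the same worker itself, then joins with rte_eal_wait_lcore(). A reduced sketch of just that control flow, assuming the RTE_LCORE_FOREACH_SLAVE spelling used by the DPDK release this file targets; the wrapper name is illustrative:

/* Sketch of the launch/join skeleton used by throughput_test()/bler_test(). */
static int launch_on_lcores_sketch(lcore_function_t *worker,
    struct thread_params *t_params, uint16_t num_lcores)
{
  unsigned int lcore_id, used_cores = 1;   /* slot 0 belongs to the master core */
  int ret;

  RTE_LCORE_FOREACH_SLAVE(lcore_id) {
    if (used_cores >= num_lcores)
      break;
    rte_eal_remote_launch(worker, &t_params[used_cores++], lcore_id);
  }
  ret = worker(&t_params[0]);              /* master core runs its own share */
  for (used_cores = 1; used_cores < num_lcores; used_cores++)
    ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
  return ret;
}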
static int
latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
    struct rte_bbdev_dec_op *ref_op, int vector_mask,
    uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process,
    uint16_t burst_sz, uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
  int ret = TEST_SUCCESS;
  uint16_t i, j, dequeued;
  struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t start_time = 0, last_time = 0;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;
    bool first_time = true;
    last_time = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_dec_op_alloc_bulk() failed");

    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_dec_op(ops_enq, burst_sz, dequeued, bufs->inputs,
          bufs->hard_outputs, bufs->soft_outputs, ref_op);

    /* Set counter to validate the ordering */
    for (j = 0; j < burst_sz; ++j)
      ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

    start_time = rte_rdtsc_precise();

    enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq], burst_sz);
    TEST_ASSERT(enq == burst_sz, "Error enqueueing burst, expected %u, got %u", burst_sz, enq);

    /* Dequeue */
    do {
      deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, &ops_deq[deq], burst_sz - deq);
      if (likely(first_time && (deq > 0))) {
        last_time = rte_rdtsc_precise() - start_time;
        first_time = false;
      }
    } while (unlikely(burst_sz != deq));

    *max_time = RTE_MAX(*max_time, last_time);
    *min_time = RTE_MIN(*min_time, last_time);
    *total_time += last_time;

    if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
      ret = validate_dec_op(ops_deq, burst_sz, ref_op, vector_mask);
      TEST_ASSERT_SUCCESS(ret, "Validation failed!");
    }

    rte_bbdev_dec_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}
static int
latency_test_ldpc_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
    struct rte_bbdev_dec_op *ref_op, int vector_mask,
    uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process,
    uint16_t burst_sz, uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
  int ret = TEST_SUCCESS;
  uint16_t i, j, dequeued;
  struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t start_time = 0, last_time = 0;
  bool extDdr = ldpc_cap_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;
    bool first_time = true;
    last_time = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_dec_op_alloc_bulk() failed");

    /* For latency tests we need to disable early termination */
    if (check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
      ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
    ref_op->ldpc_dec.iter_max = get_iter_max();
    ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, bufs->inputs,
          bufs->hard_outputs, bufs->soft_outputs,
          bufs->harq_inputs, bufs->harq_outputs, ref_op);

    if (extDdr)
      preload_harq_ddr(dev_id, queue_id, ops_enq, burst_sz, true);

    /* Set counter to validate the ordering */
    for (j = 0; j < burst_sz; ++j)
      ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

    start_time = rte_rdtsc_precise();

    enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops_enq[enq], burst_sz);
    TEST_ASSERT(enq == burst_sz, "Error enqueueing burst, expected %u, got %u", burst_sz, enq);

    /* Dequeue */
    do {
      deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, &ops_deq[deq], burst_sz - deq);
      if (likely(first_time && (deq > 0))) {
        last_time = rte_rdtsc_precise() - start_time;
        first_time = false;
      }
    } while (unlikely(burst_sz != deq));

    *max_time = RTE_MAX(*max_time, last_time);
    *min_time = RTE_MIN(*min_time, last_time);
    *total_time += last_time;

    if (extDdr)
      retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);

    if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
      ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, vector_mask);
      TEST_ASSERT_SUCCESS(ret, "Validation failed!");
    }

    rte_bbdev_dec_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}
static int
latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
    struct rte_bbdev_enc_op *ref_op,
    uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process,
    uint16_t burst_sz, uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
  int ret = TEST_SUCCESS;
  uint16_t i, j, dequeued;
  struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t start_time = 0, last_time = 0;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;
    bool first_time = true;
    last_time = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");

    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_enc_op(ops_enq, burst_sz, dequeued, bufs->inputs,
          bufs->hard_outputs, ref_op);

    /* Set counter to validate the ordering */
    for (j = 0; j < burst_sz; ++j)
      ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

    start_time = rte_rdtsc_precise();

    enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq], burst_sz);
    TEST_ASSERT(enq == burst_sz, "Error enqueueing burst, expected %u, got %u", burst_sz, enq);

    /* Dequeue */
    do {
      deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, &ops_deq[deq], burst_sz - deq);
      if (likely(first_time && (deq > 0))) {
        last_time += rte_rdtsc_precise() - start_time;
        first_time = false;
      }
    } while (unlikely(burst_sz != deq));

    *max_time = RTE_MAX(*max_time, last_time);
    *min_time = RTE_MIN(*min_time, last_time);
    *total_time += last_time;

    if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
      ret = validate_enc_op(ops_deq, burst_sz, ref_op);
      TEST_ASSERT_SUCCESS(ret, "Validation failed!");
    }

    rte_bbdev_enc_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}
static int
latency_test_ldpc_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
    struct rte_bbdev_enc_op *ref_op,
    uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process,
    uint16_t burst_sz, uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
  int ret = TEST_SUCCESS;
  uint16_t i, j, dequeued;
  struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
  uint64_t start_time = 0, last_time = 0;

  for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
    uint16_t enq = 0, deq = 0;
    bool first_time = true;
    last_time = 0;

    if (unlikely(num_to_process - dequeued < burst_sz))
      burst_sz = num_to_process - dequeued;

    ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
    TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");

    if (test_vector.op_type != RTE_BBDEV_OP_NONE)
      copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, bufs->inputs,
          bufs->hard_outputs, ref_op);

    /* Set counter to validate the ordering */
    for (j = 0; j < burst_sz; ++j)
      ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

    start_time = rte_rdtsc_precise();

    enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, &ops_enq[enq], burst_sz);
    TEST_ASSERT(enq == burst_sz, "Error enqueueing burst, expected %u, got %u", burst_sz, enq);

    /* Dequeue */
    do {
      deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, &ops_deq[deq], burst_sz - deq);
      if (likely(first_time && (deq > 0))) {
        last_time += rte_rdtsc_precise() - start_time;
        first_time = false;
      }
    } while (unlikely(burst_sz != deq));

    *max_time = RTE_MAX(*max_time, last_time);
    *min_time = RTE_MIN(*min_time, last_time);
    *total_time += last_time;

    if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
      ret = validate_enc_op(ops_deq, burst_sz, ref_op);
      TEST_ASSERT_SUCCESS(ret, "Validation failed!");
    }

    rte_bbdev_enc_op_free_bulk(ops_enq, deq);
    dequeued += deq;
  }

  return i;
}
static int
latency_test(struct active_device *ad, struct test_op_params *op_params)
{
  int iter;
  uint16_t burst_sz = op_params->burst_sz;
  const uint16_t num_to_process = op_params->num_to_process;
  const enum rte_bbdev_op_type op_type = test_vector.op_type;
  const uint16_t queue_id = ad->queue_ids[0];
  struct test_buffers *bufs = NULL;
  struct rte_bbdev_info info;
  uint64_t total_time, min_time, max_time;
  const char *op_type_str;

  total_time = max_time = 0;
  min_time = UINT64_MAX;

  TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), "BURST_SIZE should be <= %u", MAX_BURST);

  rte_bbdev_info_get(ad->dev_id, &info);
  bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

  op_type_str = rte_bbdev_op_type_str(op_type);
  TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

  printf("+ ------------------------------------------------------- +\n");
  printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
      info.dev_name, burst_sz, num_to_process, op_type_str);

  if (op_type == RTE_BBDEV_OP_TURBO_DEC)
    iter = latency_test_dec(op_params->mp, bufs, op_params->ref_dec_op,
        op_params->vector_mask, ad->dev_id, queue_id, num_to_process,
        burst_sz, &total_time, &min_time, &max_time);
  else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
    iter = latency_test_enc(op_params->mp, bufs, op_params->ref_enc_op,
        ad->dev_id, queue_id, num_to_process,
        burst_sz, &total_time, &min_time, &max_time);
  else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
    iter = latency_test_ldpc_enc(op_params->mp, bufs, op_params->ref_enc_op,
        ad->dev_id, queue_id, num_to_process,
        burst_sz, &total_time, &min_time, &max_time);
  else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
    iter = latency_test_ldpc_dec(op_params->mp, bufs, op_params->ref_dec_op,
        op_params->vector_mask, ad->dev_id, queue_id, num_to_process,
        burst_sz, &total_time, &min_time, &max_time);
  else
    iter = latency_test_enc(op_params->mp, bufs, op_params->ref_enc_op,
        ad->dev_id, queue_id, num_to_process,
        burst_sz, &total_time, &min_time, &max_time);

  if (iter <= 0)
    return TEST_FAILED;

  double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);

  printf("Operation latency:\n"
      "\tavg: %lg cycles, %lg us\n"
      "\tmin: %lg cycles, %lg us\n"
      "\tmax: %lg cycles, %lg us\n",
      (double)total_time / (double)iter,
      (double)(total_time * 1000000) / (double)iter / (double)rte_get_tsc_hz(),
      (double)min_time,
      (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
      (double)max_time,
      (double)(max_time * 1000000) /
  tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
      ((double)total_time / (double)rte_get_tsc_hz());
  tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 1000000.0) /
      ((double)total_time / (double)rte_get_tsc_hz());

  return TEST_SUCCESS;
}
#ifdef RTE_BBDEV_OFFLOAD_COST
static int
get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id, struct rte_bbdev_stats *stats)
{
  struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
  struct rte_bbdev_stats *q_stats;

  if (queue_id >= dev->data->num_queues)
    return -1;

  q_stats = &dev->data->queues[queue_id].queue_stats;

  stats->enqueued_count = q_stats->enqueued_count;
  stats->dequeued_count = q_stats->dequeued_count;
  stats->enqueue_err_count = q_stats->enqueue_err_count;
  stats->dequeue_err_count = q_stats->dequeue_err_count;
  stats->acc_offload_cycles = q_stats->acc_offload_cycles;

  return 0;
}
static int
offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
				stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(200);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed*/
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
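In the loop above, the enqueue-side latency is split into a driver (software) part and an accelerator part: the wall-clock time measured with rte_rdtsc_precise() has stats.acc_offload_cycles subtracted from it, so enq_sw_* tracks only the driver overhead while enq_acc_* tracks the cycles the accelerator reports through the queue stats. A condensed sketch of that split follows; the variable names mirror the function above and it is illustrative only, not a replacement for it.

	uint64_t enq_start = rte_rdtsc_precise();
	/* ... enqueue one burst of decode operations here ... */
	struct rte_bbdev_stats stats;
	get_bbdev_queue_stats(dev_id, queue_id, &stats);

	/* driver-only cycles = elapsed cycles minus cycles spent in the accelerator */
	uint64_t enq_sw_cycles = rte_rdtsc_precise() - enq_start -
			stats.acc_offload_cycles;
	uint64_t enq_acc_cycles = stats.acc_offload_cycles;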
static int
offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
		uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		struct test_time_stats *time_st)
static void
print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;
	bool extDdr = ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					bufs->harq_inputs,
					bufs->harq_outputs,
					ref_op);

		if (extDdr)
			preload_harq_ddr(dev_id, queue_id, ops_enq,
					burst_sz, true);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(200);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed*/
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		if (extDdr) {
			/* Read loopback is not thread safe */
			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
		}

	unsigned int iter = 0;
	double total_mops = 0, total_mbps = 0;

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;

	for (iter = 0; iter < used_cores; iter++) {
		printf("Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
				t_params[iter].lcore_id,
				t_params[iter].ops_per_sec,
				t_params[iter].mbps);
		total_mops += t_params[iter].ops_per_sec;
		total_mbps += t_params[iter].mbps;
	}

	return i;

	printf("\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
			used_cores, total_mops, total_mbps);
}
static int
offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
/* Aggregate the performance results over the number of cores used */
static void
print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
{
	int i, dequeued, ret;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;

	unsigned int core_idx = 0;
	double total_mops = 0, total_mbps = 0;
	uint8_t iter_count = 0;

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(200);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;

	for (core_idx = 0; core_idx < used_cores; core_idx++) {
		printf("Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
				t_params[core_idx].lcore_id,
				t_params[core_idx].ops_per_sec,
				t_params[core_idx].mbps,
				t_params[core_idx].iter_count);
		total_mops += t_params[core_idx].ops_per_sec;
		total_mbps += t_params[core_idx].mbps;
		iter_count = RTE_MAX(iter_count,
				t_params[core_idx].iter_count);
	}

	return i;

	printf("\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
			used_cores, total_mops, total_mbps, iter_count);
}
static int
offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
		uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		struct test_time_stats *time_st)
/* Aggregate the performance results over the number of cores used */
static void
print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
{
	int i, dequeued, ret;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

	unsigned int core_idx = 0;
	double total_mbps = 0, total_bler = 0, total_iter = 0;
	double snr = get_snr();

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(200);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;

	for (core_idx = 0; core_idx < used_cores; core_idx++) {
		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
				t_params[core_idx].lcore_id,
				t_params[core_idx].bler * 100,
				t_params[core_idx].iter_average,
				t_params[core_idx].mbps,
				get_vector_filename());
		total_mbps += t_params[core_idx].mbps;
		total_bler += t_params[core_idx].bler;
		total_iter += t_params[core_idx].iter_average;
	}
	total_bler /= used_cores;
	total_iter /= used_cores;

	return i;

	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
			snr, total_bler * 100, total_iter, get_iter_max(),
			total_mbps, get_vector_filename());
}
#endif

/*
 * Test function that determines how long an enqueue + dequeue of a burst
 * takes on available lcores.
 */
static int
offload_cost_test(struct active_device *ad,
throughput_test(struct active_device *ad,
		struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
	RTE_SET_USED(ad);
	RTE_SET_USED(op_params);
	printf("Offload latency test is disabled.\n");
	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
	return TEST_SKIPPED;
#else
	int iter;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	int ret;
	unsigned int lcore_id, used_cores = 0;
	struct thread_params *t_params, *tp;
	struct rte_bbdev_info info;
	lcore_function_t *throughput_function;
	uint16_t num_lcores;
	const char *op_type_str;
	struct test_time_stats time_st;

	memset(&time_st, 0, sizeof(struct test_time_stats));
	time_st.enq_sw_min_time = UINT64_MAX;
	time_st.enq_acc_min_time = UINT64_MAX;
	time_st.deq_min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
			test_vector.op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = offload_latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);

	if (iter <= 0)
		return TEST_FAILED;

	printf("== new test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
			info.dev_name, ad->nb_queues, op_params->burst_sz,
			op_params->num_to_process, op_params->num_lcores,
			op_type_str,
			intr_enabled ? "Interrupt mode" : "PMD mode",
			(double)rte_get_tsc_hz() / 1000000000.0);

	printf("Enqueue driver offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n"
			"Enqueue accelerator offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.enq_sw_total_time / (double)iter,
			(double)(time_st.enq_sw_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_sw_min_time,
			(double)(time_st.enq_sw_min_time * 1000000) /
			rte_get_tsc_hz(),
			(double)time_st.enq_sw_max_time,
			(double)(time_st.enq_sw_max_time * 1000000) /
			rte_get_tsc_hz(),
			(double)time_st.enq_acc_total_time / (double)iter,
			(double)(time_st.enq_acc_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_acc_min_time,
			(double)(time_st.enq_acc_min_time * 1000000) /
			rte_get_tsc_hz(),
			(double)time_st.enq_acc_max_time,
			(double)(time_st.enq_acc_max_time * 1000000) /
			rte_get_tsc_hz());

	printf("Dequeue offload cost latency - one op:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.deq_total_time / (double)iter,
			(double)(time_st.deq_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.deq_min_time,
			(double)(time_st.deq_min_time * 1000000) /
			rte_get_tsc_hz(),
			(double)time_st.deq_max_time,
			(double)(time_st.deq_max_time * 1000000) /
			rte_get_tsc_hz());

	/* Set number of lcores */
	num_lcores = (ad->nb_queues < (op_params->num_lcores)) ?
			ad->nb_queues : op_params->num_lcores;

	return TEST_SUCCESS;
#endif
}
	/* Allocate memory for thread parameters structure */
	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
			RTE_CACHE_LINE_SIZE);
	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
				RTE_CACHE_LINE_SIZE));

#ifdef RTE_BBDEV_OFFLOAD_COST
static int
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
	int i, deq_total;
	struct rte_bbdev_dec_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	rte_atomic16_set(&op_params->sync, SYNC_WAIT);

	/* Test deq offload latency from an empty queue */
	/* Master core is set at first entry */
	t_params[0].dev_id = ad->dev_id;
	t_params[0].lcore_id = rte_lcore_id();
	t_params[0].op_params = op_params;
	t_params[0].queue_id = ad->queue_ids[used_cores++];
	t_params[0].iter_count = 0;

	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (used_cores >= num_lcores)
			break;

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
					burst_sz);
		else
			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
					burst_sz);

		t_params[used_cores].dev_id = ad->dev_id;
		t_params[used_cores].lcore_id = lcore_id;
		t_params[used_cores].op_params = op_params;
		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
		t_params[used_cores].iter_count = 0;

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;

		rte_eal_remote_launch(throughput_pmd_lcore_ldpc_dec,
				&t_params[used_cores++], lcore_id);
	}

	return i;
}

	rte_atomic16_set(&op_params->sync, SYNC_START);
	ret = throughput_pmd_lcore_ldpc_dec(&t_params[0]);
static int
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
	int i, deq_total;
	struct rte_bbdev_enc_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */
	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
					burst_sz);
		else
			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
					burst_sz);

	/* Master core is always used */
	for (used_cores = 1; used_cores < num_lcores; used_cores++)
		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;

	/* Return if test failed */
	if (ret) {
		rte_free(t_params);
		return ret;
	}

	return i;
}
#endif
static int
offload_latency_empty_q_test(struct active_device *ad,
		struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
	RTE_SET_USED(ad);
	RTE_SET_USED(op_params);
	printf("Offload latency empty dequeue test is disabled.\n");
	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
	return TEST_SKIPPED;
#else
	int iter;
	uint64_t deq_total_time, deq_min_time, deq_max_time;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct rte_bbdev_info info;
	const char *op_type_str;

	deq_total_time = deq_max_time = 0;
	deq_min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);

	/* Print throughput if interrupts are disabled and test passed */
	if (!intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else
			print_enc_throughput(t_params, num_lcores);
		rte_free(t_params);
		return ret;
	}

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	/* In interrupt TC we need to wait for the interrupt callback to deqeue
	 * all pending operations. Skip waiting for queues which reported an
	 * error using processing_status variable.
	 * Wait for master lcore operations.
	 */
	tp = &t_params[0];
	while ((rte_atomic16_read(&tp->nb_dequeued) <
			op_params->num_to_process) &&
			(rte_atomic16_read(&tp->processing_status) !=
			TEST_FAILED))
		rte_pause();

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	tp->ops_per_sec /= TEST_REPETITIONS;
	tp->mbps /= TEST_REPETITIONS;
	ret |= (int)rte_atomic16_read(&tp->processing_status);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
			op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
				num_to_process, burst_sz, &deq_total_time,
				&deq_min_time, &deq_max_time, op_type);
	else
		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
				num_to_process, burst_sz, &deq_total_time,
				&deq_min_time, &deq_max_time, op_type);

	/* Wait for slave lcores operations */
	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
		tp = &t_params[used_cores];

	if (iter <= 0)
		return TEST_FAILED;

		while ((rte_atomic16_read(&tp->nb_dequeued) <
				op_params->num_to_process) &&
				(rte_atomic16_read(&tp->processing_status) !=
				TEST_FAILED))
			rte_pause();

	printf("Empty dequeue offload:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)deq_total_time / (double)iter,
			(double)(deq_total_time * 1000000) / (double)iter /
			(double)rte_get_tsc_hz(),
			(double)deq_min_time,
			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
			(double)deq_max_time,
			(double)(deq_max_time * 1000000) / rte_get_tsc_hz());

		tp->ops_per_sec /= TEST_REPETITIONS;
		tp->mbps /= TEST_REPETITIONS;
		ret |= (int)rte_atomic16_read(&tp->processing_status);
	}

	return TEST_SUCCESS;
#endif
}

	/* Print throughput if test passed */
	if (!ret) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			print_enc_throughput(t_params, num_lcores);
	}

static int
bler_tc(void)
{
	return run_test_case(bler_test);

	rte_free(t_params);
	return ret;
}
static int
...
...
@@ -5070,23 +3361,7 @@ throughput_tc(void)
	return run_test_case(throughput_test);
}

static int
offload_cost_tc(void)
{
	return run_test_case(offload_cost_test);
}

static int
offload_latency_empty_q_tc(void)
{
	return run_test_case(offload_latency_empty_q_test);
}

static int
latency_tc(void)
{
	return run_test_case(latency_test);
}

static int
interrupt_tc(void)
...
...
@@ -5094,16 +3369,6 @@ interrupt_tc(void)
	return run_test_case(throughput_test);
}

static struct unit_test_suite bbdev_bler_testsuite = {
	.suite_name = "BBdev BLER Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_throughput_testsuite = {
	.suite_name = "BBdev Throughput Tests",
	.setup = testsuite_setup,
...
...
@@ -5114,53 +3379,8 @@ static struct unit_test_suite bbdev_throughput_testsuite = {
	}
};
static struct unit_test_suite bbdev_validation_testsuite = {
	.suite_name = "BBdev Validation Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_latency_testsuite = {
	.suite_name = "BBdev Latency Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_offload_cost_testsuite = {
	.suite_name = "BBdev Offload Cost Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_interrupt_testsuite = {
	.suite_name = "BBdev Interrupt Tests",
	.setup = interrupt_testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

//REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
//REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
//REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
//REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
//REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);

#define MAX_QUEUES RTE_MAX_LCORE
...
...
@@ -5331,162 +3551,6 @@ print_usage(const char *prog_name)
	printf("\n");
}

static int
parse_args(int argc, char **argv, struct test_params *tp)
{
	int opt, option_index;
	unsigned int num_tests = 0;
	bool test_cases_present = false;
	bool test_vector_present = false;
	struct test_command *t;
	char *tokens[MAX_CMDLINE_TESTCASES];
	int tc, ret;

	static struct option lgopts[] = {
		{ "num-ops", 1, 0, 'n' },
		{ "burst-size", 1, 0, 'b' },
		{ "test-cases", 1, 0, 'c' },
		{ "test-vector", 1, 0, 'v' },
		{ "lcores", 1, 0, 'l' },
		{ "snr", 1, 0, 's' },
		{ "iter_max", 6, 0, 't' },
		{ "init-device", 0, 0, 'i' },
		{ "help", 0, 0, 'h' },
		{ NULL, 0, 0, 0 }
	};

	tp->iter_max = DEFAULT_ITER;

	while ((opt = getopt_long(argc, argv, "hin:b:c:v:l:s:t:", lgopts,
			&option_index)) != EOF)
		switch (opt) {
		case 'n':
			TEST_ASSERT(strlen(optarg) > 0,
					"Num of operations is not provided");
			tp->num_ops = strtol(optarg, NULL, 10);
			break;
		case 'b':
			TEST_ASSERT(strlen(optarg) > 0,
					"Burst size is not provided");
			tp->burst_sz = strtol(optarg, NULL, 10);
			TEST_ASSERT(tp->burst_sz <= MAX_BURST,
					"Burst size mustn't be greater than %u",
					MAX_BURST);
			break;
		case 'c':
			TEST_ASSERT(test_cases_present == false,
					"Test cases provided more than once");
			test_cases_present = true;

			ret = rte_strsplit(optarg, strlen(optarg),
					tokens, MAX_CMDLINE_TESTCASES, tc_sep);

			TEST_ASSERT(ret <= MAX_CMDLINE_TESTCASES,
					"Too many test cases (max=%d)",
					MAX_CMDLINE_TESTCASES);

			for (tc = 0; tc < ret; ++tc) {
				/* Find matching test case */
				TAILQ_FOREACH(t, &commands_list, next)
					if (!strcmp(tokens[tc], t->command))
						tp->test_to_run[num_tests] = t;

				TEST_ASSERT(tp->test_to_run[num_tests] != NULL,
						"Unknown test case: %s",
						tokens[tc]);
				++num_tests;
			}
			break;
		case 'v':
			TEST_ASSERT(test_vector_present == false,
					"Test vector provided more than once");
			test_vector_present = true;

			TEST_ASSERT(strlen(optarg) > 0,
					"Config file name is null");

			snprintf(tp->test_vector_filename,
					sizeof(tp->test_vector_filename),
					"%s", optarg);
			break;
		case 's':
			TEST_ASSERT(strlen(optarg) > 0,
					"SNR is not provided");
			tp->snr = strtod(optarg, NULL);
			break;
		case 't':
			TEST_ASSERT(strlen(optarg) > 0,
					"Iter_max is not provided");
			tp->iter_max = strtol(optarg, NULL, 10);
			break;
		case 'l':
			TEST_ASSERT(strlen(optarg) > 0,
					"Num of lcores is not provided");
			tp->num_lcores = strtol(optarg, NULL, 10);
			TEST_ASSERT(tp->num_lcores <= RTE_MAX_LCORE,
					"Num of lcores mustn't be greater than %u",
					RTE_MAX_LCORE);
			break;
		case 'i':
			/* indicate fpga fec config required */
			tp->init_device = true;
			break;
		case 'h':
			print_usage(argv[0]);
			return 0;
		default:
			printf("ERROR: Unknown option: -%c\n", opt);
			return -1;
		}

	if (tp->num_ops == 0) {
		printf("WARNING: Num of operations was not provided or was set 0. Set to default (%u)\n",
				DEFAULT_OPS);
		tp->num_ops = DEFAULT_OPS;
	}
	if (tp->burst_sz == 0) {
		printf("WARNING: Burst size was not provided or was set 0. Set to default (%u)\n",
				DEFAULT_BURST);
		tp->burst_sz = DEFAULT_BURST;
	}
	if (tp->num_lcores == 0) {
		printf("WARNING: Num of lcores was not provided or was set 0. Set to value from RTE config (%u)\n",
				rte_lcore_count());
		tp->num_lcores = rte_lcore_count();
	}

	TEST_ASSERT(tp->burst_sz <= tp->num_ops,
			"Burst size (%u) mustn't be greater than num ops (%u)",
			tp->burst_sz, tp->num_ops);

	tp->num_tests = num_tests;
	return 0;
}
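For reference, the option table above maps onto a command line of the usual test-bbdev form. The sketch below is illustrative only: the wrapper function name is made up, and the option values simply mirror the hard-coded num_ops=2, burst_sz=2, num_lcores=1 and the ldpc_dec_v8480.data vector used further down in this file.

/* Illustrative only (not in the patch): feeding parse_args() a synthetic
 * argv, similar to the argv_re[] array built later in this file. */
static int example_parse(struct test_params *tp)
{
	char *argv[] = {
		"nrLDPC_decoder_offload",
		"-n", "2",          /* --num-ops */
		"-b", "2",          /* --burst-size */
		"-l", "1",          /* --lcores */
		"-c", "throughput", /* --test-cases (registered via REGISTER_TEST_COMMAND) */
		"-v", "../../../targets/ARCH/test-bbdev/test_vectors/ldpc_dec_v8480.data",
	};

	return parse_args(sizeof(argv) / sizeof(argv[0]), argv, tp);
}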
static int
run_all_tests(void)
{
	int ret = TEST_SUCCESS;
	struct test_command *t;

	TAILQ_FOREACH(t, &commands_list, next)
		ret |= (int) t->callback();

	return ret;
}

static int
run_parsed_tests(struct test_params *tp)
{
	int ret = TEST_SUCCESS;
	unsigned int i;

	for (i = 0; i < tp->num_tests; ++i)
		ret |= (int) tp->test_to_run[i]->callback();

	return ret;
}

static int
init_input(uint32_t **data, uint32_t data_length)
...
...
@@ -5652,13 +3716,6 @@ argv_re[1] = "-v";
argv_re
[
2
]
=
"../../../targets/ARCH/test-bbdev/test_vectors/ldpc_dec_v8480.data"
;
//printf("after ......ret %d argc %d argv %s %s %s %s\n", ret,argc, argv[0], argv[1], argv[2], argv[3],argv[4]);
/* Parse application arguments (after the EAL ones) */
/* ret = parse_args(argc_re, argv_re, &test_params);
if (ret < 0) {
print_usage(argv_re[0]);
return 1;
}
*/
memset
(
&
test_vector_dec
,
0
,
sizeof
(
struct
test_bbdev_vector
));
...
...
@@ -5707,11 +3764,19 @@ test_params.num_ops=2;
test_params.burst_sz = 2;
test_params.num_lcores = 1;
test_params.num_tests = 1;
run_all_tests();
//run_all_tests();
testsuite_setup();
ut_setup();
throughput_tc();

char *data = ldpc_output;
data_len = (p_decParams->BG == 1) ? (22 * p_decParams->Z) : (10 * p_decParams->Z);
memcpy(&p_out[0], data, C * data_len);
//p_out = ldpc_output;

ut_teardown();

//for (i=0;i<8;i++)
//printf("p_out[%d] = %x addr %p ldpcout addr %p\n",i,p_out[i],p_out+i,ldpc_output+i);
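The copy-out size above follows the LDPC base graph: 22 and 10 are the number of systematic columns in base graphs 1 and 2 respectively, so data_len scales with the lifting size Z. A quick worked example (the Z value is illustrative, not taken from the patch):

/* Worked example of the data_len formula above (Z chosen for illustration). */
int BG = 1;
uint32_t Z = 384;
uint32_t data_len = (BG == 1) ? (22 * Z) : (10 * Z); /* 22*384 = 8448 for BG1, 10*384 = 3840 for BG2 */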
...
...
openair1/PHY/NR_TRANSPORT/nr_ulsch_decoding.c
View file @ 52450c44
...
...
@@ -627,12 +627,12 @@ uint32_t nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
#endif
	no_iteration_ldpc = ulsch->max_ldpc_iterations + 1;
      }

      for (int k = 0; k < 8; k++)
      /*
      for (int k=0;k<8;k++)
	{
	printf("output decoder [%d] = 0x%02x \n", k, harq_process->c[r][k]);
	printf("llrprocbuf [%d] = %x adr %p\n", k, llrProcBuf[k], llrProcBuf+k);
	}
      */

      memcpy(harq_process->b + offset, harq_process->c[r],
...
...