Commit 84c69e50 authored by Robert Schmidt

Avoid huge calloc by alloc'ing TBs independently

Prior to this commit, the structure NR_UE_info_t is 4881632 bytes.
On some machines, e.g., one of the CI systems, this can lead to realtime
problems when doing the initial calloc() of the data structure.

Commit e586efb2 ("Enable 32 HARQ
Processes in DL, UL") introduced the real-time problems by increasing
the number of HARQ processes.

The reason NR_UE_info_t is that big is that it contains buffers for DL
HARQ data that might need to be retransmitted (the L1 is stateless, it
cannot store this for the L2). To reduce the size, dynamically allocate
the DL HARQ buffers when they are needed. This also reduces the size of
NR_UE_info_t to 15840 bytes.

Encapsulate transportBlock in a byte_array with helper functions, and
allocate it in chunks of 32kB, doubling the size until the TB fits.
Reported-By: Cedric Roux <cedric.roux@eurecom.fr>
Fixes: e586efb2
Closes: #875
parent 8eca2e05
......@@ -2126,7 +2126,7 @@ static void nr_generate_Msg4_MsgB(module_id_t module_idP,
harq->tb_size = tb_size;
uint8_t *buf = (uint8_t *) harq->transportBlock;
uint8_t *buf = allocate_transportBlock_buffer(&harq->transportBlock, tb_size);
// Bytes to be transmitted
if (harq->round == 0) {
uint16_t mac_pdu_length = 0;
......@@ -2206,11 +2206,11 @@ static void nr_generate_Msg4_MsgB(module_id_t module_idP,
}
T(T_GNB_MAC_DL_PDU_WITH_DATA, T_INT(module_idP), T_INT(CC_id), T_INT(ra->rnti),
T_INT(frameP), T_INT(slotP), T_INT(current_harq_pid), T_BUFFER(harq->transportBlock, harq->tb_size));
T_INT(frameP), T_INT(slotP), T_INT(current_harq_pid), T_BUFFER(harq->transportBlock.buf, harq->tb_size));
// DL TX request
nfapi_nr_pdu_t *tx_req = &TX_req->pdu_list[TX_req->Number_of_PDUs];
memcpy(tx_req->TLVs[0].value.direct, harq->transportBlock, sizeof(uint8_t) * harq->tb_size);
memcpy(tx_req->TLVs[0].value.direct, harq->transportBlock.buf, sizeof(uint8_t) * harq->tb_size);
tx_req->PDU_index = pduindex;
tx_req->num_TLV = 1;
tx_req->TLVs[0].length = harq->tb_size;
......
......@@ -1280,12 +1280,12 @@ void nr_schedule_ue_spec(module_id_t module_id,
UE->rnti,
current_harq_pid);
T(T_GNB_MAC_RETRANSMISSION_DL_PDU_WITH_DATA, T_INT(module_id), T_INT(CC_id), T_INT(rnti),
T_INT(frame), T_INT(slot), T_INT(current_harq_pid), T_INT(harq->round), T_BUFFER(harq->transportBlock, TBS));
T_INT(frame), T_INT(slot), T_INT(current_harq_pid), T_INT(harq->round), T_BUFFER(harq->transportBlock.buf, TBS));
UE->mac_stats.dl.total_rbs_retx += sched_pdsch->rbSize;
gNB_mac->mac_stats.used_prb_aggregate += sched_pdsch->rbSize;
} else { /* initial transmission */
LOG_D(NR_MAC, "Initial HARQ transmission in %d.%d\n", frame, slot);
uint8_t *buf = (uint8_t *) harq->transportBlock;
uint8_t *buf = allocate_transportBlock_buffer(&harq->transportBlock, TBS);
/* first, write all CEs that might be there */
int written = nr_write_ce_dlsch_pdu(module_id,
sched_ctrl,
......@@ -1420,7 +1420,7 @@ void nr_schedule_ue_spec(module_id_t module_id,
}
T(T_GNB_MAC_DL_PDU_WITH_DATA, T_INT(module_id), T_INT(CC_id), T_INT(rnti),
T_INT(frame), T_INT(slot), T_INT(current_harq_pid), T_BUFFER(harq->transportBlock, TBS));
T_INT(frame), T_INT(slot), T_INT(current_harq_pid), T_BUFFER(harq->transportBlock.buf, TBS));
}
const int ntx_req = TX_req->Number_of_PDUs;
......@@ -1429,7 +1429,7 @@ void nr_schedule_ue_spec(module_id_t module_id,
tx_req->num_TLV = 1;
tx_req->TLVs[0].length = TBS;
tx_req->PDU_length = compute_PDU_length(tx_req->num_TLV, tx_req->TLVs[0].length);
memcpy(tx_req->TLVs[0].value.direct, harq->transportBlock, TBS);
memcpy(tx_req->TLVs[0].value.direct, harq->transportBlock.buf, TBS);
TX_req->Number_of_PDUs++;
TX_req->SFN = frame;
TX_req->Slot = slot;
......
......@@ -2079,12 +2079,35 @@ void delete_nr_ue_data(NR_UE_info_t *UE, NR_COMMON_channels_t *ccPtr, uid_alloca
destroy_nr_list(&sched_ctrl->available_ul_harq);
destroy_nr_list(&sched_ctrl->feedback_ul_harq);
destroy_nr_list(&sched_ctrl->retrans_ul_harq);
for (int i = 0; i < NR_MAX_HARQ_PROCESSES; ++i)
free_transportBlock_buffer(&sched_ctrl->harq_processes[i].transportBlock);
free_sched_pucch_list(sched_ctrl);
uid_linear_allocator_free(uia, UE->uid);
LOG_I(NR_MAC, "Remove NR rnti 0x%04x\n", UE->rnti);
free(UE);
}
/* Smallest buffer handed out for a transport block; larger requests double
 * this value until they fit. */
#define TB_SINGLE_LAYER (32 * 1024)

/* Make sure tb owns a buffer of at least `needed` bytes and return it.
 *
 * - If tb already holds a buffer with tb->len >= needed, that buffer is
 *   returned untouched.
 * - Otherwise the old buffer is freed and a new one is allocated; its size is
 *   TB_SINGLE_LAYER doubled until it is >= needed. The previous contents are
 *   NOT preserved (free + allocate, not realloc), so callers must (re)write
 *   the transport block after a growing call.
 *
 * tb->len records the capacity of the buffer, not the number of valid bytes.
 *
 * \param tb      byte array to (re)allocate; tb->buf may be NULL
 * \param needed  minimum number of bytes required, must be > 0
 * \return        tb->buf, pointing to at least `needed` bytes
 *
 * NOTE(review): malloc_or_fail() presumably does not return on allocation
 * failure -- confirm against its definition. */
uint8_t *allocate_transportBlock_buffer(byte_array_t *tb, uint32_t needed)
{
  DevAssert(needed > 0);
  if (tb->buf != NULL && needed <= tb->len)
    return tb->buf; // nothing to do, current is enough

  uint32_t size = TB_SINGLE_LAYER;
  while (needed > size) {
    if (size > UINT32_MAX / 2) {
      // doubling again would wrap the 32-bit size to 0 and never terminate;
      // fall back to the exact amount requested
      size = needed;
      break;
    }
    size *= 2;
  }

  // size is unsigned: print it with %u instead of %d
  LOG_D(NR_MAC, "allocating new TB block of size %u\n", (unsigned int)size);
  free(tb->buf); // free(NULL) is a no-op, so no guard needed
  tb->buf = malloc_or_fail(size);
  tb->len = size;
  return tb->buf;
}
/* Release the buffer held by tb and leave tb in an empty state.
 *
 * free_byte_array() is handed a copy of the struct, so it cannot reset the
 * caller's fields (assuming it is a plain function). Clearing buf/len here
 * avoids a dangling pointer: without it, a later
 * allocate_transportBlock_buffer() on the same tb could take its
 * "current buffer is big enough" shortcut and return freed memory, and a
 * second call to this function would double-free.
 *
 * \param tb  byte array whose buffer is released; must not be NULL */
void free_transportBlock_buffer(byte_array_t *tb)
{
  free_byte_array(*tb);
  tb->buf = NULL;
  tb->len = 0;
}
void set_max_fb_time(NR_UE_UL_BWP_t *UL_BWP, const NR_UE_DL_BWP_t *DL_BWP)
{
......
......@@ -381,6 +381,9 @@ void handle_nr_ul_harq(const int CC_idP,
sub_frame_t slot,
const nfapi_nr_crc_t *crc_pdu);
uint8_t *allocate_transportBlock_buffer(byte_array_t *tb, uint32_t needed);
void free_transportBlock_buffer(byte_array_t *tb);
void handle_nr_srs_measurements(const module_id_t module_id,
const frame_t frame,
const sub_frame_t slot,
......
......@@ -44,6 +44,7 @@
#include <pthread.h>
#include "common/utils/ds/seq_arr.h"
#include "common/utils/nr/nr_common.h"
#include "common/utils/ds/byte_array.h"
#define NR_SCHED_LOCK(lock) \
do { \
......@@ -481,10 +482,9 @@ typedef struct NR_UE_harq {
uint16_t feedback_frame;
uint16_t feedback_slot;
/* Transport block to be sent using this HARQ process, its size is in
* sched_pdsch */
uint32_t transportBlock[38016]; // valid up to 4 layers
uint32_t tb_size;
/* Transport block to be sent using this HARQ process */
byte_array_t transportBlock;
uint32_t tb_size; // size of currently stored TB
/// sched_pdsch keeps information on MCS etc used for the initial transmission
NR_sched_pdsch_t sched_pdsch;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment