Commit 90057df3 authored by Jaroslava Fiedlerova

Merge remote-tracking branch 'origin/opt-cp-insertion' into integration_2024_w51 (!3151)

Optimize PHY_ofdm_mod CYCLIC_PREFIX in case of incidentally aligned pointers

It seems that in most cases the cyclic prefix length is a multiple of 512
samples. This means that the IDFT output pointer is usually already aligned,
so the IDFT can write directly into the output buffer and there is no need
for an extra memcpy from a temporary buffer. This saves the memcpy time in
most cyclic prefix insertion cases.
parents cd570f63 55b5e04e
......@@ -38,7 +38,7 @@
@param nb_prefix_samples The number of prefix/suffix/zero samples
@param etype Type of extension (CYCLIC_PREFIX, CYCLIC_SUFFIX, ZEROS, NONE)
*/
void PHY_ofdm_mod(int *input,
void PHY_ofdm_mod(const int *input,
int *output,
int fftsize,
unsigned char nb_symbols,
......
......@@ -40,6 +40,9 @@ This section deals with basic functions for OFDM Modulation.
#include "PHY/LTE_TRANSPORT/transport_common_proto.h"
//#define DEBUG_OFDM_MOD
// Use 64-byte alignment for IDFT output buffer to ensure no
// runtime error in case IDFT implementation uses AVX-512.
#define IDFT_OUTPUT_BUFFER_ALIGNMENT 64
void normal_prefix_mod(int32_t *txdataF,int32_t *txdata,uint8_t nsymb,LTE_DL_FRAME_PARMS *frame_parms)
{
......@@ -122,109 +125,90 @@ void nr_normal_prefix_mod(c16_t *txdataF, c16_t *txdata, uint8_t nsymb, const NR
}
void PHY_ofdm_mod(int *input, /// pointer to complex input
int *output, /// pointer to complex output
int fftsize, /// FFT_SIZE
unsigned char nb_symbols, /// number of OFDM symbols
unsigned short nb_prefix_samples, /// cyclic prefix length
Extension_t etype /// type of extension
)
void PHY_ofdm_mod(const int *input, /// pointer to complex input
int *output, /// pointer to complex output
int fftsize, /// FFT_SIZE
unsigned char nb_symbols, /// number of OFDM symbols
unsigned short nb_prefix_samples, /// cyclic prefix length
Extension_t etype /// type of extension
)
{
if (nb_symbols == 0)
return;
if(nb_symbols == 0) return;
int16_t temp[2*2*6144*4] __attribute__((aligned(32)));
int i,j;
volatile int *output_ptr=(int*)0;
int *temp_ptr=(int*)0;
idft_size_idx_t idft_size = get_idft(fftsize);
#ifdef DEBUG_OFDM_MOD
printf("[PHY] OFDM mod (size %d,prefix %d) Symbols %d, input %p, output %p\n",
fftsize,nb_prefix_samples,nb_symbols,input,output);
fftsize,
nb_prefix_samples,
nb_symbols,
input,
output);
#endif
for (i=0; i<nb_symbols; i++) {
for (int i = 0; i < nb_symbols; i++) {
#ifdef DEBUG_OFDM_MOD
printf("[PHY] symbol %d/%d offset %d (%p,%p -> %p)\n",i,nb_symbols,i*fftsize+(i*nb_prefix_samples),input,&input[i*fftsize],&output[(i*fftsize) + ((i)*nb_prefix_samples)]);
printf("[PHY] symbol %d/%d offset %d (%p,%p -> %p)\n",
i,
nb_symbols,
i * fftsize + (i * nb_prefix_samples),
input,
&input[i * fftsize],
&output[(i * fftsize) + ((i)*nb_prefix_samples)]);
#endif
// on AVX2 need 256-bit alignment
idft(idft_size, (int16_t *)&input[i * fftsize], (int16_t *)temp, 1);
// Copy to frame buffer with Cyclic Extension
// Note: will have to adjust for synchronization offset!
switch (etype) {
case CYCLIC_PREFIX:
output_ptr = &output[(i*fftsize) + ((1+i)*nb_prefix_samples)];
temp_ptr = (int *)temp;
// msg("Doing cyclic prefix method\n");
{
memcpy((void*)output_ptr,(void*)temp_ptr,fftsize<<2);
case CYCLIC_PREFIX: {
int *output_ptr = &output[(i * fftsize) + ((1 + i) * nb_prefix_samples)];
// Current idft implementation uses 256-bit AVX2: check if buffer is already aligned to 256 bits (32 bytes)
if ((uintptr_t)output_ptr % 32 == 0) {
// output ptr is aligned, do ifft inplace
idft(idft_size, (int16_t *)&input[i * fftsize], (int16_t *)output_ptr, 1);
} else {
// output ptr is not aligned, needs an extra memcpy
c16_t temp[fftsize] __attribute__((aligned(IDFT_OUTPUT_BUFFER_ALIGNMENT)));
idft(idft_size, (int16_t *)&input[i * fftsize], (int16_t *)temp, 1);
memcpy((void *)output_ptr, (void *)temp, sizeof(temp));
}
// perform cyclic prefix insertion
memcpy((void *)&output_ptr[-nb_prefix_samples], (void *)&output_ptr[fftsize - nb_prefix_samples], nb_prefix_samples * sizeof(c16_t));
break;
}
memcpy((void*)&output_ptr[-nb_prefix_samples],(void*)&output_ptr[fftsize-nb_prefix_samples],nb_prefix_samples<<2);
break;
case CYCLIC_SUFFIX:
output_ptr = &output[(i*fftsize)+ (i*nb_prefix_samples)];
temp_ptr = (int *)temp;
// msg("Doing cyclic suffix method\n");
for (j=0; j<fftsize ; j++) {
output_ptr[j] = temp_ptr[2*j];
case CYCLIC_SUFFIX: {
// Use alignment of 64 bytes
c16_t temp[fftsize] __attribute__((aligned(IDFT_OUTPUT_BUFFER_ALIGNMENT)));
idft(idft_size, (int16_t *)&input[i * fftsize], (int16_t *)temp, 1);
int *output_ptr = &output[(i * fftsize) + (i * nb_prefix_samples)];
memcpy(output_ptr, temp, sizeof(temp));
memcpy(&output_ptr[fftsize], temp, nb_prefix_samples * sizeof(c16_t));
break;
}
case ZEROS:
for (j=0; j<nb_prefix_samples; j++)
output_ptr[fftsize+j] = output_ptr[j];
break;
case ZEROS:
break;
case NONE:
// msg("NO EXTENSION!\n");
output_ptr = &output[fftsize];
temp_ptr = (int *)temp;
for (j=0; j<fftsize ; j++) {
output_ptr[j] = temp_ptr[2*j];
break;
case NONE: {
c16_t temp[fftsize] __attribute__((aligned(IDFT_OUTPUT_BUFFER_ALIGNMENT)));
idft(idft_size, (int16_t *)&input[i * fftsize], (int16_t *)temp, 1);
int *output_ptr = &output[i * fftsize];
memcpy(output_ptr, temp, sizeof(temp));
break;
}
break;
default:
break;
default:
break;
}
}
}
void do_OFDM_mod(c16_t **txdataF, c16_t **txdata, uint32_t frame,uint16_t next_slot, LTE_DL_FRAME_PARMS *frame_parms)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment