Commit 061fb00f authored by Raymond Knopp's avatar Raymond Knopp

added new polar decoder implementation

parent 7aad1584
......@@ -1123,6 +1123,7 @@ set(PHY_POLARSRC
${OPENAIR1_DIR}/PHY/CODING/nrPolar_tools/nr_polar_bit_insertion.c
${OPENAIR1_DIR}/PHY/CODING/nrPolar_tools/nr_polar_channel_interleaver_pattern.c
# ${OPENAIR1_DIR}/PHY/CODING/nrPolar_tools/nr_polar_crc.c
# ${OPENAIR1_DIR}/PHY/CODING/nrPolar_tools/decoder_K56_N512_E864.c
${OPENAIR1_DIR}/PHY/CODING/nrPolar_tools/nr_polar_decoding_tools.c
${OPENAIR1_DIR}/PHY/CODING/nrPolar_tools/nr_polar_info_bit_pattern.c
${OPENAIR1_DIR}/PHY/CODING/nrPolar_tools/nr_polar_interleaving_pattern.c
......
......@@ -43,8 +43,9 @@ int main(int argc, char *argv[]) {
double timeEncoderCumulative = 0, timeDecoderCumulative = 0;
uint8_t decoderListSize = 8, pathMetricAppr = 0; //0 --> eq. (8a) and (11b), 1 --> eq. (9) and (12)
int generate_optim_code=0;
while ((arguments = getopt (argc, argv, "s:d:f:m:i:l:a:h:q")) != -1)
while ((arguments = getopt (argc, argv, "s:d:f:m:i:l:a:h:qg")) != -1)
switch (arguments)
{
case 's':
......@@ -80,6 +81,13 @@ int main(int argc, char *argv[]) {
decoder_int8=1;
break;
case 'g':
generate_optim_code=1;
iterations=1;
SNRstart=-6.0;
SNRstop =-6.0;
decoder_int8=1;
break;
case 'h':
printf("./polartest -s SNRstart -d SNRinc -f SNRstop -m [0=DCI|1=PBCH|2=UCI] -i iterations -l decoderListSize -a pathMetricAppr\n");
exit(-1);
......@@ -141,12 +149,18 @@ int main(int argc, char *argv[]) {
t_nrPolar_params nrPolar_params;
nr_polar_init(&nrPolar_params, polarMessageType);
nr_polar_llrtableinit();
if (generate_optim_code==1) nrPolar_params.decoder_kernel = NULL;
// We assume no a priori knowledge available about the payload.
double aPrioriArray[nrPolar_params.payloadBits];
for (int i=0; i<=nrPolar_params.payloadBits; i++) aPrioriArray[i] = NAN;
printf("SNRstart %f, SNRstop %f,, SNRinc %f\n",SNRstart,SNRstop,SNRinc);
for (SNR = SNRstart; SNR <= SNRstop; SNR += SNRinc) {
printf("SNR %f\n",SNR);
SNR_lin = pow(10, SNR/10);
for (itr = 1; itr <= iterations; itr++) {
......@@ -165,20 +179,29 @@ int main(int argc, char *argv[]) {
modulatedInput[i]=(-1)/sqrt(2);
channelOutput[i] = modulatedInput[i] + (gaussdouble(0.0,1.0) * (1/sqrt(2*SNR_lin)));
if (decoder_int8==1) {
if (channelOutput[i] > 1024) channelOutput_int8[i] = 32768;
else if (channelOutput[i] < -1023) channelOutput_int8[i] = -32767;
else channelOutput_int8[i] = (int16_t) (32*channelOutput[i]);
if (channelOutput[i] > 15) channelOutput_int8[i] = 127;
else if (channelOutput[i] < -16) channelOutput_int8[i] = -128;
else channelOutput_int8[i] = (int16_t) (8*channelOutput[i]);
}
}
start_meas(&timeDecoder);
if (decoder_int8==0) decoderState = polar_decoder(channelOutput, estimatedOutput, &nrPolar_params, decoderListSize, aPrioriArray, pathMetricAppr,&polar_decoder_init,&polar_rate_matching,&decoding,&bit_extraction,&deinterleaving,&sorting,&path_metric,&update_LLR);
if (decoder_int8==0)
decoderState = polar_decoder(channelOutput, estimatedOutput, &nrPolar_params,
decoderListSize, aPrioriArray, pathMetricAppr,
&polar_decoder_init,&polar_rate_matching,&decoding,
&bit_extraction,&deinterleaving,&sorting,&path_metric,&update_LLR);
else
decoderState = polar_decoder_int8(channelOutput_int8, estimatedOutput, &nrPolar_params, decoderListSize, &polar_decoder_init,&polar_rate_matching,&decoding,&bit_extraction,&deinterleaving,&sorting,&path_metric,&update_LLR);
decoderState = polar_decoder_int8_new(channelOutput_int8, estimatedOutput, &nrPolar_params,
decoderListSize, &polar_decoder_init,&polar_rate_matching,
&decoding,&bit_extraction,&deinterleaving,
&sorting,&path_metric,&update_LLR,
generate_optim_code);
stop_meas(&timeDecoder);
//calculate errors
if (decoderState==-1) {
blockErrorState=-1;
......
......@@ -62,6 +62,11 @@ uint32_t poly24c = 0xB2B11700; //101100101011000100010111 -> D^24...
polarParams->nr_polar_u = malloc(sizeof(uint8_t) * polarParams->N); //Decoder: nr_polar_uHat
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_cHat
polarParams->nr_polar_b = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_bHat
polarParams->decoder_kernel = NULL;//polar_decoder_K56_N512_E864;
} else if (messageType == 2) { //UCI
polarParams->payloadBits = NR_POLAR_PUCCH_PAYLOAD_BITS; //A depends on what they carry...
polarParams->encoderLength = NR_POLAR_PUCCH_E ; //E depends on other standards 6.3.1.4
......@@ -131,6 +136,7 @@ uint32_t poly24c = 0xB2B11700; //101100101011000100010111 -> D^24...
polarParams->nr_polar_u = malloc(sizeof(uint8_t) * polarParams->N); //Decoder: nr_polar_uHat
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_cHat
polarParams->nr_polar_b = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_bHat
}
polarParams->crcCorrectionBits = NR_POLAR_CRC_ERROR_CORRECTION_BITS;
......@@ -161,5 +167,38 @@ uint32_t poly24c = 0xB2B11700; //101100101011000100010111 -> D^24...
nr_polar_channel_interleaver_pattern(polarParams->channel_interleaver_pattern,
polarParams->i_bil, polarParams->encoderLength);
polarParams->extended_crc_generator_matrix = malloc(polarParams->K * sizeof(uint8_t *)); //G_P3
uint8_t tempECGM[polarParams->K][polarParams->crcParityBits];
for (int i = 0; i < polarParams->K; i++){
polarParams->extended_crc_generator_matrix[i] = malloc(polarParams->crcParityBits * sizeof(uint8_t));
}
for (int i=0; i<polarParams->payloadBits; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
tempECGM[i][j]=polarParams->crc_generator_matrix[i][j];
}
}
for (int i=polarParams->payloadBits; i<polarParams->K; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
if( (i-polarParams->payloadBits) == j ){
tempECGM[i][j]=1;
} else {
tempECGM[i][j]=0;
}
}
}
for (int i=0; i<polarParams->K; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
polarParams->extended_crc_generator_matrix[i][j]=tempECGM[polarParams->interleaving_pattern[i]][j];
}
}
build_decoder_tree(polarParams);
printf("decoder tree nodes %d\n",polarParams->tree.num_nodes);
free(J);
}
......@@ -29,7 +29,6 @@
#include "PHY/CODING/nrPolar_tools/nr_polar_pbch_defs.h"
#include "PHY/TOOLS/time_meas.h"
#define SHOWCOMP 1
int8_t polar_decoder(
double *input,
......@@ -50,31 +49,6 @@ int8_t polar_decoder(
start_meas(init);
uint8_t **bitUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
uint8_t **llrUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
decoder_list_t dlist[2*listSize];
for ( int i=0;i<2*listSize;i++) {
// dlist[i].bit = nr_alloc_uint8_t_2D_array((polarParams->n+1), polarParams->N);
// dlist[i].llr = nr_alloc_double_2D_array((polarParams->n+1), polarParams->N);
//dlist[i].crcChecksum = malloc(sizeof(uint8_t)*polarParams->crcParityBits);
for (int j=0; j< polarParams->n+1; j++) {
memset((void*)&dlist[i].bit[j][0],0,sizeof(uint8_t)*polarParams->N);
memset((void*)&dlist[i].llr[j][0],0,sizeof(double)*polarParams->N);
}
for (int j=0;j<polarParams->crcParityBits;j++) dlist[i].crcChecksum[j] = 0;
dlist[i].pathMetric = 0;
}
for (int i=0; i<polarParams->N; i++) {
memset((void *)&llrUpdated[i][0],0,sizeof(uint8_t)*polarParams->n);
memset((void *)&bitUpdated[i][0],0,sizeof(uint8_t)*polarParams->n);
llrUpdated[i][polarParams->n]=1;
bitUpdated[i][0]=((polarParams->information_bit_pattern[i]+1) % 2);
}
uint8_t **extended_crc_generator_matrix = malloc(polarParams->K * sizeof(uint8_t *)); //G_P3
uint8_t **tempECGM = malloc(polarParams->K * sizeof(uint8_t *)); //G_P2
for (int i = 0; i < polarParams->K; i++){
......@@ -102,6 +76,31 @@ int8_t polar_decoder(
extended_crc_generator_matrix[i][j]=tempECGM[polarParams->interleaving_pattern[i]][j];
}
}
uint8_t **bitUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
uint8_t **llrUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
decoder_list_t dlist[2*listSize];
for ( int i=0;i<2*listSize;i++) {
// dlist[i].bit = nr_alloc_uint8_t_2D_array((polarParams->n+1), polarParams->N);
// dlist[i].llr = nr_alloc_double_2D_array((polarParams->n+1), polarParams->N);
//dlist[i].crcChecksum = malloc(sizeof(uint8_t)*polarParams->crcParityBits);
for (int j=0; j< polarParams->n+1; j++) {
memset((void*)&dlist[i].bit[j][0],0,sizeof(uint8_t)*polarParams->N);
memset((void*)&dlist[i].llr[j][0],0,sizeof(double)*polarParams->N);
}
for (int j=0;j<polarParams->crcParityBits;j++) dlist[i].crcChecksum[j] = 0;
dlist[i].pathMetric = 0;
}
for (int i=0; i<polarParams->N; i++) {
memset((void *)&llrUpdated[i][0],0,sizeof(uint8_t)*polarParams->n);
memset((void *)&bitUpdated[i][0],0,sizeof(uint8_t)*polarParams->n);
llrUpdated[i][polarParams->n]=1;
bitUpdated[i][0]=((polarParams->information_bit_pattern[i]+1) % 2);
}
stop_meas(init);
start_meas(polar_rate_matching);
......@@ -155,6 +154,7 @@ int8_t polar_decoder(
start_meas(sorting);
if (currentListSize <= listSize/2) {
// until listsize is full we need to copy bit and LLR arrays to new entries
// below we only copy the ones we need to keep for sure
......@@ -290,92 +290,8 @@ int8_t polar_decoder(
}
#define decoder_int8_A(sorted_dlist,currentListSize,polarParams) {for (int i = 0; i < currentListSize; i++) { \
for (int k = 0; k < (polarParams->n+1); k++) { \
memcpy((void*)&sorted_dlist[i+currentListSize]->bit[k][0],\
(void*)&sorted_dlist[i]->bit[k][0],\
sizeof(uint8_t)*polarParams->N);\
memcpy((void*)&sorted_dlist[i+currentListSize]->llr[k][0],\
(void*)&sorted_dlist[i]->llr[k][0],\
sizeof(int16_t)*polarParams->N);}}}
#define decoder_int8_B(sorted_dlist,currentListSize) {for (int i = 0; i < currentListSize; i++) {sorted_dlist[i]->bit[0][currentBit]=0;sorted_dlist[i+currentListSize]->bit[0][currentBit]=1;}}
void inline decoder_int8_C(decoder_list_int8_t *sorted_dlist[],
t_nrPolar_params *polarParams,
int currentBit,
int currentListSize,
int listSize) {
int32_t pathMetric[2*listSize];
decoder_list_int8_t *temp_dlist[2*listSize];
int listIndex[2*listSize];
int listIndex2[2*listSize];
for (int i = 0; i < currentListSize; i++) {
listIndex[i]=i;
pathMetric[i] = sorted_dlist[i]->pathMetric;
}
nr_sort_asc_int16_1D_array_ind(pathMetric, listIndex, currentListSize);
for (int i=0;i<currentListSize;i++) {
listIndex2[listIndex[i]] = i;
}
// copy the llr/bit arrays that are needed
for (int i = 0; i < listSize; i++) {
// printf("listIndex[%d] %d\n",i,listIndex[i]);
if ((listIndex2[i+listSize]<listSize) && (listIndex2[i]<listSize)) { // both '0' and '1' path metrics are to be kept
// do memcpy of LLR and Bit arrays
for (int k = 0; k < (polarParams->n+1); k++) {
memcpy((void*)&sorted_dlist[i+listSize]->bit[k][0],
(void*)&sorted_dlist[i]->bit[k][0],
sizeof(uint8_t)*polarParams->N);
memcpy((void*)&sorted_dlist[i+listSize]->llr[k][0],
(void*)&sorted_dlist[i]->llr[k][0],
sizeof(int16_t)*polarParams->N);
}
sorted_dlist[i]->bit[0][currentBit]=0;
sorted_dlist[i+listSize]->bit[0][currentBit]=1;
}
else if (listIndex2[i+listSize]<listSize) { // only '1' path metric is to be kept
// just change the current bit from '0' to '1'
for (int k = 0; k < (polarParams->n+1); k++) {
memcpy((void*)&sorted_dlist[i+listSize]->bit[k][0],
(void*)&sorted_dlist[i]->bit[k][0],
sizeof(uint8_t)*polarParams->N);
memcpy((void*)&sorted_dlist[i+listSize]->llr[k][0],
(void*)&sorted_dlist[i]->llr[k][0],
sizeof(int16_t)*polarParams->N);
}
sorted_dlist[i+listSize]->bit[0][currentBit]=1;
/*
decoder_list_t *tmp = sorted_dlist[i+listSize];
sorted_dlist[i+listSize] = sorted_dlist[i];
sorted_dlist[i+listSize]->pathMetric = tmp->pathMetric;
sorted_dlist[i+listSize]->bit[0][currentBit]=1;
memcpy((void*)&sorted_dlist[i+listSize]->crcChecksum[0],
(void*)&tmp->crcChecksum[0],
24*sizeof(uint8_t));*/
}
}
for (int i = 0; i < 2*listSize; i++) {
temp_dlist[i] = sorted_dlist[i];
}
for (int i = 0; i < 2*listSize; i++) {
// printf("i %d => %d\n",i,listIndex[i]);
sorted_dlist[i] = temp_dlist[listIndex[i]];
}
}
int8_t polar_decoder_int8(int16_t *input,
uint8_t *output,
t_nrPolar_params *polarParams,
......@@ -387,16 +303,22 @@ int8_t polar_decoder_int8(int16_t *input,
time_stats_t *deinterleaving,
time_stats_t *sorting,
time_stats_t *path_metric,
time_stats_t *update_LLR)
time_stats_t *update_LLR,
int generate_optim_code)
{
start_meas(init);
uint8_t **bitUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
uint8_t **llrUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
uint8_t **bitUpdated;
uint8_t **llrUpdated;
if (generate_optim_code == 1 || polarParams->decoder_kernel==NULL) {
bitUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
llrUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
}
decoder_list_int8_t dlist[2*listSize];
start_meas(init);
for ( int i=0;i<2*listSize;i++) {
// dlist[i].bit = nr_alloc_uint8_t_2D_array((polarParams->n+1), polarParams->N);
......@@ -409,48 +331,28 @@ int8_t polar_decoder_int8(int16_t *input,
for (int j=0;j<polarParams->crcParityBits;j++) dlist[i].crcChecksum[j] = 0;
dlist[i].pathMetric = 0;
}
stop_meas(init);
for (int i=0; i<polarParams->N; i++) {
memset((void *)&llrUpdated[i][0],0,sizeof(uint8_t)*polarParams->n);
memset((void *)&bitUpdated[i][0],0,sizeof(uint8_t)*polarParams->n);
llrUpdated[i][polarParams->n]=1;
bitUpdated[i][0]=((polarParams->information_bit_pattern[i]+1) % 2);
}
uint8_t **extended_crc_generator_matrix = malloc(polarParams->K * sizeof(uint8_t *)); //G_P3
uint8_t **tempECGM = malloc(polarParams->K * sizeof(uint8_t *)); //G_P2
for (int i = 0; i < polarParams->K; i++){
extended_crc_generator_matrix[i] = malloc(polarParams->crcParityBits * sizeof(uint8_t));
tempECGM[i] = malloc(polarParams->crcParityBits * sizeof(uint8_t));
}
for (int i=0; i<polarParams->payloadBits; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
tempECGM[i][j]=polarParams->crc_generator_matrix[i][j];
}
}
for (int i=polarParams->payloadBits; i<polarParams->K; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
if( (i-polarParams->payloadBits) == j ){
tempECGM[i][j]=1;
} else {
tempECGM[i][j]=0;
}
if (generate_optim_code == 1 || polarParams->decoder_kernel==NULL) {
for (int i=0; i<polarParams->N; i++) {
memset((void *)&llrUpdated[i][0],0,sizeof(uint8_t)*polarParams->n);
memset((void *)&bitUpdated[i][0],0,sizeof(uint8_t)*polarParams->n);
llrUpdated[i][polarParams->n]=1;
bitUpdated[i][0]=((polarParams->information_bit_pattern[i]+1) % 2);
}
}
for (int i=0; i<polarParams->K; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
extended_crc_generator_matrix[i][j]=tempECGM[polarParams->interleaving_pattern[i]][j];
}
}
stop_meas(init);
start_meas(polar_rate_matching);
int16_t d_tilde[polarParams->N];// = malloc(sizeof(double) * polarParams->N);
nr_polar_rate_matching_int8(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength);
for (int i=0;i<polarParams->N;i++) {
if (d_tilde[i]<-128) d_tilde[i]=-128;
else if (d_tilde[i]>127) d_tilde[i]=128;
}
memcpy((void*)&dlist[0].llr[polarParams->n][0],(void*)&d_tilde[0],sizeof(int16_t)*polarParams->N);
stop_meas(polar_rate_matching);
......@@ -464,70 +366,81 @@ int8_t polar_decoder_int8(int16_t *input,
decoder_list_int8_t *sorted_dlist[2*listSize];
for (uint8_t i = 0; i < 2*listSize; i++) sorted_dlist[i] = &dlist[i];
char fname[100];
FILE *fd;
if (generate_optim_code==1) {
sprintf(fname,"decoder_K%d_N%d_E%d.c",polarParams->K,polarParams->N,polarParams->encoderLength);
if ((fd=fopen(fname,"w"))==NULL) {printf("Cannot open %s\n",fname); exit(-1);}
fprintf(fd,"#include \"nr_polar_defs.h\"\n");
fprintf(fd,"void polar_decoder_K%d_N%d_E%d(t_nrPolar_params *polarParams,decoder_list_int8_t **sorted_dlist) {\n",
polarParams->K,polarParams->N,polarParams->encoderLength);
fprintf(fd,"int16_t mask,absllr;\n");
}
for (uint16_t currentBit=0; currentBit<polarParams->N; currentBit++){
// printf("***************** BIT %d (currentListSize %d, information_bit_pattern %d)\n",
// currentBit,currentListSize,polarParams->information_bit_pattern[currentBit]);
start_meas(update_LLR);
updateLLR_int8(sorted_dlist, llrUpdated, bitUpdated, currentListSize, currentBit, 0, polarParams->N, (polarParams->n+1));
stop_meas(update_LLR);
if (polarParams->information_bit_pattern[currentBit]==0) { //Frozen bit.
updatePathMetric0_int8(sorted_dlist,currentListSize, currentBit); //approximation=0 --> 11b, approximation=1 --> 12
} else { //Information or CRC bit.
start_meas(path_metric);
updatePathMetric2_int8(sorted_dlist, currentListSize, currentBit);
stop_meas(path_metric);
if (polarParams->decoder_kernel) polarParams->decoder_kernel(polarParams,sorted_dlist);
else {
for (uint16_t currentBit=0; currentBit<polarParams->N; currentBit++){
printf("***************** BIT %d (currentListSize %d, information_bit_pattern %d)\n",
currentBit,currentListSize,polarParams->information_bit_pattern[currentBit]);
start_meas(sorting);
start_meas(update_LLR);
updateLLR_int8(sorted_dlist, llrUpdated, bitUpdated, currentListSize, currentBit, 0, polarParams->N, (polarParams->n+1),generate_optim_code,fd);
stop_meas(update_LLR);
if (currentListSize <= listSize/2) {
// until listsize is full we need to copy bit and LLR arrays to new entries
// below we only copy the ones we need to keep for sure
decoder_int8_A(sorted_dlist,currentListSize,polarParams);
#ifdef SHOWCOMP
printf("decoder_int8_A(sorted_dlist,%d,polarParams);\n",currentListSize);
#endif
}
decoder_int8_B(sorted_dlist,currentListSize);
#ifdef SHOWCOMP
printf("decoder_int8_B(sorted_dlist,%d);\n",currentListSize);
#endif
if (polarParams->information_bit_pattern[currentBit]==0) { //Frozen bit.
updatePathMetric0_int8(sorted_dlist,currentListSize, currentBit,generate_optim_code,fd); //approximation=0 --> 11b, approximation=1 --> 12
} else { //Information or CRC bit.
start_meas(path_metric);
updatePathMetric2_int8(sorted_dlist, currentListSize, currentBit,generate_optim_code,fd);
stop_meas(path_metric);
bitUpdated[currentBit][0]=1;
updateCrcChecksum2_int8(sorted_dlist,extended_crc_generator_matrix, currentListSize, nonFrozenBit, polarParams->crcParityBits);
currentListSize*=2;
//Keep only the best "listSize" number of entries.
if (currentListSize > listSize) {
decoder_int8_C(sorted_dlist,
polarParams,
currentBit,
currentListSize,
listSize);
#ifdef SHOWCOMP
printf("decoder_int8_C(sorted_dlist,polarParams,%d,%d,%d);\n",currentBit,currentListSize,listSize);
#endif
currentListSize = listSize;
start_meas(sorting);
if (currentListSize <= listSize/2) {
// until listsize is full we need to copy bit and LLR arrays to new entries
// below we only copy the ones we need to keep for sure
decoder_int8_A(sorted_dlist,currentListSize,polarParams);
if (generate_optim_code == 1) fprintf(fd,"decoder_int8_A(sorted_dlist,%d,polarParams);\n",currentListSize);
}
decoder_int8_B(sorted_dlist,currentBit,currentListSize);
if (generate_optim_code == 1) fprintf(fd,"decoder_int8_B(sorted_dlist,%d,%d);\n",currentBit,currentListSize);
bitUpdated[currentBit][0]=1;
updateCrcChecksum2_int8(sorted_dlist,polarParams->extended_crc_generator_matrix, currentListSize, nonFrozenBit, polarParams->crcParityBits,generate_optim_code,fd);
currentListSize*=2;
//Keep only the best "listSize" number of entries.
if (currentListSize > listSize) {
decoder_int8_C(sorted_dlist,
polarParams,
currentBit,
currentListSize,
listSize);
if (generate_optim_code == 1) fprintf(fd,"decoder_int8_C(sorted_dlist,polarParams,%d,%d,%d);\n",currentBit,currentListSize,listSize);
currentListSize = listSize;
}
stop_meas(sorting);
nonFrozenBit++;
}
stop_meas(sorting);
nonFrozenBit++;
}
}
}
if (generate_optim_code==1) fprintf(fd,"}\n");
for (uint8_t i = 0; i < fmin(listSize, (pow(2,polarParams->crcCorrectionBits)) ); i++) {
// printf("list index %d :",i);
// for (int j=0;j<polarParams->crcParityBits;j++) printf("%d",sorted_dlist[i]->crcChecksum[j]);
// printf(" => %d (%f)\n",sorted_dlist[i]->crcState,sorted_dlist[i]->pathMetric);
// printf("list index %d / %d :",i,listSize);
// for (int j=0;j<polarParams->crcParityBits;j++) printf("%d",sorted_dlist[i]->crcChecksum[j]);
// printf(" => (%d)\n",sorted_dlist[i]->pathMetric);
int crcState = 1;
for (int j=0;j<polarParams->crcParityBits;j++) if (sorted_dlist[i]->crcChecksum[j]!=0) crcState=0;
if (crcState == 1) {
......@@ -556,8 +469,69 @@ int8_t polar_decoder_int8(int16_t *input,
nr_free_double_2D_array(dlist[i].llr, (polarParams->n+1));
free(dlist[i].crcChecksum);
}*/
nr_free_uint8_t_2D_array(extended_crc_generator_matrix, polarParams->K);
nr_free_uint8_t_2D_array(tempECGM, polarParams->K);
stop_meas(decoding);
return(0);
}
int8_t polar_decoder_int8_new(int16_t *input,
uint8_t *output,
t_nrPolar_params *polarParams,
uint8_t listSize,
time_stats_t *init,
time_stats_t *polar_rate_matching,
time_stats_t *decoding,
time_stats_t *bit_extraction,
time_stats_t *deinterleaving,
time_stats_t *sorting,
time_stats_t *path_metric,
time_stats_t *update_LLR,
int generate_optim_code)
{
start_meas(init);
stop_meas(init);
start_meas(polar_rate_matching);
int16_t d_tilde[polarParams->N];// = malloc(sizeof(double) * polarParams->N);
nr_polar_rate_matching_int8(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength);
for (int i=0;i<polarParams->N;i++) {
if (d_tilde[i]<-128) d_tilde[i]=-128;
else if (d_tilde[i]>127) d_tilde[i]=128;
}
memcpy((void*)&polarParams->tree.root->alpha[0],(void*)&d_tilde[0],sizeof(int16_t)*polarParams->N);
stop_meas(polar_rate_matching);
/*
* SCL polar decoder.
*/
start_meas(decoding);
generic_polar_decoder(polarParams,polarParams->tree.root);
start_meas(bit_extraction);
//Extract the information bits (û to ĉ)
nr_polar_info_bit_extraction(polarParams->nr_polar_u, polarParams->nr_polar_cPrime, polarParams->information_bit_pattern, polarParams->N);
stop_meas(bit_extraction);
//Deinterleaving (ĉ to b)
start_meas(deinterleaving);
nr_polar_deinterleaver(polarParams->nr_polar_cPrime, polarParams->nr_polar_b, polarParams->interleaving_pattern, polarParams->K);
stop_meas(deinterleaving);
//Remove the CRC (â)
for (int j = 0; j < polarParams->payloadBits; j++) output[j]=polarParams->nr_polar_b[j];
// free(d_tilde);
/*
for (int i=0;i<2*listSize;i++) {
// printf("correct: Freeing dlist[%d].bit %p\n",i,dlist[i].bit);
nr_free_uint8_t_2D_array(dlist[i].bit, (polarParams->n+1));
nr_free_double_2D_array(dlist[i].llr, (polarParams->n+1));
free(dlist[i].crcChecksum);
}*/
stop_meas(decoding);
return(0);
}
......@@ -19,8 +19,9 @@
* contact@openairinterface.org
*/
#include "PHY/impl_defs_top.h"
#include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
#define SHOWCOMP 1
#include "PHY/sse_intrin.h"
inline void computeLLR(double llr[1+nmax][Nmax], uint16_t row, uint16_t col,
uint16_t offset, uint8_t approximation) __attribute__((always_inline));
......@@ -45,40 +46,28 @@ inline void computeLLR(double llr[1+nmax][Nmax], uint16_t row, uint16_t col,
// printf("LLR (a %f, b %f): llr[%d][%d] %f\n",32*a,32*b,col,row,32*llr[col][row]);
}
inline void computeLLR_int8(decoder_list_int8_t **dlist,int i, uint16_t row, uint16_t col,
uint16_t offset) __attribute__((always_inline));
inline void computeLLR_int8(decoder_list_int8_t **dlist,int i, uint16_t row, uint16_t col,
uint16_t offset) {
int16_t llrtab[256][256];
int16_t a;
int16_t b;
int16_t absA,absB;
int16_t maska,maskb;
int16_t minabs;
// int16_t **llr=dlist[i]->llr;
void nr_polar_llrtableinit() {
int16_t absA,absB;
int16_t maska,maskb;
int16_t minabs;
#ifdef SHOWCOMP
printf("computeLLR_int8(sorted_dlist,%d,%d,%d);\n",i,row,col,offset);
#endif
a = dlist[i]->llr[col + 1][row];
b = dlist[i]->llr[col+1][row + offset];
// printf("LLR: a %d, b %d\n",a,b);
maska = a>>15;
maskb = b>>15;
absA = (a+maska)^maska;
absB = (b+maskb)^maskb;
// printf("LLR: absA %d, absB %d\n",absA,absB);
minabs = absA<absB ? absA : absB;
if ((maska^maskb) == 0)
dlist[i]->llr[col][row] = minabs;
else
dlist[i]->llr[col][row] = -minabs;
// printf("LLR (a %d, b %d): llr[%d][%d] %d\n",a,b,col,row,llr[col][row]);
for (int a=-128;a<128;a++) {
for (int b=-128;b<128;b++) {
absA=abs(a);
absB=abs(b);
minabs = absA<absB ? absA:absB;
if ((a<0 && b<0) || (a>=0 && b>=0)) llrtab[a+128][b+128] = minabs;
else llrtab[a+128][b+128] = -minabs;
}
}
}
void updateLLR(decoder_list_t **dlist,uint8_t **llrU, uint8_t **bitU,
uint8_t listSize, uint16_t row, uint16_t col, uint16_t xlen, uint8_t ylen, uint8_t approximation) {
uint16_t offset = (xlen/(1<<(ylen-col-1)));
if (( (row) % (2*offset) ) >= offset ) {
if (bitU[row-offset][col]==0) updateBit(dlist, bitU, listSize, (row-offset), col, xlen, ylen);
......@@ -96,33 +85,25 @@ void updateLLR(decoder_list_t **dlist,uint8_t **llrU, uint8_t **bitU,
llrU[row][col]=1;
}
#define updateLLR_int8_A(dlist,i,col,row,offset) if (dlist[(i)]->bit[(col)][(row)-(offset)]==0) dlist[(i)]->llr[(col)][(row)] = dlist[(i)]->llr[(col)+1][(row)-(offset)] + dlist[(i)]->llr[(col)+1][(row)]; else dlist[(i)]->llr[(col)][(row)] = -dlist[(i)]->llr[(col)+1][(row)-(offset)] + dlist[(i)]->llr[(col)+1][(row)];
void updateLLR_int8(decoder_list_int8_t **dlist,uint8_t **llrU, uint8_t **bitU,
uint8_t listSize, uint16_t row, uint16_t col, uint16_t xlen, uint8_t ylen) {
uint8_t listSize, uint16_t row, uint16_t col, uint16_t xlen, uint8_t ylen,
int generate_optim_code,FILE *fd) {
uint16_t offset = (xlen/(1<<(ylen-col-1)));
if (( (row) % (2*offset) ) >= offset ) {
if (bitU[row-offset][col]==0) updateBit_int8(dlist, bitU, listSize, (row-offset), col, xlen, ylen);
if (llrU[row-offset][col+1]==0) updateLLR_int8(dlist, llrU, bitU, listSize, (row-offset), (col+1), xlen, ylen);
if (llrU[row][col+1]==0) updateLLR_int8(dlist, llrU, bitU, listSize, row, (col+1), xlen, ylen);
if (bitU[row-offset][col]==0) updateBit_int8(dlist, bitU, listSize, (row-offset), col, xlen, ylen,generate_optim_code,fd);
if (llrU[row-offset][col+1]==0) updateLLR_int8(dlist, llrU, bitU, listSize, (row-offset), (col+1), xlen, ylen,generate_optim_code,fd);
if (llrU[row][col+1]==0) updateLLR_int8(dlist, llrU, bitU, listSize, row, (col+1), xlen, ylen,generate_optim_code,fd);
for (uint8_t i=0; i<listSize; i++) {
#ifdef SHOWCOMP
printf("updateLLR_int8_A(sorted_dlist,%d,%d,%d,%d);\n",i,row,col,offset);
#endif
updateLLR_int8_A(dlist,i,col,row,offset);
/*
if (dlist[i]->bit[col][row-offset]==0)
dlist[i]->llr[col][row] = dlist[i]->llr[col+1][row-offset] + dlist[i]->llr[col+1][row];
else
dlist[i]->llr[col][row] = -dlist[i]->llr[col+1][row-offset] + dlist[i]->llr[col+1][row];*/
}
if (generate_optim_code==1) fprintf(fd,"updateLLR_int8_A(sorted_dlist,%d,%d,%d,%d);\n",listSize,col,row,offset);
updateLLR_int8_A(dlist,listSize,col,row,offset);
} else {
if (llrU[row][col+1]==0) updateLLR_int8(dlist, llrU, bitU, listSize, row, (col+1), xlen, ylen);
if (llrU[row+offset][col+1]==0) updateLLR_int8(dlist, llrU, bitU, listSize, (row+offset), (col+1), xlen, ylen);
for (int i=0;i<listSize;i++) computeLLR_int8(dlist,i, row, col, offset);
if (llrU[row][col+1]==0) updateLLR_int8(dlist, llrU, bitU, listSize, row, (col+1), xlen, ylen,generate_optim_code,fd);
if (llrU[row+offset][col+1]==0) updateLLR_int8(dlist, llrU, bitU, listSize, (row+offset), (col+1), xlen, ylen,generate_optim_code,fd);
if (generate_optim_code==1) fprintf(fd,"computeLLR_int8(sorted_dlist,%d,%d,%d,%d);\n",listSize,row,col,offset);
computeLLR_int8(dlist,listSize, row, col, offset);
}
llrU[row][col]=1;
......@@ -146,33 +127,35 @@ void updateBit(decoder_list_t **dlist, uint8_t **bitU, uint8_t listSize, uint16_
bitU[row][col]=1;
}
#define updateBit_int8_A(dlist,i,col,row) dlist[(i)]->bit[(col)][(row)] = dlist[(i)]->bit[(col)-1][(row)]
#define updateBit_int8_B(dlist,i,col,row,offset) dlist[(i)]->bit[(col)][(row)] = dlist[(i)]->bit[(col)-1][(row)]^dlist[(i)]->bit[(col)-1][(row)+(offset)]
void updateBit_int8(decoder_list_int8_t **dlist, uint8_t **bitU, uint8_t listSize, uint16_t row,
uint16_t col, uint16_t xlen, uint8_t ylen) {
void updateBit_int8(decoder_list_int8_t **dlist, uint8_t **bitU,
uint8_t listSize, uint16_t row,
uint16_t col, uint16_t xlen, uint8_t ylen,
int generate_optim_code,FILE *fd) {
uint16_t offset = ( xlen/(pow(2,(ylen-col))) );
for (uint8_t i=0; i<listSize; i++) {
if (( (row) % (2*offset) ) >= offset ) {
if (bitU[row][col-1]==0) updateBit_int8(dlist, bitU, listSize, row, (col-1), xlen, ylen);
// dlist[i]->bit[col][row] = dlist[i]->bit[col-1][row];
#ifdef SHOWCOMP
printf("updateBit_int8_A(sorted_dlist,%d,%d,%d);\n",i,col,row);
#endif
updateBit_int8_A(dlist,i,col,row);
if (( (row) % (2*offset) ) >= offset ) {
} else {
if (bitU[row][col-1]==0) updateBit_int8(dlist, bitU, listSize, row, (col-1), xlen, ylen);
if (bitU[row+offset][col-1]==0) updateBit_int8(dlist, bitU, listSize, (row+offset), (col-1), xlen, ylen);
if (bitU[row][col-1]==0) updateBit_int8(dlist, bitU, listSize, row, (col-1), xlen, ylen,generate_optim_code,fd);
// dlist[i]->bit[col][row] = dlist[i]->bit[col-1][row];
if (generate_optim_code==1) fprintf(fd,"updateBit_int8_A(sorted_dlist,%d,%d,%d);\n",listSize,col,row);
updateBit_int8_A(dlist,listSize,col,row);
} else {
if (bitU[row][col-1]==0) updateBit_int8(dlist, bitU, listSize, row, (col-1), xlen, ylen,generate_optim_code,fd);
if (bitU[row+offset][col-1]==0) updateBit_int8(dlist, bitU, listSize, (row+offset), (col-1), xlen, ylen,generate_optim_code,fd);
// dlist[i]->bit[col][row] = dlist[i]->bit[col-1][row]^dlist[i]->bit[col-1][row+offset];
// printf("updating dlist[%d]->bit[%d][%d] => %d\n",i,col,row,dlist[i]->bit[col][row]);
#ifdef SHOWCOMP
printf("updateBit_int8_B(sorted_dlist,%d,%d,%d,%d);\n",i,col,row,offset);
#endif
updateBit_int8_B(dlist,i,col,row,offset);
}
if (generate_optim_code==1) fprintf(fd,"updateBit_int8_B(sorted_dlist,%d,%d,%d,%d);\n",listSize,col,row,offset);
updateBit_int8_B(dlist,listSize,col,row,offset);
}
bitU[row][col]=1;
}
......@@ -193,17 +176,15 @@ void updatePathMetric(decoder_list_t **dlist,uint8_t listSize, uint8_t bitValue,
}
#define updatePathMetric0_int8_A(dlist,i,row,mask,absllr) { mask=dlist[i]->llr[0][row]>>15;if(mask!=0){absllr=(dlist[i]->llr[0][row]+mask)^mask;dlist[i]->pathMetric+=absllr;}}
void updatePathMetric0_int8(decoder_list_int8_t **dlist,uint8_t listSize, uint16_t row) {
void updatePathMetric0_int8(decoder_list_int8_t **dlist,uint8_t listSize, uint16_t row,int generate_optim_code,FILE *fd) {
int16_t mask,absllr;
for (uint8_t i=0; i<listSize; i++) {
updatePathMetric0_int8_A(dlist,listSize,row,mask,absllr);
if (generate_optim_code == 1) fprintf(fd,"updatePathMetric0_int8_A(sorted_dlist,%d,%d,mask,absllr);\n",listSize,row);
updatePathMetric0_int8_A(dlist,i,row,mask,absllr);
#ifdef SHOWCOMP
printf("updatePathMetric0_int8_A(sorted_dlist,i,%d,%d);\n",listSize,row);
#endif
/*
......@@ -215,7 +196,6 @@ void updatePathMetric0_int8(decoder_list_int8_t **dlist,uint8_t listSize, uint16
}*/
}
}
......@@ -244,24 +224,23 @@ void updatePathMetric2(decoder_list_t **dlist, uint8_t listSize, uint16_t row, u
}
}
#define updatePathMetric2_int8_A(dlist,i,listSize,row) {dlist[i+listSize]->pathMetric = dlist[i]->pathMetric;if (dlist[i]->llr[0][row]<0) dlist[i]->pathMetric-=dlist[i]->llr[0][row];else dlist[i+listSize]->pathMetric += dlist[i]->llr[0][row];}
void updatePathMetric2_int8(decoder_list_int8_t **dlist, uint8_t listSize, uint16_t row) {
void updatePathMetric2_int8(decoder_list_int8_t **dlist, uint8_t listSize, uint16_t row,int generate_optim_code,FILE *fd) {
int i;
for (i = 0; i < listSize; i++) {
#ifdef SHOWCOMP
printf("updatePathMetric2_int8_A(sorted_dlist,%d,%d,%d);\n",
i,listSize,row);
#endif
updatePathMetric2_int8_A(dlist,i,listSize,row);
// dlist[i+listSize]->pathMetric = dlist[i]->pathMetric;
//if (dlist[i]->llr[0][row]<0) dlist[i]->pathMetric -= dlist[i]->llr[0][row];
//else dlist[i+listSize]->pathMetric += dlist[i]->llr[0][row];
}
if (generate_optim_code == 1) fprintf(fd,"updatePathMetric2_int8_A(sorted_dlist,%d,%d);\n",
listSize,row);
updatePathMetric2_int8_A(dlist,listSize,row);
// dlist[i+listSize]->pathMetric = dlist[i]->pathMetric;
//if (dlist[i]->llr[0][row]<0) dlist[i]->pathMetric -= dlist[i]->llr[0][row];
//else dlist[i+listSize]->pathMetric += dlist[i]->llr[0][row];
}
......@@ -283,32 +262,253 @@ void updateCrcChecksum2(decoder_list_t **dlist, uint8_t **crcGen,
}
}
#define updateCrcChecksum_int8_A(dlist,i,crcGen,i2,len) {for (uint8_t j = 0; j < len; j++) dlist[i]->crcChecksum[j] = (dlist[i]->crcChecksum[j]^crcGen[i2][j]);}
void updateCrcChecksum_int8(decoder_list_int8_t **dlist, uint8_t **crcGen,
uint8_t listSize, uint32_t i2, uint8_t len) {
for (uint8_t i = 0; i < listSize; i++) {
#ifdef SHOWCOMP
printf("updateCrcChecksum_int8_A(sorted_dlist,%d,crcGen,%d,%d);\n",i,i2,len);
#endif
updateCrcChecksum_int8_A(dlist,i,crcGen,i2,len);
// for (uint8_t j = 0; j < len; j++) {
// dlist[i]->crcChecksum[j] = ( (dlist[i]->crcChecksum[j] + crcGen[i2][j]) % 2 );
// }
}
uint8_t listSize, uint32_t i2, uint8_t len,int generate_optim_code,FILE *fd) {
if (generate_optim_code == 1) fprintf(fd,"updateCrcChecksum_int8_A(sorted_dlist,%d,crcGen,%d,%d);\n",listSize,i2,len);
updateCrcChecksum_int8_A(dlist,listSize,crcGen,i2,len);
// for (uint8_t j = 0; j < len; j++) {
// dlist[i]->crcChecksum[j] = ( (dlist[i]->crcChecksum[j] + crcGen[i2][j]) % 2 );
// }
}
#define updateCrcChecksum2_int8_A(dlist,i,listSize,crcGen,i2,len) {for (uint8_t j = 0; j < len; j++) dlist[i+listSize]->crcChecksum[j]=dlist[i]->crcChecksum[j]^crcGen[i2][j];}
void updateCrcChecksum2_int8(decoder_list_int8_t **dlist, uint8_t **crcGen,
uint8_t listSize, uint32_t i2, uint8_t len) {
for (uint8_t i = 0; i < listSize; i++) {
#ifdef SHOWCOMP
printf("updateCrcChecksum2_int8_A(sorted_dlist,%d,%d,crcGen,%d,%d);\n",i,listSize,i2,len);
#endif
updateCrcChecksum2_int8_A(dlist,i,listSize,crcGen,i2,len);
// for (uint8_t j = 0; j < len; j++) {
uint8_t listSize, uint32_t i2, uint8_t len,int generate_optim_code,FILE *fd) {
if (generate_optim_code == 1) fprintf(fd,"updateCrcChecksum2_int8_A(sorted_dlist,%d,polarParams->extended_crc_generator_matrix,%d,%d);\n",listSize,i2,len);
updateCrcChecksum2_int8_A(dlist,listSize,crcGen,i2,len);
// for (uint8_t j = 0; j < len; j++) {
// dlist[i+listSize]->crcChecksum[j] = ( (dlist[i]->crcChecksum[j] + crcGen[i2][j]) % 2 );
// }
}
decoder_node_t *new_decoder_node(int first_leaf_index,int level) {
decoder_node_t *node=(decoder_node_t *)malloc(sizeof(decoder_node_t));
node->first_leaf_index=first_leaf_index;
node->level=level;
node->Nv = 1<<level;
node->leaf = 0;
node->left=(decoder_node_t *)NULL;
node->right=(decoder_node_t *)NULL;
node->all_frozen=0;
node->alpha = (int16_t*)malloc16(node->Nv*sizeof(int16_t));
node->beta = (int8_t*)malloc16(node->Nv*sizeof(int16_t));
memset((void*)node->beta,-1,node->Nv*sizeof(int16_t));
return(node);
}
decoder_node_t *add_nodes(int level,int first_leaf_index,t_nrPolar_params *pp) {
int all_frozen_below=1;
int Nv = 1<<level;
decoder_node_t *new_node = new_decoder_node(first_leaf_index,level);
#ifdef DEBUG_NEW_IMPL
printf("New node %d order %d, level %d\n",pp->tree.num_nodes,Nv,level);
pp->tree.num_nodes++;
#endif
if (level==0) {
#ifdef DEBUG_NEW_IMPL
printf("leaf %d (%s)\n",first_leaf_index,pp->information_bit_pattern[first_leaf_index]==1 ? "information or crc" : "frozen");
#endif
new_node->leaf=1;
new_node->all_frozen = pp->information_bit_pattern[first_leaf_index]==0 ? 1 : 0;
return new_node; // this is a leaf node
}
for (int i=0;i<Nv;i++) {
if (pp->information_bit_pattern[i+first_leaf_index]>0) all_frozen_below=0;
}
if (all_frozen_below==0) new_node->left=add_nodes(level-1,first_leaf_index,pp);
else {
#ifdef DEBUG_NEW_IMPL
printf("aggregating frozen bits %d ... %d at level %d (%s)\n",first_leaf_index,first_leaf_index+Nv-1,level,((first_leaf_index/Nv)&1)==0?"left":"right");
#endif
new_node->leaf=1;
new_node->all_frozen=1;
}
if (all_frozen_below==0) new_node->right=add_nodes(level-1,first_leaf_index+(Nv/2),pp);
return(new_node);
}
void build_decoder_tree(t_nrPolar_params *pp) {
pp->tree.num_nodes=0;
pp->tree.root = add_nodes(pp->n,0,pp);
}
void applyFtoleft(t_nrPolar_params *pp,decoder_node_t *node) {
int16_t *alpha_v=node->alpha;
int16_t *alpha_l=node->left->alpha;
int16_t *betal = node->left->beta;
int16_t a,b,absa,absb,maska,maskb,minabs;
#ifdef DEBUG_NEW_IMPL
printf("applyFtoleft %d, Nv %d (level %d,node->left (leaf %d, AF %d))\n",node->first_leaf_index,node->Nv,node->level,node->left->leaf,node->left->all_frozen);
for (int i=0;i<node->Nv;i++) printf("i%d (frozen %d): alpha_v[i] = %d\n",i,1-pp->information_bit_pattern[node->first_leaf_index+i],alpha_v[i]);
#endif
if (node->left->all_frozen == 0) {
#if defined(__AVX2__)
int avx2mod = (node->Nv/2)&15;
if (avx2mod == 0) {
__m256i a256,b256,absa256,absb256,minabs256;
int avx2len = node->Nv/2/16;
for (int i=0;i<avx2len;i++) {
a256 =((__m256i*)alpha_v)[i];
b256 =((__m256i*)alpha_v)[i+avx2len];
absa256 =_mm256_abs_epi16(a256);
absb256 =_mm256_abs_epi16(b256);
minabs256 =_mm256_min_epi16(absa256,absb256);
((__m256i*)alpha_l)[i] =_mm256_sign_epi16(minabs256,_mm256_xor_si256(a256,b256));
}
}
else if (avx2mod == 8) {
__m128i a128,b128,absa128,absb128,minabs128;
a128 =*((__m128i*)alpha_v);
b128 =((__m128i*)alpha_v)[1];
absa128 =_mm_abs_epi16(a128);
absb128 =_mm_abs_epi16(b128);
minabs128 =_mm_min_epi16(absa128,absb128);
*((__m128i*)alpha_l) =_mm_sign_epi16(minabs128,_mm_xor_si128(a128,b128));
}
else if (avx2mod == 4) {
__m64 a64,b64,absa64,absb64,minabs64;
a64 =*((__m64*)alpha_v);
b64 =((__m64*)alpha_v)[1];
absa64 =_mm_abs_pi16(a64);
absb64 =_mm_abs_pi16(b64);
minabs64 =_mm_min_pi16(absa64,absb64);
*((__m64*)alpha_l) =_mm_sign_pi16(minabs64,_mm_xor_si64(a64,b64));
}
else
#endif
{
for (int i=0;i<node->Nv/2;i++) {
a=alpha_v[i];
b=alpha_v[i+(node->Nv/2)];
maska=a>>15;
maskb=b>>15;
absa=(a+maska)^maska;
absb=(b+maskb)^maskb;
minabs = absa<absb ? absa : absb;
alpha_l[i] = (maska^maskb)==0 ? minabs : -minabs;
}
}
if (node->Nv == 2) { // apply hard decision on left node
betal[0] = (alpha_l[0]>0) ? -1 : 1;
#ifdef DEBUG_NEW_IMPL
printf("betal[0] %d (%p)\n",betal[0],&betal[0]);
#endif
pp->nr_polar_u[node->first_leaf_index] = (betal[0]+1)>>1;
#ifdef DEBUG_NEW_IMPL
printf("Setting bit %d to %d (LLR %d)\n",node->first_leaf_index,(betal[0]+1)>>1,alpha_l[0]);
#endif
}
}
}
void applyGtoright(t_nrPolar_params *pp,decoder_node_t *node) {
int16_t *alpha_v=node->alpha;
int16_t *alpha_r=node->right->alpha;
int16_t *betal = node->left->beta;
int16_t *betar = node->right->beta;
int8_t frozen_mask=-1;
if (node->Nv == 2) frozen_mask=-pp->information_bit_pattern[node->first_leaf_index+1];
#ifdef DEBUG_NEW_IMPL
printf("applyGtoright %d, Nv %d (level %d), (leaf %d, AF %d)\n",node->first_leaf_index,node->Nv,node->level,node->right->leaf,node->right->all_frozen);
#endif
if (node->right->all_frozen == 0) {
#if defined(__AVX2__)
int avx2mod = (node->Nv/2)&15;
if (avx2mod == 0) {
int avx2len = node->Nv/2/16;
for (int i=0;i<avx2len;i++) {
((__m256i *)alpha_r)[i] =
_mm256_subs_epi16(((__m256i *)alpha_v)[i+avx2len],
_mm256_sign_epi16(((__m256i *)alpha_v)[i],
((__m256i *)betal)[i]));
}
}
else if (avx2mod == 8) {
((__m128i *)alpha_r)[0] = _mm_subs_epi16(((__m128i *)alpha_v)[1],_mm_sign_epi16(((__m128i *)alpha_v)[0],((__m128i *)betal)[0]));
}
else
#endif
{
for (int i=0;i<node->Nv/2;i++) {
alpha_r[i] = alpha_v[i+(node->Nv/2)] - (betal[i]*alpha_v[i]);
}
}
if (node->Nv == 2) { // apply hard decision on right node
betar[0] = (alpha_r[0]>0) ? -1 : 1;
pp->nr_polar_u[node->first_leaf_index+1] = (betar[0]+1)>>1;
#ifdef DEBUG_NEW_IMPL
printf("Setting bit %d to %d (LLR %d frozen_mask %d)\n",node->first_leaf_index+1,(betar[0]+1)>>1,alpha_r[0],frozen_mask);
#endif
}
}
}
void computeBeta(t_nrPolar_params *pp,decoder_node_t *node) {
int16_t *betav = node->beta;
int16_t *betal = node->left->beta;
int16_t *betar = node->right->beta;
#ifdef DEBUG_NEW_IMPL
printf("Computing beta @ level %d first_leaf_index %d (all_frozen %d)\n",node->level,node->first_leaf_index,node->left->all_frozen);
#endif
if (node->left->all_frozen==0) { // if left node is not aggregation of frozen bits
for (int i=0;i<node->Nv/2;i++) {
betav[i] = (betal[i] != betar[i]) ? 1 : -1;
#ifdef DEBUG_NEW_IMPL
printf("COMPUTE betav[%d] %d (%p)\n",i,
betav[i],
&betav[i]);
#endif
}
}
else memcpy((void*)&betav[0],betar,(node->Nv/2)*sizeof(int16_t));
memcpy((void*)&betav[node->Nv/2],betar,(node->Nv/2)*sizeof(int16_t));
}
void generic_polar_decoder(t_nrPolar_params *pp,decoder_node_t *node) {
// Apply F to left
applyFtoleft(pp,node);
// if left is not a leaf recurse down to the left
if (node->left->leaf==0) generic_polar_decoder(pp,node->left);
applyGtoright(pp,node);
if (node->right->leaf==0) generic_polar_decoder(pp,node->right);
computeBeta(pp,node);
}
......@@ -34,58 +34,125 @@
static const uint8_t nr_polar_subblock_interleaver_pattern[32] = { 0, 1, 2, 4, 3, 5, 6, 7, 8, 16, 9, 17, 10, 18, 11, 19, 12, 20, 13, 21, 14, 22, 15, 23, 24, 25, 26, 28, 27, 29, 30, 31 };
#define Nmax 1024
#define nmax 10
typedef struct decoder_list_s {
uint8_t bit[1+nmax][Nmax];
double llr[1+nmax][Nmax];
uint8_t crcChecksum[24];
double pathMetric;
} decoder_list_t;
typedef struct decoder_list_int8_s {
uint8_t bit[1+nmax][Nmax] __attribute__((aligned(32)));
int16_t llr[1+nmax][Nmax]__attribute__((aligned(32)));
uint8_t crcChecksum[24];
int32_t pathMetric;
} decoder_list_int8_t;
typedef struct decoder_node_t_s {
struct decoder_node_t_s *left;
struct decoder_node_t_s *right;
int level;
int leaf;
int Nv;
int first_leaf_index;
int all_frozen;
int16_t *alpha;
int8_t *beta;
} decoder_node_t;
typedef struct decoder_tree_t_s {
decoder_node_t *root;
int num_nodes;
} decoder_tree_t;
struct nrPolar_params {
uint8_t n_max;
uint8_t i_il;
uint8_t i_seg;
uint8_t n_pc;
uint8_t n_pc_wm;
uint8_t i_bil;
uint16_t payloadBits;
uint16_t encoderLength;
uint8_t crcParityBits;
uint8_t crcCorrectionBits;
uint16_t K;
uint16_t N;
uint8_t n;
uint16_t *interleaving_pattern;
uint16_t *rate_matching_pattern;
const uint16_t *Q_0_Nminus1;
int16_t *Q_I_N;
int16_t *Q_F_N;
int16_t *Q_PC_N;
uint8_t *information_bit_pattern;
uint16_t *channel_interleaver_pattern;
uint32_t crc_polynomial;
uint8_t **crc_generator_matrix; //G_P
uint8_t **G_N;
uint32_t* crc256Table;
//polar_encoder vectors:
uint8_t *nr_polar_crc;
uint8_t *nr_polar_b;
uint8_t *nr_polar_cPrime;
uint8_t *nr_polar_u;
uint8_t *nr_polar_d;
uint8_t n_max;
uint8_t i_il;
uint8_t i_seg;
uint8_t n_pc;
uint8_t n_pc_wm;
uint8_t i_bil;
uint16_t payloadBits;
uint16_t encoderLength;
uint8_t crcParityBits;
uint8_t crcCorrectionBits;
uint16_t K;
uint16_t N;
uint8_t n;
uint16_t *interleaving_pattern;
uint16_t *rate_matching_pattern;
const uint16_t *Q_0_Nminus1;
int16_t *Q_I_N;
int16_t *Q_F_N;
int16_t *Q_PC_N;
uint8_t *information_bit_pattern;
uint16_t *channel_interleaver_pattern;
uint32_t crc_polynomial;
uint8_t **crc_generator_matrix; //G_P
uint8_t **G_N;
uint32_t* crc256Table;
uint8_t **extended_crc_generator_matrix;
//polar_encoder vectors:
uint8_t *nr_polar_crc;
uint8_t *nr_polar_b;
uint8_t *nr_polar_cPrime;
uint8_t *nr_polar_u;
uint8_t *nr_polar_d;
void (*decoder_kernel)(struct nrPolar_params *,decoder_list_int8_t **);
decoder_tree_t tree;
} __attribute__ ((__packed__));
typedef struct nrPolar_params t_nrPolar_params;
void polar_encoder(uint8_t *input, uint8_t *output, t_nrPolar_params* polarParams);
void nr_polar_kernal_operation(uint8_t *u, uint8_t *d, uint16_t N);
int8_t polar_decoder(double *input, uint8_t *output, t_nrPolar_params *polarParams,
uint8_t listSize, double *aPrioriPayload, uint8_t pathMetricAppr,
time_stats_t *init,
time_stats_t *polar_rate_matching,
time_stats_t *decoding,
time_stats_t *bit_extraction,
time_stats_t *deinterleaving,
time_stats_t *sorting,
time_stats_t *path_metric,
time_stats_t *update_LLR);
uint8_t listSize, double *aPrioriPayload, uint8_t pathMetricAppr,
time_stats_t *init,
time_stats_t *polar_rate_matching,
time_stats_t *decoding,
time_stats_t *bit_extraction,
time_stats_t *deinterleaving,
time_stats_t *sorting,
time_stats_t *path_metric,
time_stats_t *update_LLR);
int8_t polar_decoder_int8(int16_t *input,
uint8_t *output,
t_nrPolar_params *polarParams,
uint8_t listSize,
time_stats_t *init,
time_stats_t *polar_rate_matching,
time_stats_t *decoding,
time_stats_t *bit_extraction,
time_stats_t *deinterleaving,
time_stats_t *sorting,
time_stats_t *path_metric,
time_stats_t *update_LLR,
int generate_optim_code);
void nr_polar_llrtableinit(void);
void nr_polar_init(t_nrPolar_params* polarParams, int messageType);
......@@ -133,59 +200,38 @@ void nr_free_uint8_t_3D_array(uint8_t ***input, uint16_t xlen, uint16_t ylen);
void nr_free_uint8_t_2D_array(uint8_t **input, uint16_t xlen);
void nr_free_double_2D_array(double **input, uint16_t xlen);
#define Nmax 1024
#define nmax 10
typedef struct decoder_list_s {
uint8_t bit[1+nmax][Nmax];
double llr[1+nmax][Nmax];
uint8_t crcChecksum[24];
double pathMetric;
} decoder_list_t;
typedef struct decoder_list_int8_s {
uint8_t bit[1+nmax][Nmax] __attribute__((aligned(32)));
int16_t llr[1+nmax][Nmax]__attribute__((aligned(32)));
uint8_t crcChecksum[24];
int32_t pathMetric;
} decoder_list_int8_t;
void updateLLR(decoder_list_t **dlist,uint8_t **llrU, uint8_t **bitU,
uint8_t listSize, uint16_t row, uint16_t col, uint16_t xlen, uint8_t ylen, uint8_t approximation);
void updateLLR_int8(decoder_list_int8_t **dlist,uint8_t **llrU, uint8_t **bitU,
uint8_t listSize, uint16_t row, uint16_t col, uint16_t xlen, uint8_t ylen);
uint8_t listSize, uint16_t row, uint16_t col, uint16_t xlen, uint8_t ylen,int generate_optim_code,FILE *fd);
void updateBit(decoder_list_t **dlist, uint8_t **bitU, uint8_t listSize, uint16_t row,
uint16_t col, uint16_t xlen, uint8_t ylen);
void updateBit_int8(decoder_list_int8_t **dlist, uint8_t **bitU, uint8_t listSize, uint16_t row,
uint16_t col, uint16_t xlen, uint8_t ylen);
uint16_t col, uint16_t xlen, uint8_t ylen,int generate_optim_code,FILE *fd);
void updatePathMetric(decoder_list_t **dlist,uint8_t listSize, uint8_t bitValue,
uint16_t row, uint8_t approximation);
void updatePathMetric0_int8(decoder_list_int8_t **dlist,uint8_t listSize, uint16_t row);
void updatePathMetric0_int8(decoder_list_int8_t **dlist,uint8_t listSize, uint16_t row,int generate_optim_code,FILE *fd);
void updatePathMetric2(decoder_list_t **dlist, uint8_t listSize, uint16_t row, uint8_t appr);
void updatePathMetric2_int8(decoder_list_int8_t **dlist, uint8_t listSize, uint16_t row);
void updatePathMetric2_int8(decoder_list_int8_t **dlist, uint8_t listSize, uint16_t row,int generate_optim_code,FILE *fd);
void updateCrcChecksum(decoder_list_t **dlist, uint8_t **crcGen,
uint8_t listSize, uint32_t i2, uint8_t len);
void updateCrcChecksum_int8(decoder_list_int8_t **dlist, uint8_t **crcGen,
uint8_t listSize, uint32_t i2, uint8_t len);
uint8_t listSize, uint32_t i2, uint8_t len,int generate_optim_code,FILE *fd);
void updateCrcChecksum2(decoder_list_t **dlist, uint8_t **crcGen,
uint8_t listSize, uint32_t i2, uint8_t len);
void updateCrcChecksum2_int8(decoder_list_int8_t **dlist, uint8_t **crcGen,
uint8_t listSize, uint32_t i2, uint8_t len);
uint8_t listSize, uint32_t i2, uint8_t len,int generate_optim_code,FILE *fd);
void nr_sort_asc_double_1D_array_ind(double *matrix, int *ind, int len);
......@@ -213,4 +259,144 @@ static inline void nr_polar_deinterleaver(uint8_t *input, uint8_t *output, uint1
for (int i=0; i<size; i++) output[pattern[i]]=input[i];
}
void polar_decoder_K56_N512_E864(t_nrPolar_params *polarParams,decoder_list_int8_t **sorted_list);
void build_decoder_tree(t_nrPolar_params *pp);
#define updateLLR_int8_A(dlist,listSize,col,row,offset) {for (int i=0;i<listSize;i++) {if (dlist[(i)]->bit[(col)][(row)-(offset)]==0) dlist[(i)]->llr[(col)][(row)] = dlist[(i)]->llr[(col)+1][(row)-(offset)] + dlist[(i)]->llr[(col)+1][(row)]; else dlist[(i)]->llr[(col)][(row)] = -dlist[(i)]->llr[(col)+1][(row)-(offset)] + dlist[(i)]->llr[(col)+1][(row)];if(dlist[(i)]->llr[col][row]>127 || dlist[i]->llr[col][row]<-128) printf("dlist[%d]->llr[%d][%d] = %d> 8bit\n",i,col,row,dlist[i]->llr[col][row]);}}
#define updateBit_int8_A(dlist,listSize,col,row) {for (int i=0;i<listSize;i++) {dlist[(i)]->bit[(col)][(row)] = dlist[(i)]->bit[(col)-1][(row)];}}
#define updateBit_int8_B(dlist,listSize,col,row,offset) {for (int i=0;i<listSize;i++) {dlist[(i)]->bit[(col)][(row)] = dlist[(i)]->bit[(col)-1][(row)]^dlist[(i)]->bit[(col)-1][(row)+(offset)];}}
#define updatePathMetric0_int8_A(dlist,listSize,row,mask,absllr) {for (int i=0;i<listSize;i++) { mask=dlist[i]->llr[0][row]>>15;if(mask!=0){absllr=(dlist[i]->llr[0][row]+mask)^mask;dlist[i]->pathMetric+=absllr;}}}
#define updatePathMetric2_int8_A(dlist,listSize,row) {for (int i=0;i<listSize;i++) {{dlist[i+listSize]->pathMetric = dlist[i]->pathMetric;if (dlist[i]->llr[0][row]<0) dlist[i]->pathMetric-=dlist[i]->llr[0][row];else dlist[i+listSize]->pathMetric += dlist[i]->llr[0][row];}}}
#define updateCrcChecksum_int8_A(dlist,listSize,crcGen,i2,len) {for (int i=0;i<listSize;i++){for (uint8_t j = 0; j < len; j++) dlist[i]->crcChecksum[j] = (dlist[i]->crcChecksum[j]^crcGen[i2][j]);}}
#define updateCrcChecksum2_int8_A(dlist,listSize,crcGen,i2,len) {for (int i=0;i<listSize;i++){for (uint8_t j = 0; j < len; j++) dlist[i+listSize]->crcChecksum[j]=dlist[i]->crcChecksum[j]^crcGen[i2][j];}}
extern int16_t llrtab[256][256];
inline void computeLLR_int8(decoder_list_int8_t **dlist,int listSize, uint16_t row, uint16_t col,
uint16_t offset) __attribute__((always_inline));
inline void computeLLR_int8(decoder_list_int8_t **dlist,int listSize, uint16_t row, uint16_t col,
uint16_t offset) {
int16_t a;
int16_t b;
int16_t absA,absB;
int16_t maska,maskb;
int16_t minabs;
// int16_t **llr=dlist[i]->llr;
for (int i=0;i<listSize;i++) {
a = dlist[i]->llr[col + 1][row];
b = dlist[i]->llr[col+1][row + offset];
// printf("LLR: a %d, b %d\n",a,b);
maska = a>>15;
maskb = b>>15;
absA = (a+maska)^maska;
absB = (b+maskb)^maskb;
// printf("LLR: absA %d, absB %d\n",absA,absB);
minabs = absA<absB ? absA : absB;
if ((maska^maskb) == 0)
dlist[i]->llr[col][row] = minabs;
else
dlist[i]->llr[col][row] = -minabs;
// printf("LLR (a %d, b %d): llr[%d][%d] %d\n",a,b,col,row,llr[col][row]);
//dlist[i]->llr[col][row] = llrtab[a+128][b+128];
// printf("newLLR [%d,%d]: %d\n",col,row,dlist[i]->llr[col][row]);
}
}
#define decoder_int8_A(sorted_dlist,currentListSize,polarParams) {for (int i = 0; i < currentListSize; i++) { \
for (int k = 0; k < (polarParams->n+1); k++) { \
memcpy((void*)&sorted_dlist[i+currentListSize]->bit[k][0],\
(void*)&sorted_dlist[i]->bit[k][0],\
sizeof(uint8_t)*polarParams->N);\
memcpy((void*)&sorted_dlist[i+currentListSize]->llr[k][0],\
(void*)&sorted_dlist[i]->llr[k][0],\
sizeof(int16_t)*polarParams->N);}}}
#define decoder_int8_B(sorted_dlist,currentBit,currentListSize) {for (int i = 0; i < currentListSize; i++) {sorted_dlist[i]->bit[0][currentBit]=0;sorted_dlist[i+currentListSize]->bit[0][currentBit]=1;}}
void inline decoder_int8_C(decoder_list_int8_t *sorted_dlist[],
t_nrPolar_params *polarParams,
int currentBit,
int currentListSize,
int listSize) {
int32_t pathMetric[2*listSize];
decoder_list_int8_t *temp_dlist[2*listSize];
int listIndex[2*listSize];
int listIndex2[2*listSize];
for (int i = 0; i < currentListSize; i++) {
listIndex[i]=i;
pathMetric[i] = sorted_dlist[i]->pathMetric;
}
nr_sort_asc_int16_1D_array_ind(pathMetric, listIndex, currentListSize);
for (int i=0;i<currentListSize;i++) {
listIndex2[listIndex[i]] = i;
}
// copy the llr/bit arrays that are needed
for (int i = 0; i < listSize; i++) {
// printf("listIndex[%d] %d\n",i,listIndex[i]);
if ((listIndex2[i+listSize]<listSize) && (listIndex2[i]<listSize)) { // both '0' and '1' path metrics are to be kept
// do memcpy of LLR and Bit arrays
for (int k = 0; k < (polarParams->n+1); k++) {
memcpy((void*)&sorted_dlist[i+listSize]->bit[k][0],
(void*)&sorted_dlist[i]->bit[k][0],
sizeof(uint8_t)*polarParams->N);
memcpy((void*)&sorted_dlist[i+listSize]->llr[k][0],
(void*)&sorted_dlist[i]->llr[k][0],
sizeof(int16_t)*polarParams->N);
}
sorted_dlist[i]->bit[0][currentBit]=0;
sorted_dlist[i+listSize]->bit[0][currentBit]=1;
}
else if (listIndex2[i+listSize]<listSize) { // only '1' path metric is to be kept
// just change the current bit from '0' to '1'
for (int k = 0; k < (polarParams->n+1); k++) {
memcpy((void*)&sorted_dlist[i+listSize]->bit[k][0],
(void*)&sorted_dlist[i]->bit[k][0],
sizeof(uint8_t)*polarParams->N);
memcpy((void*)&sorted_dlist[i+listSize]->llr[k][0],
(void*)&sorted_dlist[i]->llr[k][0],
sizeof(int16_t)*polarParams->N);
}
sorted_dlist[i+listSize]->bit[0][currentBit]=1;
/*
decoder_list_t *tmp = sorted_dlist[i+listSize];
sorted_dlist[i+listSize] = sorted_dlist[i];
sorted_dlist[i+listSize]->pathMetric = tmp->pathMetric;
sorted_dlist[i+listSize]->bit[0][currentBit]=1;
memcpy((void*)&sorted_dlist[i+listSize]->crcChecksum[0],
(void*)&tmp->crcChecksum[0],
24*sizeof(uint8_t));*/
}
}
for (int i = 0; i < 2*listSize; i++) {
temp_dlist[i] = sorted_dlist[i];
}
for (int i = 0; i < 2*listSize; i++) {
// printf("i %d => %d\n",i,listIndex[i]);
sorted_dlist[i] = temp_dlist[listIndex[i]];
}
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment