Commit 175e835d authored by Laurent THOMAS's avatar Laurent THOMAS

update the polar codec function that creates the internal LUTs

to use faster code
- replaced hundreds mallocs by either local arrays or by the OAI multi-dimensionalarray (G_N_tab array)
- made in SIMD a one bit per byte packing function to packed (ordinary) bits in bytes
parent 6debc5dd
...@@ -32,20 +32,20 @@ const uint8_t **crc24c_generator_matrix(uint16_t payloadSizeBits) ...@@ -32,20 +32,20 @@ const uint8_t **crc24c_generator_matrix(uint16_t payloadSizeBits)
uint8_t **crc_generator_matrix = malloc(payloadSizeBits*sizeof(uint8_t *) + payloadSizeBits*crcPolynomialSize*sizeof(uint8_t)); uint8_t **crc_generator_matrix = malloc(payloadSizeBits*sizeof(uint8_t *) + payloadSizeBits*crcPolynomialSize*sizeof(uint8_t));
if (crc_generator_matrix) if (crc_generator_matrix)
for (int i = 0; i < payloadSizeBits; i++) for (int i = 0; i < payloadSizeBits; i++)
crc_generator_matrix[i] = ((uint8_t*)&crc_generator_matrix[payloadSizeBits])+i*crcPolynomialSize; crc_generator_matrix[i] = ((uint8_t *)&crc_generator_matrix[payloadSizeBits]) + i * crcPolynomialSize;
for (int i = 0; i < crcPolynomialSize; i++) for (int i = 0; i < crcPolynomialSize; i++)
crc_generator_matrix[payloadSizeBits-1][i]=crcPolynomialPattern[i+1]; crc_generator_matrix[payloadSizeBits - 1][i] = crcPolynomialPattern[i + 1];
for (int i = payloadSizeBits-2; i >= 0; i--){ for (int i = payloadSizeBits - 2; i >= 0; i--) {
for (int j = 0; j < crcPolynomialSize-1; j++) temp1[j]=crc_generator_matrix[i+1][j+1]; for (int j = 0; j < crcPolynomialSize-1; j++) temp1[j]=crc_generator_matrix[i+1][j+1];
temp1[crcPolynomialSize-1]=0; temp1[crcPolynomialSize - 1] = 0;
for (int j = 0; j < crcPolynomialSize; j++) for (int j = 0; j < crcPolynomialSize; j++)
temp2[j]=crc_generator_matrix[i+1][0]*crcPolynomialPattern[j+1]; temp2[j] = crc_generator_matrix[i + 1][0] * crcPolynomialPattern[j + 1];
for (int j = 0; j < crcPolynomialSize; j++){ for (int j = 0; j < crcPolynomialSize; j++) {
if(temp1[j]+temp2[j] == 1) if(temp1[j]+temp2[j] == 1)
crc_generator_matrix[i][j]=1; crc_generator_matrix[i][j]=1;
else else
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* \email raymond.knopp@eurecom.fr, turker.yilmaz@eurecom.fr * \email raymond.knopp@eurecom.fr, turker.yilmaz@eurecom.fr
* \note * \note
* \warning * \warning
*/ */
/* /*
* Return values: * Return values:
...@@ -68,13 +68,13 @@ int8_t polar_decoder(double *input, ...@@ -68,13 +68,13 @@ int8_t polar_decoder(double *input,
{ {
t_nrPolar_params *polarParams=nr_polar_params(messageType, messageLength, aggregation_level, true); t_nrPolar_params *polarParams=nr_polar_params(messageType, messageLength, aggregation_level, true);
//Assumes no a priori knowledge. //Assumes no a priori knowledge.
uint8_t bit[polarParams->N][polarParams->n+1][2*listSize]; uint8_t bit[polarParams->N][polarParams->n + 1][2 * listSize];
memset(bit,0,sizeof bit); memset(bit, 0, sizeof bit);
uint8_t bitUpdated[polarParams->N][polarParams->n+1]; //0=False, 1=True uint8_t bitUpdated[polarParams->N][polarParams->n+1]; //0=False, 1=True
memset(bitUpdated,0,sizeof bitUpdated); memset(bitUpdated,0,sizeof bitUpdated);
uint8_t llrUpdated[polarParams->N][polarParams->n+1]; //0=False, 1=True uint8_t llrUpdated[polarParams->N][polarParams->n+1]; //0=False, 1=True
memset(llrUpdated,0,sizeof llrUpdated); memset(llrUpdated,0,sizeof llrUpdated);
double llr[polarParams->N][polarParams->n+1][2*listSize]; double llr[polarParams->N][polarParams->n + 1][2 * listSize];
uint8_t crcChecksum[polarParams->crcParityBits][2*listSize]; uint8_t crcChecksum[polarParams->crcParityBits][2*listSize];
memset(crcChecksum,0,sizeof crcChecksum); memset(crcChecksum,0,sizeof crcChecksum);
double pathMetric[2*listSize]; double pathMetric[2*listSize];
...@@ -127,7 +127,8 @@ int8_t polar_decoder(double *input, ...@@ -127,7 +127,8 @@ int8_t polar_decoder(double *input,
double d_tilde[polarParams->N]; double d_tilde[polarParams->N];
nr_polar_rate_matching(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength); nr_polar_rate_matching(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength);
for (int j = 0; j < polarParams->N; j++) llr[j][polarParams->n][0]=d_tilde[j]; for (int j = 0; j < polarParams->N; j++)
llr[j][polarParams->n][0] = d_tilde[j];
/* /*
* SCL polar decoder. * SCL polar decoder.
...@@ -139,48 +140,56 @@ int8_t polar_decoder(double *input, ...@@ -139,48 +140,56 @@ int8_t polar_decoder(double *input,
uint8_t listIndex[2*listSize], copyIndex; uint8_t listIndex[2*listSize], copyIndex;
for (uint16_t currentBit=0; currentBit<polarParams->N; currentBit++) { for (uint16_t currentBit=0; currentBit<polarParams->N; currentBit++) {
updateLLR(currentListSize, currentBit, 0, polarParams->N, polarParams->n+1, 2*listSize, llr, llrUpdated, bit, bitUpdated); updateLLR(currentListSize, currentBit, 0, polarParams->N, polarParams->n + 1, 2 * listSize, llr, llrUpdated, bit, bitUpdated);
if (polarParams->information_bit_pattern[currentBit]==0) { //Frozen bit. if (polarParams->information_bit_pattern[currentBit]==0) { //Frozen bit.
updatePathMetric(pathMetric, currentListSize, 0, currentBit, polarParams->N, polarParams->n+1, 2*listSize, llr); updatePathMetric(pathMetric, currentListSize, 0, currentBit, polarParams->N, polarParams->n + 1, 2 * listSize, llr);
} else { //Information or CRC bit. } else { //Information or CRC bit.
updatePathMetric2(pathMetric, currentListSize, currentBit, polarParams->N, polarParams->n+1, 2*listSize, llr); updatePathMetric2(pathMetric, currentListSize, currentBit, polarParams->N, polarParams->n + 1, 2 * listSize, llr);
for (int i = 0; i < currentListSize; i++) { for (int i = 0; i < currentListSize; i++) {
for (int j = 0; j < polarParams->N; j++) { for (int j = 0; j < polarParams->N; j++) {
for (int k = 0; k < (polarParams->n+1); k++) { for (int k = 0; k < (polarParams->n + 1); k++) {
bit[j][k][i+currentListSize]=bit[j][k][i]; bit[j][k][i + currentListSize] = bit[j][k][i];
llr[j][k][i+currentListSize]=llr[j][k][i]; llr[j][k][i + currentListSize] = llr[j][k][i];
} }
} }
} }
for (int i = 0; i < currentListSize; i++) { for (int i = 0; i < currentListSize; i++) {
bit[currentBit][0][i]=0; bit[currentBit][0][i] = 0;
crcState[i+currentListSize]=crcState[i]; crcState[i + currentListSize] = crcState[i];
} }
for (int i = currentListSize; i < 2*currentListSize; i++) bit[currentBit][0][i]=1; for (int i = currentListSize; i < 2 * currentListSize; i++)
bit[currentBit][0][i] = 1;
bitUpdated[currentBit][0]=1;
updateCrcChecksum2(polarParams->crcParityBits, 2*listSize, crcChecksum, bitUpdated[currentBit][0] = 1;
polarParams->K, polarParams->crcParityBits, extended_crc_generator_matrix, updateCrcChecksum2(polarParams->crcParityBits,
currentListSize, nonFrozenBit, polarParams->crcParityBits); 2 * listSize,
currentListSize*=2; crcChecksum,
polarParams->K,
//Keep only the best "listSize" number of entries. polarParams->crcParityBits,
extended_crc_generator_matrix,
currentListSize,
nonFrozenBit,
polarParams->crcParityBits);
currentListSize *= 2;
// Keep only the best "listSize" number of entries.
if (currentListSize > listSize) { if (currentListSize > listSize) {
for (uint8_t i = 0; i < 2*listSize; i++) listIndex[i]=i; for (uint8_t i = 0; i < 2 * listSize; i++)
listIndex[i] = i;
nr_sort_asc_double_1D_array_ind(pathMetric, listIndex, currentListSize); nr_sort_asc_double_1D_array_ind(pathMetric, listIndex, currentListSize);
//sort listIndex[listSize, ..., 2*listSize-1] in descending order. // sort listIndex[listSize, ..., 2*listSize-1] in descending order.
uint8_t swaps, tempInd; uint8_t swaps, tempInd;
for (uint8_t i = 0; i < listSize; i++) { for (uint8_t i = 0; i < listSize; i++) {
swaps = 0; swaps = 0;
for (uint8_t j = listSize; j < (2*listSize - i) - 1; j++) { for (uint8_t j = listSize; j < (2 * listSize - i) - 1; j++) {
if (listIndex[j+1] > listIndex[j]) { if (listIndex[j + 1] > listIndex[j]) {
tempInd = listIndex[j]; tempInd = listIndex[j];
listIndex[j] = listIndex[j + 1]; listIndex[j] = listIndex[j + 1];
listIndex[j + 1] = tempInd; listIndex[j + 1] = tempInd;
...@@ -196,8 +205,8 @@ int8_t polar_decoder(double *input, ...@@ -196,8 +205,8 @@ int8_t polar_decoder(double *input,
for (int k=(listSize-1); k>0; k--) { for (int k=(listSize-1); k>0; k--) {
for (int i=0; i<polarParams->N; i++) { for (int i=0; i<polarParams->N; i++) {
for (int j=0; j<(polarParams->n+1); j++) { for (int j=0; j<(polarParams->n+1); j++) {
bit[i][j][listIndex[(2*listSize-1)-k]]=bit[i][j][listIndex[k]]; bit[i][j][listIndex[(2 * listSize - 1) - k]] = bit[i][j][listIndex[k]];
llr[i][j][listIndex[(2*listSize-1)-k]]=llr[i][j][listIndex[k]]; llr[i][j][listIndex[(2 * listSize - 1) - k]] = llr[i][j][listIndex[k]];
} }
} }
} }
...@@ -285,7 +294,8 @@ int8_t polar_decoder(double *input, ...@@ -285,7 +294,8 @@ int8_t polar_decoder(double *input,
for (uint8_t i = 0; i < fmin(listSize, (pow(2,polarParams->crcCorrectionBits)) ); i++) { for (uint8_t i = 0; i < fmin(listSize, (pow(2,polarParams->crcCorrectionBits)) ); i++) {
if ( crcState[listIndex[i]] == 1 ) { if ( crcState[listIndex[i]] == 1 ) {
for (int j = 0; j < polarParams->N; j++) polarParams->nr_polar_U[j]=bit[j][0][listIndex[i]]; for (int j = 0; j < polarParams->N; j++)
polarParams->nr_polar_U[j] = bit[j][0][listIndex[i]];
//Extract the information bits (û to ĉ) //Extract the information bits (û to ĉ)
nr_polar_info_bit_extraction(polarParams->nr_polar_U, polarParams->nr_polar_CPrime, polarParams->information_bit_pattern, polarParams->N); nr_polar_info_bit_extraction(polarParams->nr_polar_U, polarParams->nr_polar_CPrime, polarParams->information_bit_pattern, polarParams->N);
...@@ -626,7 +636,7 @@ uint32_t polar_decoder_int16(int16_t *input, ...@@ -626,7 +636,7 @@ uint32_t polar_decoder_int16(int16_t *input,
printf("\n"); printf("\n");
#endif #endif
int16_t d_tilde[polarParams->N];// = malloc(sizeof(double) * polarParams->N); int16_t d_tilde[polarParams->N];
nr_polar_rate_matching_int16(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength, polarParams->i_bil); nr_polar_rate_matching_int16(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength, polarParams->i_bil);
for (int i=0; i<polarParams->N; i++) { for (int i=0; i<polarParams->N; i++) {
...@@ -687,14 +697,9 @@ uint32_t polar_decoder_int16(int16_t *input, ...@@ -687,14 +697,9 @@ uint32_t polar_decoder_int16(int16_t *input,
uint64_t B[4] = {0}; uint64_t B[4] = {0};
if (polarParams->K<65) { if (polarParams->K<65) {
B[0] = polarParams->B_tab0[0][Cprimebyte[0]] | B[0] = polarParams->B_tab0[0][Cprimebyte[0]] | polarParams->B_tab0[1][Cprimebyte[1]] | polarParams->B_tab0[2][Cprimebyte[2]]
polarParams->B_tab0[1][Cprimebyte[1]] | | polarParams->B_tab0[3][Cprimebyte[3]] | polarParams->B_tab0[4][Cprimebyte[4]] | polarParams->B_tab0[5][Cprimebyte[5]]
polarParams->B_tab0[2][Cprimebyte[2]] | | polarParams->B_tab0[6][Cprimebyte[6]] | polarParams->B_tab0[7][Cprimebyte[7]];
polarParams->B_tab0[3][Cprimebyte[3]] |
polarParams->B_tab0[4][Cprimebyte[4]] |
polarParams->B_tab0[5][Cprimebyte[5]] |
polarParams->B_tab0[6][Cprimebyte[6]] |
polarParams->B_tab0[7][Cprimebyte[7]];
} else if (polarParams->K<129) { } else if (polarParams->K<129) {
int len = polarParams->K/8; int len = polarParams->K/8;
......
...@@ -45,20 +45,23 @@ static inline void updateBit(uint8_t listSize, ...@@ -45,20 +45,23 @@ static inline void updateBit(uint8_t listSize,
uint8_t bit[xlen][ylen][zlen], uint8_t bit[xlen][ylen][zlen],
uint8_t bitU[xlen][ylen]) uint8_t bitU[xlen][ylen])
{ {
uint16_t offset = ( xlen/(pow(2,(ylen-col))) ); uint16_t offset = (xlen / (pow(2, (ylen - col))));
for (uint8_t i=0; i<listSize; i++) { for (uint8_t i = 0; i < listSize; i++) {
if (( (row) % (2*offset) ) >= offset ) { if (((row) % (2 * offset)) >= offset) {
if (bitU[row][col-1]==0) updateBit(listSize, row, (col-1), xlen, ylen, zlen, bit, bitU); if (bitU[row][col - 1] == 0)
bit[row][col][i] = bit[row][col-1][i]; updateBit(listSize, row, (col - 1), xlen, ylen, zlen, bit, bitU);
bit[row][col][i] = bit[row][col - 1][i];
} else { } else {
if (bitU[row][col-1]==0) updateBit(listSize, row, (col-1), xlen, ylen, zlen, bit, bitU); if (bitU[row][col - 1] == 0)
if (bitU[row+offset][col-1]==0) updateBit(listSize, (row+offset), (col-1), xlen, ylen, zlen, bit, bitU); updateBit(listSize, row, (col - 1), xlen, ylen, zlen, bit, bitU);
bit[row][col][i] = ( (bit[row][col-1][i]+bit[row+offset][col-1][i]) % 2); if (bitU[row + offset][col - 1] == 0)
updateBit(listSize, (row + offset), (col - 1), xlen, ylen, zlen, bit, bitU);
bit[row][col][i] = ((bit[row][col - 1][i] + bit[row + offset][col - 1][i]) % 2);
} }
} }
bitU[row][col]=1; bitU[row][col] = 1;
} }
static inline void computeLLR(uint16_t row, static inline void computeLLR(uint16_t row,
...@@ -75,7 +78,6 @@ static inline void computeLLR(uint16_t row, ...@@ -75,7 +78,6 @@ static inline void computeLLR(uint16_t row,
llr[row][col][i] = log((exp(a + b) + 1) / (exp(a) + exp(b))); //eq. (8a) llr[row][col][i] = log((exp(a + b) + 1) / (exp(a) + exp(b))); //eq. (8a)
} }
void updateLLR(uint8_t listSize, void updateLLR(uint8_t listSize,
uint16_t row, uint16_t row,
uint16_t col, uint16_t col,
...@@ -85,23 +87,27 @@ void updateLLR(uint8_t listSize, ...@@ -85,23 +87,27 @@ void updateLLR(uint8_t listSize,
double llr[xlen][ylen][zlen], double llr[xlen][ylen][zlen],
uint8_t llrU[xlen][ylen], uint8_t llrU[xlen][ylen],
uint8_t bit[xlen][ylen][zlen], uint8_t bit[xlen][ylen][zlen],
uint8_t bitU[xlen][ylen] uint8_t bitU[xlen][ylen])
)
{ {
uint16_t offset = (xlen/(pow(2,(ylen-col-1)))); uint16_t offset = (xlen / (pow(2, (ylen - col - 1))));
for (uint8_t i=0; i<listSize; i++) { for (uint8_t i = 0; i < listSize; i++) {
if (( (row) % (2*offset) ) >= offset ) { if ((row % (2 * offset)) >= offset) {
if(bitU[row-offset][col]==0) updateBit(listSize, (row-offset), col, xlen, ylen, zlen, bit, bitU); if (bitU[row - offset][col] == 0)
if(llrU[row-offset][col+1]==0) updateLLR(listSize, (row-offset), (col+1), xlen, ylen, zlen, llr, llrU, bit, bitU ); updateBit(listSize, (row - offset), col, xlen, ylen, zlen, bit, bitU);
if(llrU[row][col+1]==0) updateLLR(listSize, row, (col+1), xlen, ylen, zlen, llr, llrU, bit, bitU); if (llrU[row - offset][col + 1] == 0)
llr[row][col][i] = (pow((-1),bit[row-offset][col][i])*llr[row-offset][col+1][i]) + llr[row][col+1][i]; updateLLR(listSize, (row - offset), (col + 1), xlen, ylen, zlen, llr, llrU, bit, bitU);
if (llrU[row][col + 1] == 0)
updateLLR(listSize, row, (col + 1), xlen, ylen, zlen, llr, llrU, bit, bitU);
llr[row][col][i] = (pow((-1), bit[row - offset][col][i]) * llr[row - offset][col + 1][i]) + llr[row][col + 1][i];
} else { } else {
if(llrU[row][col+1]==0) updateLLR(listSize, row, (col+1), xlen, ylen, zlen, llr, llrU, bit, bitU ); if (llrU[row][col + 1] == 0)
if(llrU[row+offset][col+1]==0) updateLLR(listSize, (row+offset), (col+1), xlen, ylen, zlen, llr, llrU, bit, bitU ); updateLLR(listSize, row, (col + 1), xlen, ylen, zlen, llr, llrU, bit, bitU);
if (llrU[row + offset][col + 1] == 0)
updateLLR(listSize, (row + offset), (col + 1), xlen, ylen, zlen, llr, llrU, bit, bitU);
computeLLR(row, col, i, offset, xlen, ylen, zlen, llr); computeLLR(row, col, i, offset, xlen, ylen, zlen, llr);
} }
} }
llrU[row][col]=1; llrU[row][col] = 1;
// printf("LLR (a %f, b %f): llr[%d][%d] %f\n",32*a,32*b,col,row,32*llr[col][row]); // printf("LLR (a %f, b %f): llr[%d][%d] %f\n",32*a,32*b,col,row,32*llr[col][row]);
} }
......
...@@ -112,7 +112,7 @@ struct nrPolar_params { ...@@ -112,7 +112,7 @@ struct nrPolar_params {
const uint8_t **crc_generator_matrix; // G_P const uint8_t **crc_generator_matrix; // G_P
const uint8_t **G_N; const uint8_t **G_N;
uint64_t **G_N_tab; fourDimArray_t *G_N_tab;
int groupsize; int groupsize;
int *rm_tab; int *rm_tab;
uint64_t cprime_tab0[32][256]; uint64_t cprime_tab0[32][256];
...@@ -219,9 +219,7 @@ uint32_t nr_polar_output_length(uint16_t K, ...@@ -219,9 +219,7 @@ uint32_t nr_polar_output_length(uint16_t K,
uint16_t E, uint16_t E,
uint8_t n_max); uint8_t n_max);
void nr_polar_channel_interleaver_pattern(uint16_t *cip, void nr_polar_channel_interleaver_pattern(uint16_t *cip, const uint8_t I_BIL, const uint16_t E);
uint8_t I_BIL,
uint16_t E);
void nr_polar_rate_matching_pattern(uint16_t *rmp, void nr_polar_rate_matching_pattern(uint16_t *rmp,
uint16_t *J, uint16_t *J,
...@@ -258,7 +256,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp, ...@@ -258,7 +256,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
const uint16_t *Q_0_Nminus1, const uint16_t *Q_0_Nminus1,
uint16_t K, uint16_t K,
uint16_t N, uint16_t N,
uint16_t E, const uint16_t E,
uint8_t n_PC, uint8_t n_PC,
uint8_t n_pc_wm); uint8_t n_pc_wm);
...@@ -284,7 +282,7 @@ void nr_polar_generate_u(uint64_t *u, ...@@ -284,7 +282,7 @@ void nr_polar_generate_u(uint64_t *u,
uint16_t N, uint16_t N,
uint8_t n_pc); uint8_t n_pc);
void nr_polar_uxG(uint64_t *D, const uint64_t *u, const uint64_t **G_N_tab, uint16_t N); void nr_polar_uxG(uint64_t *D, const uint64_t *u, const fourDimArray_t *G_N_tab, uint16_t N);
void nr_polar_info_extraction_from_u(uint64_t *Cprime, void nr_polar_info_extraction_from_u(uint64_t *Cprime,
const uint8_t *u, const uint8_t *u,
......
...@@ -359,15 +359,11 @@ void build_polar_tables(t_nrPolar_params *polarParams) { ...@@ -359,15 +359,11 @@ void build_polar_tables(t_nrPolar_params *polarParams) {
AssertFatal(polarParams->K > 17, "K = %d < 18, is not possible\n",polarParams->K); AssertFatal(polarParams->K > 17, "K = %d < 18, is not possible\n",polarParams->K);
AssertFatal(polarParams->K < 129, "K = %d > 128, is not supported yet\n",polarParams->K); AssertFatal(polarParams->K < 129, "K = %d > 128, is not supported yet\n",polarParams->K);
int bit_i,ip; int bit_i,ip;
int numbytes = polarParams->K>>3; const int numbytes = (polarParams->K+7)/8;
int residue = polarParams->K&7; const int residue = polarParams->K&7;
int numbits;
if (residue>0) numbytes++;
for (int byte=0; byte<numbytes; byte++) { for (int byte=0; byte<numbytes; byte++) {
if (byte<(polarParams->K>>3)) numbits=8; int numbits = byte<(polarParams->K>>3) ? 8 : residue;
else numbits=residue;
for (int val=0; val<256; val++) { for (int val=0; val<256; val++) {
polarParams->cprime_tab0[byte][val] = 0; polarParams->cprime_tab0[byte][val] = 0;
...@@ -389,15 +385,19 @@ void build_polar_tables(t_nrPolar_params *polarParams) { ...@@ -389,15 +385,19 @@ void build_polar_tables(t_nrPolar_params *polarParams) {
// build G bit vectors for information bit positions and convert the bit as bytes tables in nr_polar_kronecker_power_matrices.c to // build G bit vectors for information bit positions and convert the bit as bytes tables in nr_polar_kronecker_power_matrices.c to
// 64 bit packed vectors. // 64 bit packed vectors.
polarParams->G_N_tab = (uint64_t **)calloc(polarParams->N, sizeof(int64_t *)); // Truncates id N%64 != 0
allocCast2D(pp, uint64_t, polarParams->G_N_tab, polarParams->N, polarParams->N / 64, false);
simde__m256i zeros = simde_mm256_setzero_si256();
// this code packs the one bit per byte of G_N into a packed bits G_N_tab
for (int i = 0; i < polarParams->N; i++) { for (int i = 0; i < polarParams->N; i++) {
polarParams->G_N_tab[i] = (uint64_t *)memalign(32, (polarParams->N / 64) * sizeof(uint64_t)); for (int j = 0; j < polarParams->N; j += 64) {
memset((void *)polarParams->G_N_tab[i], 0, (polarParams->N / 64) * sizeof(uint64_t)); const simde__m256i tmp1 = simde_mm256_cmpgt_epi8(*(simde__m256i *)&polarParams->G_N[i][j], zeros);
const simde__m256i tmp2 = simde_mm256_cmpgt_epi8(*(simde__m256i *)&polarParams->G_N[i][j + 32], zeros);
for (int j = 0; j < polarParams->N; j++) // cast directly to uint64_t from int32_t propagates the sign bit (in gcc)
polarParams->G_N_tab[i][j / 64] |= ((uint64_t)polarParams->G_N[i][j]) << (j & 63); const uint32_t part1 = simde_mm256_movemask_epi8(tmp1);
const uint32_t part2 = simde_mm256_movemask_epi8(tmp2);
pp[i][j / 64] = ((uint64_t)part2 << 32) | part1;
}
#ifdef DEBUG_POLAR_ENCODER #ifdef DEBUG_POLAR_ENCODER
printf("Bit %d Selecting row %d of G : ", i, i); printf("Bit %d Selecting row %d of G : ", i, i);
...@@ -519,17 +519,13 @@ void polar_encoder_fast(uint64_t *A, ...@@ -519,17 +519,13 @@ void polar_encoder_fast(uint64_t *A,
//int bitlen0=bitlen; //int bitlen0=bitlen;
#ifdef POLAR_CODING_DEBUG #ifdef POLAR_CODING_DEBUG
int A_array = (bitlen + 63) >> 6;
printf("\nTX\n"); printf("\nTX\n");
printf("a: "); printf("a: ");
for (int n = 0; n < bitlen; n++) { for (int n = (bitlen + 63)/64 ; n >=0; n--) {
if (n % 4 == 0) { if (n % 4 == 0)
printf(" "); printf(" ");
} if (n < bitlen)
int n1 = n >> 6; printf("%lu", (A[n/64] >> (n%64)) & 1);
int n2 = n - (n1 << 6);
int alen = n1 == 0 ? bitlen - (A_array << 6) : 64;
printf("%lu", (A[A_array - 1 - n1] >> (alen - 1 - n2)) & 1);
} }
printf("\n"); printf("\n");
#endif #endif
...@@ -692,7 +688,7 @@ void polar_encoder_fast(uint64_t *A, ...@@ -692,7 +688,7 @@ void polar_encoder_fast(uint64_t *A,
#endif #endif
uint64_t D[8] = {0}; uint64_t D[8] = {0};
nr_polar_uxG(D, u, (const uint64_t **)polarParams->G_N_tab, polarParams->N); nr_polar_uxG(D, u, polarParams->G_N_tab, polarParams->N);
#ifdef POLAR_CODING_DEBUG #ifdef POLAR_CODING_DEBUG
printf("d: "); printf("d: ");
......
...@@ -119,22 +119,19 @@ void nr_polar_info_extraction_from_u(uint64_t *Cprime, ...@@ -119,22 +119,19 @@ void nr_polar_info_extraction_from_u(uint64_t *Cprime,
} }
} }
void nr_polar_uxG(uint64_t *D, const uint64_t *u, const uint64_t **G_N_tab, uint16_t N) void nr_polar_uxG(uint64_t *D, const uint64_t *u, const fourDimArray_t *G_N_tab, uint16_t N)
{ {
int N_array = N >> 6; const int N64 = N / 64;
cast2Darray(g_n, uint64_t, G_N_tab);
for (int n = 0; n < N; n++) { for (int n = 0; n < N; n++) {
const uint64_t *Gn = G_N_tab[N - 1 - n]; const uint64_t *Gn = g_n[N - 1 - n];
int n_ones = 0; int n_ones = 0;
for (int a = 0; a < N_array; a++) { for (int a = 0; a < N64; a++)
uint64_t uxG = u[a] & Gn[a]; n_ones += count_bits_set(u[a] & Gn[a]);
if (uxG != 0)
n_ones += count_bits_set(uxG);
}
int n1 = n >> 6; int n1 = n / 64;
int n2 = n - (n1 << 6); int n2 = n - (n1 * 64);
D[n1] |= ((uint64_t)n_ones & 1) << n2; D[n1] |= ((uint64_t)n_ones & 1) << n2;
} }
} }
...@@ -147,8 +144,7 @@ void nr_polar_bit_insertion(uint8_t *input, ...@@ -147,8 +144,7 @@ void nr_polar_bit_insertion(uint8_t *input,
int16_t *Q_PC_N, int16_t *Q_PC_N,
uint8_t n_PC) uint8_t n_PC)
{ {
uint16_t k=0; int k = 0;
uint8_t flag;
if (n_PC>0) { if (n_PC>0) {
/* /*
...@@ -156,22 +152,16 @@ void nr_polar_bit_insertion(uint8_t *input, ...@@ -156,22 +152,16 @@ void nr_polar_bit_insertion(uint8_t *input,
*/ */
} else { } else {
for (int n=0; n<=N-1; n++) { for (int n=0; n<=N-1; n++) {
flag=0; output[n] = 0;
for (int m=0; m<=(K+n_PC)-1; m++) { for (int m=0; m<=(K+n_PC)-1; m++) {
if ( n == Q_I_N[m]) { if ( n == Q_I_N[m]) {
flag=1; output[n] = input[k];
k++;
break; break;
} }
} }
if (flag) { // n ϵ Q_I_N
output[n]=input[k];
k++;
} else {
output[n] = 0;
}
} }
} }
} }
...@@ -179,7 +169,7 @@ uint32_t nr_polar_output_length(uint16_t K, ...@@ -179,7 +169,7 @@ uint32_t nr_polar_output_length(uint16_t K,
uint16_t E, uint16_t E,
uint8_t n_max) uint8_t n_max)
{ {
uint8_t n_1, n_2, n_min=5, n; int n_1, n_2, n_min = 5;
double R_min=1.0/8; double R_min=1.0/8;
if ( (E <= (9.0/8)*pow(2,ceil(log2(E))-1)) && (K/E < 9.0/16) ) { if ( (E <= (9.0/8)*pow(2,ceil(log2(E))-1)) && (K/E < 9.0/16) ) {
...@@ -190,7 +180,7 @@ uint32_t nr_polar_output_length(uint16_t K, ...@@ -190,7 +180,7 @@ uint32_t nr_polar_output_length(uint16_t K,
n_2 = ceil(log2(K/R_min)); n_2 = ceil(log2(K/R_min));
n=n_max; int n = n_max;
if (n>n_1) n=n_1; if (n>n_1) n=n_1;
if (n>n_2) n=n_2; if (n>n_2) n=n_2;
if (n<n_min) n=n_min; if (n<n_min) n=n_min;
...@@ -201,19 +191,14 @@ uint32_t nr_polar_output_length(uint16_t K, ...@@ -201,19 +191,14 @@ uint32_t nr_polar_output_length(uint16_t K,
return ((uint32_t) pow(2.0,n)); //=polar_code_output_length return ((uint32_t) pow(2.0,n)); //=polar_code_output_length
} }
void nr_polar_channel_interleaver_pattern(uint16_t *cip, const uint8_t I_BIL, const uint16_t E)
void nr_polar_channel_interleaver_pattern(uint16_t *cip,
uint8_t I_BIL,
uint16_t E)
{ {
if (I_BIL == 1) { if (I_BIL == 1) {
uint16_t T=0, k; int T = E;
while( ((T/2)*(T+1)) < E ) T++; while( ((T/2)*(T+1)) < E ) T++;
int16_t **v = malloc(T * sizeof(*v)); int16_t v[T][T];
for (int i = 0; i <= T-1; i++) v[i] = malloc((T-i) * sizeof(*(v[i]))); int k = 0;
k=0;
for (int i = 0; i <= T-1; i++) { for (int i = 0; i <= T-1; i++) {
for (int j = 0; j <= (T-1)-i; j++) { for (int j = 0; j <= (T-1)-i; j++) {
if (k<E) { if (k<E) {
...@@ -234,16 +219,11 @@ void nr_polar_channel_interleaver_pattern(uint16_t *cip, ...@@ -234,16 +219,11 @@ void nr_polar_channel_interleaver_pattern(uint16_t *cip,
} }
} }
} }
for (int i = 0; i <= T-1; i++) free(v[i]);
free(v);
} else { } else {
for (int i=0; i<=E-1; i++) cip[i]=i; for (int i=0; i<=E-1; i++) cip[i]=i;
} }
} }
void nr_polar_info_bit_pattern(uint8_t *ibp, void nr_polar_info_bit_pattern(uint8_t *ibp,
uint8_t *pcbp, uint8_t *pcbp,
int16_t *Q_I_N, int16_t *Q_I_N,
...@@ -253,25 +233,24 @@ void nr_polar_info_bit_pattern(uint8_t *ibp, ...@@ -253,25 +233,24 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
const uint16_t *Q_0_Nminus1, const uint16_t *Q_0_Nminus1,
uint16_t K, uint16_t K,
uint16_t N, uint16_t N,
uint16_t E, const uint16_t E,
uint8_t n_PC, uint8_t n_PC,
uint8_t n_pc_wm) uint8_t n_pc_wm)
{ {
int16_t *Q_Ftmp_N = malloc(sizeof(int16_t) * (N + 1)); // Last element shows the final int Q_Ftmp_N[N + 1]; // Last element shows the final
int16_t *Q_Itmp_N = malloc(sizeof(int16_t) * (N + 1)); // array index assigned a value. int Q_Itmp_N[N + 1]; // array index assigned a value.
for (int i = 0; i <= N; i++) { for (int i = 0; i <= N; i++) {
Q_Ftmp_N[i] = -1; // Empty array. Q_Ftmp_N[i] = -1; // Empty array.
Q_Itmp_N[i] = -1; Q_Itmp_N[i] = -1;
} }
uint8_t flag; int limit;
uint16_t limit, ind;
if (E < N) { if (E < N) {
if ((K / (double)E) <= (7.0 / 16)) { // puncturing if ((K / (double)E) <= (7.0 / 16)) { // puncturing
for (int n = 0; n <= N - E - 1; n++) { for (int n = 0; n <= N - E - 1; n++) {
ind = Q_Ftmp_N[N] + 1; int ind = Q_Ftmp_N[N] + 1;
Q_Ftmp_N[ind] = J[n]; Q_Ftmp_N[ind] = J[n];
Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1; Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
} }
...@@ -279,21 +258,21 @@ void nr_polar_info_bit_pattern(uint8_t *ibp, ...@@ -279,21 +258,21 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
if ((E / (double)N) >= (3.0 / 4)) { if ((E / (double)N) >= (3.0 / 4)) {
limit = ceil((double)(3 * N - 2 * E) / 4); limit = ceil((double)(3 * N - 2 * E) / 4);
for (int n = 0; n <= limit - 1; n++) { for (int n = 0; n <= limit - 1; n++) {
ind = Q_Ftmp_N[N] + 1; int ind = Q_Ftmp_N[N] + 1;
Q_Ftmp_N[ind] = n; Q_Ftmp_N[ind] = n;
Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1; Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
} }
} else { } else {
limit = ceil((double)(9 * N - 4 * E) / 16); limit = ceil((double)(9 * N - 4 * E) / 16);
for (int n = 0; n <= limit - 1; n++) { for (int n = 0; n <= limit - 1; n++) {
ind = Q_Ftmp_N[N] + 1; int ind = Q_Ftmp_N[N] + 1;
Q_Ftmp_N[ind] = n; Q_Ftmp_N[ind] = n;
Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1; Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
} }
} }
} else { // shortening } else { // shortening
for (int n = E; n <= N - 1; n++) { for (int n = E; n <= N - 1; n++) {
ind = Q_Ftmp_N[N] + 1; int ind = Q_Ftmp_N[N] + 1;
Q_Ftmp_N[ind] = J[n]; Q_Ftmp_N[ind] = J[n];
Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1; Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
} }
...@@ -302,13 +281,12 @@ void nr_polar_info_bit_pattern(uint8_t *ibp, ...@@ -302,13 +281,12 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
// Q_I,tmp_N = Q_0_N-1 \ Q_F,tmp_N // Q_I,tmp_N = Q_0_N-1 \ Q_F,tmp_N
for (int n = 0; n <= N - 1; n++) { for (int n = 0; n <= N - 1; n++) {
flag = 1; bool flag = true;
for (int m = 0; m <= Q_Ftmp_N[N]; m++) { for (int m = 0; m <= Q_Ftmp_N[N]; m++)
if (Q_0_Nminus1[n] == Q_Ftmp_N[m]) { if (Q_0_Nminus1[n] == Q_Ftmp_N[m]) {
flag = 0; flag = false;
break; break;
} }
}
if (flag) { if (flag) {
Q_Itmp_N[Q_Itmp_N[N] + 1] = Q_0_Nminus1[n]; Q_Itmp_N[Q_Itmp_N[N] + 1] = Q_0_Nminus1[n];
Q_Itmp_N[N]++; Q_Itmp_N[N]++;
...@@ -317,7 +295,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp, ...@@ -317,7 +295,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
// Q_I_N comprises (K+n_PC) most reliable bit indices in Q_I,tmp_N // Q_I_N comprises (K+n_PC) most reliable bit indices in Q_I,tmp_N
for (int n = 0; n <= (K + n_PC) - 1; n++) { for (int n = 0; n <= (K + n_PC) - 1; n++) {
ind = Q_Itmp_N[N] + n - ((K + n_PC) - 1); int ind = Q_Itmp_N[N] + n - ((K + n_PC) - 1);
Q_I_N[n] = Q_Itmp_N[ind]; Q_I_N[n] = Q_Itmp_N[ind];
} }
...@@ -330,13 +308,12 @@ void nr_polar_info_bit_pattern(uint8_t *ibp, ...@@ -330,13 +308,12 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
// Q_F_N = Q_0_N-1 \ Q_I_N // Q_F_N = Q_0_N-1 \ Q_I_N
for (int n = 0; n <= N - 1; n++) { for (int n = 0; n <= N - 1; n++) {
flag = 1; bool flag = true;
for (int m = 0; m <= (K + n_PC) - 1; m++) { for (int m = 0; m <= (K + n_PC) - 1; m++)
if (Q_0_Nminus1[n] == Q_I_N[m]) { if (Q_0_Nminus1[n] == Q_I_N[m]) {
flag = 0; flag = false;
break; break;
} }
}
if (flag) { if (flag) {
Q_F_N[Q_F_N[N] + 1] = Q_0_Nminus1[n]; Q_F_N[Q_F_N[N] + 1] = Q_0_Nminus1[n];
Q_F_N[N]++; Q_F_N[N]++;
...@@ -361,11 +338,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp, ...@@ -361,11 +338,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
break; break;
} }
} }
} }
free(Q_Ftmp_N);
free(Q_Itmp_N);
} }
...@@ -391,22 +364,21 @@ void nr_polar_rate_matching_pattern(uint16_t *rmp, ...@@ -391,22 +364,21 @@ void nr_polar_rate_matching_pattern(uint16_t *rmp,
uint16_t N, uint16_t N,
uint16_t E) uint16_t E)
{ {
uint8_t i; uint16_t d[N];
uint16_t *d, ind; for (int m = 0; m < N; m++)
d = (uint16_t *)malloc(sizeof(uint16_t) * N); d[m] = m;
uint16_t* y = calloc(N, sizeof(uint16_t)); uint16_t y[N];
memset(y, 0, sizeof(y));
for (int m=0; m<=N-1; m++) d[m]=m;
for (int m=0; m<=N-1; m++){ for (int m=0; m<=N-1; m++){
i=floor((32*m)/N); int i = floor((32 * m) / N);
J[m] = (P_i_[i]*(N/32)) + (m%(N/32)); J[m] = (P_i_[i]*(N/32)) + (m%(N/32));
y[m] = d[J[m]]; y[m] = d[J[m]];
} }
if (E>=N) { //repetition if (E>=N) { //repetition
for (int k=0; k<=E-1; k++) { for (int k=0; k<=E-1; k++) {
ind = (k%N); int ind = (k % N);
rmp[k]=y[ind]; rmp[k]=y[ind];
} }
} else { } else {
...@@ -420,9 +392,6 @@ void nr_polar_rate_matching_pattern(uint16_t *rmp, ...@@ -420,9 +392,6 @@ void nr_polar_rate_matching_pattern(uint16_t *rmp,
} }
} }
} }
free(d);
free(y);
} }
...@@ -459,9 +428,9 @@ void nr_polar_rm_deinterleaving_cb(const int16_t *in, int16_t *out, const uint16 ...@@ -459,9 +428,9 @@ void nr_polar_rm_deinterleaving_cb(const int16_t *in, int16_t *out, const uint16
{ {
int T = ceil((sqrt(8 * E + 1) - 1) / 2); int T = ceil((sqrt(8 * E + 1) - 1) / 2);
int v_tab[T][T]; int v_tab[T][T];
memset(v_tab, 0, sizeof(v_tab));
int k = 0; int k = 0;
for (int i = 0; i < T; i++) { for (int i = 0; i < T; i++) {
memset(v_tab[i], 0, T * sizeof(int));
for (int j = 0; j < T - i; j++) { for (int j = 0; j < T - i; j++) {
if (k < E) { if (k < E) {
v_tab[i][j] = k + 1; v_tab[i][j] = k + 1;
...@@ -484,7 +453,7 @@ void nr_polar_rm_deinterleaving_cb(const int16_t *in, int16_t *out, const uint16 ...@@ -484,7 +453,7 @@ void nr_polar_rm_deinterleaving_cb(const int16_t *in, int16_t *out, const uint16
} }
k = 0; k = 0;
memset(out, 0, E * sizeof(int16_t)); memset(out, 0, E * sizeof(*out));
for (int i = 0; i < T; i++) { for (int i = 0; i < T; i++) {
for (int j = 0; j < T - i; j++) { for (int j = 0; j < T - i; j++) {
if (v[i][j] != INT_MAX) { if (v[i][j] != INT_MAX) {
...@@ -508,12 +477,12 @@ void nr_polar_rate_matching_int16(int16_t *input, ...@@ -508,12 +477,12 @@ void nr_polar_rate_matching_int16(int16_t *input,
} }
if (E >= N) { // repetition if (E >= N) { // repetition
memset((void *)output, 0, N * sizeof(int16_t)); memset(output, 0, N * sizeof(*output));
for (int i = 0; i <= E - 1; i++) for (int i = 0; i <= E - 1; i++)
output[rmp[i]] += input[i]; output[rmp[i]] += input[i];
} else { } else {
if ((K / (double)E) <= (7.0 / 16)) if ((K / (double)E) <= (7.0 / 16))
memset((void *)output, 0, N * sizeof(int16_t)); // puncturing memset(output, 0, N * sizeof(*output)); // puncturing
else { // shortening else { // shortening
for (int i = 0; i <= N - 1; i++) for (int i = 0; i <= N - 1; i++)
output[i] = 32767; // instead of INFINITY, to prevent [-Woverflow] output[i] = 32767; // instead of INFINITY, to prevent [-Woverflow]
......
...@@ -23,39 +23,34 @@ ...@@ -23,39 +23,34 @@
#include "PHY/CODING/nrPolar_tools/nr_polar_defs.h" #include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
void nr_polar_rate_matching_pattern(uint16_t *rmp, uint16_t *J, const uint8_t *P_i_, uint16_t K, uint16_t N, uint16_t E){ void nr_polar_rate_matching_pattern(uint16_t *rmp, uint16_t *J, const uint8_t *P_i_, uint16_t K, uint16_t N, uint16_t E){
int d[N];
int y[N];
uint8_t i; for (int m = 0; m <= N - 1; m++)
uint16_t *d, *y, ind; d[m] = m;
d = (uint16_t *)malloc(sizeof(uint16_t) * N);
y = (uint16_t *)malloc(sizeof(uint16_t) * N);
for (int m=0; m<=N-1; m++) d[m]=m; for (int m = 0; m <= N - 1; m++) {
i = floor((32 * m) / N);
for (int m=0; m<=N-1; m++){ J[m] = (P_i_[i] * (N / 32)) + (m % (N / 32));
i=floor((32*m)/N);
J[m] = (P_i_[i]*(N/32)) + (m%(N/32));
y[m] = d[J[m]]; y[m] = d[J[m]];
} }
if (E>=N) { //repetition if (E >= N) { // repetition
for (int k=0; k<=E-1; k++) { for (int k = 0; k <= E - 1; k++) {
ind = (k%N); ind = (k % N);
rmp[k]=y[ind]; rmp[k] = y[ind];
} }
} else { } else {
if ( (K/(double)E) <= (7.0/16) ) { //puncturing if ((K / (double)E) <= (7.0 / 16)) { // puncturing
for (int k=0; k<=E-1; k++) { for (int k = 0; k <= E - 1; k++) {
rmp[k]=y[k+N-E]; rmp[k] = y[k + N - E];
} }
} else { //shortening } else { // shortening
for (int k=0; k<=E-1; k++) { for (int k = 0; k <= E - 1; k++) {
rmp[k]=y[k]; rmp[k] = y[k];
} }
} }
} }
free(d);
free(y);
} }
......
...@@ -48,10 +48,7 @@ static void nr_polar_delete_list(t_nrPolar_params * polarParams) { ...@@ -48,10 +48,7 @@ static void nr_polar_delete_list(t_nrPolar_params * polarParams) {
nr_polar_delete_list(polarParams->nextPtr); nr_polar_delete_list(polarParams->nextPtr);
delete_decoder_tree(polarParams); delete_decoder_tree(polarParams);
//From build_polar_tables() // From build_polar_tables()
for (int n=0; n < polarParams->N; n++)
if (polarParams->G_N_tab[n])
free(polarParams->G_N_tab[n]);
free(polarParams->G_N_tab); free(polarParams->G_N_tab);
free(polarParams->rm_tab); free(polarParams->rm_tab);
if (polarParams->crc_generator_matrix) if (polarParams->crc_generator_matrix)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment