update the polar codec function that creates the internal LUTs

to use faster code - replaced hundreds mallocs by either local arrays or by the OAI multi-dimensionalarray (G_N_tab array) - made in SIMD a one bit per byte packing function to packed (ordinary) bits in bytes

update the polar codec function that creates the internal LUTs
to use faster code - replaced hundreds mallocs by either local arrays or by the OAI multi-dimensionalarray (G_N_tab array) - made in SIMD a one bit per byte packing function to packed (ordinary) bits in bytes
175e835d · Laurent THOMAS · 6debc5dd · 175e835d · 175e835d · 175e835d
Commit 175e835d authored Nov 23, 2023 by Laurent THOMAS
8 changed files
--- a/openair1/PHY/CODING/nrPolar_tools/nr_crc_byte.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_crc_byte.c
@@ -30,27 +30,27 @@ const uint8_t **crc24c_generator_matrix(uint16_t payloadSizeBits)
  uint8_t temp1[crcPolynomialSize], temp2[crcPolynomialSize];
 	uint8_t **crc_generator_matrix = malloc(payloadSizeBits*sizeof(uint8_t *) + payloadSizeBits*crcPolynomialSize*sizeof(uint8_t));
-	if (crc_generator_matrix)
+  if (crc_generator_matrix)
-	  for (int i = 0; i < payloadSizeBits; i++)
+    for (int i = 0; i < payloadSizeBits; i++)
-	    crc_generator_matrix[i] = ((uint8_t*)&crc_generator_matrix[payloadSizeBits])+i*crcPolynomialSize;
+      crc_generator_matrix[i] = ((uint8_t *)&crc_generator_matrix[payloadSizeBits]) + i * crcPolynomialSize;
-	for (int i = 0; i < crcPolynomialSize; i++)
+  for (int i = 0; i < crcPolynomialSize; i++)
-	  crc_generator_matrix[payloadSizeBits-1][i]=crcPolynomialPattern[i+1];
+    crc_generator_matrix[payloadSizeBits - 1][i] = crcPolynomialPattern[i + 1];
-	for (int i = payloadSizeBits-2; i >= 0; i--){
+  for (int i = payloadSizeBits - 2; i >= 0; i--) {
 		for (int j = 0; j < crcPolynomialSize-1; j++) temp1[j]=crc_generator_matrix[i+1][j+1];
-		temp1[crcPolynomialSize-1]=0;
+    temp1[crcPolynomialSize - 1] = 0;
-		for (int j = 0; j < crcPolynomialSize; j++)
+    for (int j = 0; j < crcPolynomialSize; j++)
-			temp2[j]=crc_generator_matrix[i+1][0]*crcPolynomialPattern[j+1];
+      temp2[j] = crc_generator_matrix[i + 1][0] * crcPolynomialPattern[j + 1];
-		for (int j = 0; j < crcPolynomialSize; j++){
+    for (int j = 0; j < crcPolynomialSize; j++) {
 			if(temp1[j]+temp2[j] == 1)
 				crc_generator_matrix[i][j]=1;
 			else
 				crc_generator_matrix[i][j]=0;
-		}
+    }
-	}
+  }
  return (const uint8_t **)crc_generator_matrix;
 }
--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_decoder.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_decoder.c
@@ -28,7 +28,7 @@
 * \email raymond.knopp@eurecom.fr, turker.yilmaz@eurecom.fr
 * \note
 * \warning
-*/
+ */
 /*
 * Return values:
@@ -68,13 +68,13 @@ int8_t polar_decoder(double *input,
 {
  t_nrPolar_params *polarParams=nr_polar_params(messageType, messageLength, aggregation_level, true);
  //Assumes no a priori knowledge.
-  uint8_t bit[polarParams->N][polarParams->n+1][2*listSize];
+  uint8_t bit[polarParams->N][polarParams->n + 1][2 * listSize];
-  memset(bit,0,sizeof bit);
+  memset(bit, 0, sizeof bit);
  uint8_t bitUpdated[polarParams->N][polarParams->n+1]; //0=False, 1=True
  memset(bitUpdated,0,sizeof bitUpdated);
  uint8_t llrUpdated[polarParams->N][polarParams->n+1]; //0=False, 1=True
  memset(llrUpdated,0,sizeof llrUpdated);
-  double  llr[polarParams->N][polarParams->n+1][2*listSize];
+  double llr[polarParams->N][polarParams->n + 1][2 * listSize];
  uint8_t crcChecksum[polarParams->crcParityBits][2*listSize];
  memset(crcChecksum,0,sizeof crcChecksum);
  double  pathMetric[2*listSize];
@@ -127,7 +127,8 @@ int8_t polar_decoder(double *input,
  double d_tilde[polarParams->N];
  nr_polar_rate_matching(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength);
-  for (int j = 0; j < polarParams->N; j++) llr[j][polarParams->n][0]=d_tilde[j];
+  for (int j = 0; j < polarParams->N; j++)
+    llr[j][polarParams->n][0] = d_tilde[j];
  /*
   * SCL polar decoder.
@@ -139,48 +140,56 @@ int8_t polar_decoder(double *input,
  uint8_t listIndex[2*listSize], copyIndex;
  for (uint16_t currentBit=0; currentBit<polarParams->N; currentBit++) {
-    updateLLR(currentListSize, currentBit, 0, polarParams->N, polarParams->n+1, 2*listSize, llr, llrUpdated, bit, bitUpdated);
+    updateLLR(currentListSize, currentBit, 0, polarParams->N, polarParams->n + 1, 2 * listSize, llr, llrUpdated, bit, bitUpdated);
    if (polarParams->information_bit_pattern[currentBit]==0) { //Frozen bit.
-      updatePathMetric(pathMetric, currentListSize, 0, currentBit, polarParams->N, polarParams->n+1, 2*listSize, llr);
+      updatePathMetric(pathMetric, currentListSize, 0, currentBit, polarParams->N, polarParams->n + 1, 2 * listSize, llr);
    } else { //Information or CRC bit.
-      updatePathMetric2(pathMetric, currentListSize, currentBit, polarParams->N, polarParams->n+1, 2*listSize, llr);
+      updatePathMetric2(pathMetric, currentListSize, currentBit, polarParams->N, polarParams->n + 1, 2 * listSize, llr);
      for (int i = 0; i < currentListSize; i++) {
-      for (int j = 0; j < polarParams->N; j++) {
+        for (int j = 0; j < polarParams->N; j++) {
-	for (int k = 0; k < (polarParams->n+1); k++) {
+          for (int k = 0; k < (polarParams->n + 1); k++) {
-            bit[j][k][i+currentListSize]=bit[j][k][i];
+            bit[j][k][i + currentListSize] = bit[j][k][i];
-            llr[j][k][i+currentListSize]=llr[j][k][i];
+            llr[j][k][i + currentListSize] = llr[j][k][i];
          }
-	}
+        }
      }
-	for (int i = 0; i < currentListSize; i++) {
+      for (int i = 0; i < currentListSize; i++) {
-	  bit[currentBit][0][i]=0;
+        bit[currentBit][0][i] = 0;
-	  crcState[i+currentListSize]=crcState[i];
+        crcState[i + currentListSize] = crcState[i];
-	}
+      }
-      for (int i = currentListSize; i < 2*currentListSize; i++) bit[currentBit][0][i]=1;
-      bitUpdated[currentBit][0]=1;
-      updateCrcChecksum2(polarParams->crcParityBits, 2*listSize, crcChecksum,
-			 polarParams->K, polarParams->crcParityBits, extended_crc_generator_matrix, 
-                         currentListSize, nonFrozenBit, polarParams->crcParityBits);
-      currentListSize*=2;
-      //Keep only the best "listSize" number of entries.
+      for (int i = currentListSize; i < 2 * currentListSize; i++)
+        bit[currentBit][0][i] = 1;
+      bitUpdated[currentBit][0] = 1;
+      updateCrcChecksum2(polarParams->crcParityBits,
+                         2 * listSize,
+                         crcChecksum,
+                         polarParams->K,
+                         polarParams->crcParityBits,
+                         extended_crc_generator_matrix,
+                         currentListSize,
+                         nonFrozenBit,
+                         polarParams->crcParityBits);
+      currentListSize *= 2;
+      // Keep only the best "listSize" number of entries.
      if (currentListSize > listSize) {
-        for (uint8_t i = 0; i < 2*listSize; i++) listIndex[i]=i;
+        for (uint8_t i = 0; i < 2 * listSize; i++)
+          listIndex[i] = i;
        nr_sort_asc_double_1D_array_ind(pathMetric, listIndex, currentListSize);
-        //sort listIndex[listSize, ..., 2*listSize-1] in descending order.
+        // sort listIndex[listSize, ..., 2*listSize-1] in descending order.
        uint8_t swaps, tempInd;
        for (uint8_t i = 0; i < listSize; i++) {
          swaps = 0;
-          for (uint8_t j = listSize; j < (2*listSize - i) - 1; j++) {
+          for (uint8_t j = listSize; j < (2 * listSize - i) - 1; j++) {
-            if (listIndex[j+1] > listIndex[j]) {
+            if (listIndex[j + 1] > listIndex[j]) {
              tempInd = listIndex[j];
              listIndex[j] = listIndex[j + 1];
              listIndex[j + 1] = tempInd;
@@ -196,8 +205,8 @@ int8_t polar_decoder(double *input,
        for (int k=(listSize-1); k>0; k--) {
          for (int i=0; i<polarParams->N; i++) {
            for (int j=0; j<(polarParams->n+1); j++) {
-              bit[i][j][listIndex[(2*listSize-1)-k]]=bit[i][j][listIndex[k]];
+              bit[i][j][listIndex[(2 * listSize - 1) - k]] = bit[i][j][listIndex[k]];
-              llr[i][j][listIndex[(2*listSize-1)-k]]=llr[i][j][listIndex[k]];
+              llr[i][j][listIndex[(2 * listSize - 1) - k]] = llr[i][j][listIndex[k]];
            }
          }
        }
@@ -285,7 +294,8 @@ int8_t polar_decoder(double *input,
  for (uint8_t i = 0; i < fmin(listSize, (pow(2,polarParams->crcCorrectionBits)) ); i++) {
    if ( crcState[listIndex[i]] == 1 ) {
-      for (int j = 0; j < polarParams->N; j++) polarParams->nr_polar_U[j]=bit[j][0][listIndex[i]];
+      for (int j = 0; j < polarParams->N; j++)
+        polarParams->nr_polar_U[j] = bit[j][0][listIndex[i]];
      //Extract the information bits (û to ĉ)
      nr_polar_info_bit_extraction(polarParams->nr_polar_U, polarParams->nr_polar_CPrime, polarParams->information_bit_pattern, polarParams->N);
@@ -626,7 +636,7 @@ uint32_t polar_decoder_int16(int16_t *input,
  printf("\n");
 #endif
-  int16_t d_tilde[polarParams->N];// = malloc(sizeof(double) * polarParams->N);
+  int16_t d_tilde[polarParams->N];
  nr_polar_rate_matching_int16(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength, polarParams->i_bil);
  for (int i=0; i<polarParams->N; i++) {
@@ -687,14 +697,9 @@ uint32_t polar_decoder_int16(int16_t *input,
  uint64_t B[4] = {0};
  if (polarParams->K<65) {
-    B[0] = polarParams->B_tab0[0][Cprimebyte[0]] |
+    B[0] = polarParams->B_tab0[0][Cprimebyte[0]] | polarParams->B_tab0[1][Cprimebyte[1]] | polarParams->B_tab0[2][Cprimebyte[2]]
-           polarParams->B_tab0[1][Cprimebyte[1]] |
+           | polarParams->B_tab0[3][Cprimebyte[3]] | polarParams->B_tab0[4][Cprimebyte[4]] | polarParams->B_tab0[5][Cprimebyte[5]]
-           polarParams->B_tab0[2][Cprimebyte[2]] |
+           | polarParams->B_tab0[6][Cprimebyte[6]] | polarParams->B_tab0[7][Cprimebyte[7]];
-           polarParams->B_tab0[3][Cprimebyte[3]] |
-           polarParams->B_tab0[4][Cprimebyte[4]] |
-           polarParams->B_tab0[5][Cprimebyte[5]] |
-           polarParams->B_tab0[6][Cprimebyte[6]] |
-           polarParams->B_tab0[7][Cprimebyte[7]];
  } else if (polarParams->K<129) {
    int len = polarParams->K/8;

--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_decoding_tools.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_decoding_tools.c
@@ -45,20 +45,23 @@ static inline void updateBit(uint8_t listSize,
 			     uint8_t bit[xlen][ylen][zlen],
 			     uint8_t bitU[xlen][ylen])
 {
-	uint16_t offset = ( xlen/(pow(2,(ylen-col))) );
+  uint16_t offset = (xlen / (pow(2, (ylen - col))));
-	for (uint8_t i=0; i<listSize; i++) {
+  for (uint8_t i = 0; i < listSize; i++) {
-		if (( (row) % (2*offset) ) >= offset ) {
+    if (((row) % (2 * offset)) >= offset) {
-		  if (bitU[row][col-1]==0) updateBit(listSize, row, (col-1), xlen, ylen, zlen, bit, bitU);
+      if (bitU[row][col - 1] == 0)
-			bit[row][col][i] = bit[row][col-1][i];
+        updateBit(listSize, row, (col - 1), xlen, ylen, zlen, bit, bitU);
-		} else {
+      bit[row][col][i] = bit[row][col - 1][i];
-		  if (bitU[row][col-1]==0) updateBit(listSize, row, (col-1), xlen, ylen, zlen, bit, bitU);
+    } else {
-		  if (bitU[row+offset][col-1]==0) updateBit(listSize, (row+offset), (col-1), xlen, ylen, zlen, bit, bitU);
+      if (bitU[row][col - 1] == 0)
-			bit[row][col][i] = ( (bit[row][col-1][i]+bit[row+offset][col-1][i]) % 2);
+        updateBit(listSize, row, (col - 1), xlen, ylen, zlen, bit, bitU);
-		}
+      if (bitU[row + offset][col - 1] == 0)
-	}
+        updateBit(listSize, (row + offset), (col - 1), xlen, ylen, zlen, bit, bitU);
+      bit[row][col][i] = ((bit[row][col - 1][i] + bit[row + offset][col - 1][i]) % 2);
+    }
+  }
-	bitU[row][col]=1;
+  bitU[row][col] = 1;
 }
 static inline void computeLLR(uint16_t row,
@@ -75,35 +78,38 @@ static inline void computeLLR(uint16_t row,
  llr[row][col][i] = log((exp(a + b) + 1) / (exp(a) + exp(b))); //eq. (8a)
 }
 void updateLLR(uint8_t listSize,
-	       uint16_t row,
+               uint16_t row,
-	       uint16_t col,
+               uint16_t col,
-	        uint16_t xlen,
+               uint16_t xlen,
-	       uint8_t ylen,
+               uint8_t ylen,
-	       int zlen,
+               int zlen,
-	       double  llr[xlen][ylen][zlen],
+               double llr[xlen][ylen][zlen],
-	       uint8_t llrU[xlen][ylen],
+               uint8_t llrU[xlen][ylen],
-	       uint8_t bit[xlen][ylen][zlen],
+               uint8_t bit[xlen][ylen][zlen],
-	       uint8_t bitU[xlen][ylen]
+               uint8_t bitU[xlen][ylen])
-	       )
 {
-	uint16_t offset = (xlen/(pow(2,(ylen-col-1))));
+  uint16_t offset = (xlen / (pow(2, (ylen - col - 1))));
-	for (uint8_t i=0; i<listSize; i++) {
+  for (uint8_t i = 0; i < listSize; i++) {
-		if (( (row) % (2*offset) ) >= offset ) {
+    if ((row % (2 * offset)) >= offset) {
-		  if(bitU[row-offset][col]==0) updateBit(listSize, (row-offset), col, xlen, ylen, zlen, bit, bitU);
+      if (bitU[row - offset][col] == 0)
-		  if(llrU[row-offset][col+1]==0) updateLLR(listSize, (row-offset), (col+1), xlen, ylen, zlen, llr, llrU, bit, bitU );
+        updateBit(listSize, (row - offset), col, xlen, ylen, zlen, bit, bitU);
-		  if(llrU[row][col+1]==0) updateLLR(listSize, row, (col+1), xlen, ylen, zlen, llr, llrU, bit, bitU);
+      if (llrU[row - offset][col + 1] == 0)
-			llr[row][col][i] = (pow((-1),bit[row-offset][col][i])*llr[row-offset][col+1][i]) + llr[row][col+1][i];
+        updateLLR(listSize, (row - offset), (col + 1), xlen, ylen, zlen, llr, llrU, bit, bitU);
-		} else {
+      if (llrU[row][col + 1] == 0)
-		  if(llrU[row][col+1]==0) updateLLR(listSize, row, (col+1), xlen, ylen, zlen, llr, llrU, bit, bitU );
+        updateLLR(listSize, row, (col + 1), xlen, ylen, zlen, llr, llrU, bit, bitU);
-		  if(llrU[row+offset][col+1]==0) updateLLR(listSize, (row+offset), (col+1), xlen, ylen, zlen, llr, llrU, bit, bitU );
+      llr[row][col][i] = (pow((-1), bit[row - offset][col][i]) * llr[row - offset][col + 1][i]) + llr[row][col + 1][i];
-		  computeLLR(row, col, i, offset, xlen, ylen, zlen, llr);
+    } else {
-		}
+      if (llrU[row][col + 1] == 0)
-	}
+        updateLLR(listSize, row, (col + 1), xlen, ylen, zlen, llr, llrU, bit, bitU);
-	llrU[row][col]=1;
+      if (llrU[row + offset][col + 1] == 0)
+        updateLLR(listSize, (row + offset), (col + 1), xlen, ylen, zlen, llr, llrU, bit, bitU);
+      computeLLR(row, col, i, offset, xlen, ylen, zlen, llr);
+    }
+  }
+  llrU[row][col] = 1;
-	//	printf("LLR (a %f, b %f): llr[%d][%d] %f\n",32*a,32*b,col,row,32*llr[col][row]);
+  //	printf("LLR (a %f, b %f): llr[%d][%d] %f\n",32*a,32*b,col,row,32*llr[col][row]);
 }
 void updatePathMetric(double *pathMetric,

--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_defs.h
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_defs.h
@@ -112,7 +112,7 @@ struct nrPolar_params {
  const uint8_t **crc_generator_matrix; // G_P
  const uint8_t **G_N;
-  uint64_t **G_N_tab;
+  fourDimArray_t *G_N_tab;
  int groupsize;
  int *rm_tab;
  uint64_t cprime_tab0[32][256];
@@ -219,9 +219,7 @@ uint32_t nr_polar_output_length(uint16_t K,
                                uint16_t E,
                                uint8_t n_max);
-void nr_polar_channel_interleaver_pattern(uint16_t *cip,
+void nr_polar_channel_interleaver_pattern(uint16_t *cip, const uint8_t I_BIL, const uint16_t E);
-    uint8_t I_BIL,
-    uint16_t E);
 void nr_polar_rate_matching_pattern(uint16_t *rmp,
                                    uint16_t *J,
@@ -258,7 +256,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
                               const uint16_t *Q_0_Nminus1,
                               uint16_t K,
                               uint16_t N,
-                               uint16_t E,
+                               const uint16_t E,
                               uint8_t n_PC,
                               uint8_t n_pc_wm);
@@ -284,7 +282,7 @@ void nr_polar_generate_u(uint64_t *u,
                         uint16_t N,
                         uint8_t n_pc);
-void nr_polar_uxG(uint64_t *D, const uint64_t *u, const uint64_t **G_N_tab, uint16_t N);
+void nr_polar_uxG(uint64_t *D, const uint64_t *u, const fourDimArray_t *G_N_tab, uint16_t N);
 void nr_polar_info_extraction_from_u(uint64_t *Cprime,
                                     const uint8_t *u,

--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_encoder.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_encoder.c
@@ -359,15 +359,11 @@ void build_polar_tables(t_nrPolar_params *polarParams) {
  AssertFatal(polarParams->K > 17, "K = %d < 18, is not possible\n",polarParams->K);
  AssertFatal(polarParams->K < 129, "K = %d > 128, is not supported yet\n",polarParams->K);
  int bit_i,ip;
-  int numbytes = polarParams->K>>3;
+  const int numbytes = (polarParams->K+7)/8;
-  int residue = polarParams->K&7;
+  const int residue = polarParams->K&7;
-  int numbits;
-  if (residue>0) numbytes++;
  for (int byte=0; byte<numbytes; byte++) {
-    if (byte<(polarParams->K>>3)) numbits=8;
+    int numbits = byte<(polarParams->K>>3) ? 8 : residue;
-    else numbits=residue;
    for (int val=0; val<256; val++) {
      polarParams->cprime_tab0[byte][val] = 0;
@@ -388,16 +384,20 @@ void build_polar_tables(t_nrPolar_params *polarParams) {
  AssertFatal(polarParams->N == 512 || polarParams->N == 256 || polarParams->N == 128 || polarParams->N == 64, "N = %d, not done yet\n", polarParams->N);
  // build G bit vectors for information bit positions and convert the bit as bytes tables in nr_polar_kronecker_power_matrices.c to
-  // 64 bit packed vectors.
+  // 64 bit packed vectors. 
-  polarParams->G_N_tab = (uint64_t **)calloc(polarParams->N, sizeof(int64_t *));
+  // Truncates id N%64 != 0
+  allocCast2D(pp, uint64_t, polarParams->G_N_tab, polarParams->N, polarParams->N / 64, false);
+  simde__m256i zeros = simde_mm256_setzero_si256();
+  // this code packs the one bit per byte of G_N into a packed bits G_N_tab
  for (int i = 0; i < polarParams->N; i++) {
-    polarParams->G_N_tab[i] = (uint64_t *)memalign(32, (polarParams->N / 64) * sizeof(uint64_t));
+    for (int j = 0; j < polarParams->N; j += 64) {
-    memset((void *)polarParams->G_N_tab[i], 0, (polarParams->N / 64) * sizeof(uint64_t));
+      const simde__m256i tmp1 = simde_mm256_cmpgt_epi8(*(simde__m256i *)&polarParams->G_N[i][j], zeros);
+      const simde__m256i tmp2 = simde_mm256_cmpgt_epi8(*(simde__m256i *)&polarParams->G_N[i][j + 32], zeros);
-    for (int j = 0; j < polarParams->N; j++)
+      // cast directly to uint64_t from int32_t propagates the sign bit (in gcc)
-      polarParams->G_N_tab[i][j / 64] |= ((uint64_t)polarParams->G_N[i][j]) << (j & 63);
+      const uint32_t part1 = simde_mm256_movemask_epi8(tmp1);
+      const uint32_t part2 = simde_mm256_movemask_epi8(tmp2);
+      pp[i][j / 64] = ((uint64_t)part2 << 32) | part1;
+    }
 #ifdef DEBUG_POLAR_ENCODER
    printf("Bit %d Selecting row %d of G : ", i, i);
@@ -519,17 +519,13 @@ void polar_encoder_fast(uint64_t *A,
  //int bitlen0=bitlen;
 #ifdef POLAR_CODING_DEBUG
-  int A_array = (bitlen + 63) >> 6;
  printf("\nTX\n");
  printf("a: ");
-  for (int n = 0; n < bitlen; n++) {
+  for (int n = (bitlen + 63)/64 ; n >=0; n--) {
-    if (n % 4 == 0) {
+    if (n % 4 == 0) 
-      printf(" ");
+      printf(" "); 
-    }
+    if (n < bitlen) 
-    int n1 = n >> 6;
+      printf("%lu", (A[n/64] >> (n%64)) & 1);
-    int n2 = n - (n1 << 6);
-    int alen = n1 == 0 ? bitlen - (A_array << 6) : 64;
-    printf("%lu", (A[A_array - 1 - n1] >> (alen - 1 - n2)) & 1);
  }
  printf("\n");
 #endif
@@ -692,7 +688,7 @@ void polar_encoder_fast(uint64_t *A,
 #endif
  uint64_t D[8] = {0};
-  nr_polar_uxG(D, u, (const uint64_t **)polarParams->G_N_tab, polarParams->N);
+  nr_polar_uxG(D, u, polarParams->G_N_tab, polarParams->N);
 #ifdef POLAR_CODING_DEBUG
  printf("d: ");

--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_procedures.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_procedures.c
@@ -119,22 +119,19 @@ void nr_polar_info_extraction_from_u(uint64_t *Cprime,
  }
 }
-void nr_polar_uxG(uint64_t *D, const uint64_t *u, const uint64_t **G_N_tab, uint16_t N)
+void nr_polar_uxG(uint64_t *D, const uint64_t *u, const fourDimArray_t *G_N_tab, uint16_t N)
 {
-  int N_array = N >> 6;
+  const int N64 = N / 64;
+  cast2Darray(g_n, uint64_t, G_N_tab);
  for (int n = 0; n < N; n++) {
-    const uint64_t *Gn = G_N_tab[N - 1 - n];
+    const uint64_t *Gn = g_n[N - 1 - n];
    int n_ones = 0;
-    for (int a = 0; a < N_array; a++) {
+    for (int a = 0; a < N64; a++)
-      uint64_t uxG = u[a] & Gn[a];
+      n_ones += count_bits_set(u[a] & Gn[a]);
-      if (uxG != 0)
-        n_ones += count_bits_set(uxG);
-    }
-    int n1 = n >> 6;
+    int n1 = n / 64;
-    int n2 = n - (n1 << 6);
+    int n2 = n - (n1 * 64);
    D[n1] |= ((uint64_t)n_ones & 1) << n2;
  }
 }
@@ -147,8 +144,7 @@ void nr_polar_bit_insertion(uint8_t *input,
 			    int16_t *Q_PC_N,
 			    uint8_t n_PC)
 {
-  uint16_t k=0;
+  int k = 0;
-  uint8_t flag;
  if (n_PC>0) {
    /*
@@ -156,22 +152,16 @@ void nr_polar_bit_insertion(uint8_t *input,
     */
  } else {
    for (int n=0; n<=N-1; n++) {
-      flag=0;
+      output[n] = 0;
      for (int m=0; m<=(K+n_PC)-1; m++) {
 	if ( n == Q_I_N[m]) {
-	  flag=1;
+          output[n] = input[k];
-	  break;
+          k++;
-	}
+          break;
-      }
+  }
-      if (flag) { // n ϵ Q_I_N
-	output[n]=input[k];
-	k++;
-      } else {
-	output[n] = 0;
      }
    }
  }
 }
@@ -179,7 +169,7 @@ uint32_t nr_polar_output_length(uint16_t K,
 				uint16_t E,
 				uint8_t n_max)
 {
-  uint8_t n_1, n_2, n_min=5, n;
+  int n_1, n_2, n_min = 5;
  double R_min=1.0/8;
  if ( (E <= (9.0/8)*pow(2,ceil(log2(E))-1)) && (K/E < 9.0/16) ) {
@@ -189,8 +179,8 @@ uint32_t nr_polar_output_length(uint16_t K,
  }
  n_2 = ceil(log2(K/R_min));
-  n=n_max;
+  int n = n_max;
  if (n>n_1) n=n_1;
  if (n>n_2) n=n_2;
  if (n<n_min) n=n_min;
@@ -201,19 +191,14 @@ uint32_t nr_polar_output_length(uint16_t K,
  return ((uint32_t) pow(2.0,n)); //=polar_code_output_length
 }
+void nr_polar_channel_interleaver_pattern(uint16_t *cip, const uint8_t I_BIL, const uint16_t E)
-void nr_polar_channel_interleaver_pattern(uint16_t *cip,
-					  uint8_t I_BIL,
-					  uint16_t E)
 {
  if (I_BIL == 1) {
-    uint16_t T=0, k;
+    int T = E;
    while( ((T/2)*(T+1)) < E ) T++;
-    int16_t **v = malloc(T * sizeof(*v));
+    int16_t v[T][T];
-    for (int i = 0; i <= T-1; i++) v[i] = malloc((T-i) * sizeof(*(v[i])));
+    int k = 0;
-    k=0;
    for (int i = 0; i <= T-1; i++) {
      for (int j = 0; j <= (T-1)-i; j++) {
 	if (k<E) {
@@ -234,16 +219,11 @@ void nr_polar_channel_interleaver_pattern(uint16_t *cip,
 	}
      }
    }
-    for (int i = 0; i <= T-1; i++) free(v[i]);
-    free(v);
  } else {
    for (int i=0; i<=E-1; i++) cip[i]=i;
  }
 }
 void nr_polar_info_bit_pattern(uint8_t *ibp,
                               uint8_t *pcbp,
                               int16_t *Q_I_N,
@@ -253,47 +233,46 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
                               const uint16_t *Q_0_Nminus1,
                               uint16_t K,
                               uint16_t N,
-                               uint16_t E,
+                               const uint16_t E,
                               uint8_t n_PC,
                               uint8_t n_pc_wm)
 {
-  int16_t *Q_Ftmp_N = malloc(sizeof(int16_t) * (N + 1)); // Last element shows the final
+  int Q_Ftmp_N[N + 1]; // Last element shows the final
-  int16_t *Q_Itmp_N = malloc(sizeof(int16_t) * (N + 1)); // array index assigned a value.
+  int Q_Itmp_N[N + 1]; // array index assigned a value.
  for (int i = 0; i <= N; i++) {
    Q_Ftmp_N[i] = -1; // Empty array.
    Q_Itmp_N[i] = -1;
  }
-  uint8_t flag;
+  int limit;
-  uint16_t limit, ind;
  if (E < N) {
    if ((K / (double)E) <= (7.0 / 16)) { // puncturing
      for (int n = 0; n <= N - E - 1; n++) {
-        ind = Q_Ftmp_N[N] + 1;
+  int ind = Q_Ftmp_N[N] + 1;
-        Q_Ftmp_N[ind] = J[n];
+  Q_Ftmp_N[ind] = J[n];
-        Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
+  Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
      }
      if ((E / (double)N) >= (3.0 / 4)) {
        limit = ceil((double)(3 * N - 2 * E) / 4);
        for (int n = 0; n <= limit - 1; n++) {
-          ind = Q_Ftmp_N[N] + 1;
+    int ind = Q_Ftmp_N[N] + 1;
-          Q_Ftmp_N[ind] = n;
+    Q_Ftmp_N[ind] = n;
-          Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
+    Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
        }
      } else {
        limit = ceil((double)(9 * N - 4 * E) / 16);
        for (int n = 0; n <= limit - 1; n++) {
-          ind = Q_Ftmp_N[N] + 1;
+    int ind = Q_Ftmp_N[N] + 1;
-          Q_Ftmp_N[ind] = n;
+    Q_Ftmp_N[ind] = n;
-          Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
+    Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
        }
      }
    } else { // shortening
      for (int n = E; n <= N - 1; n++) {
-        ind = Q_Ftmp_N[N] + 1;
+        int ind = Q_Ftmp_N[N] + 1;
        Q_Ftmp_N[ind] = J[n];
        Q_Ftmp_N[N] = Q_Ftmp_N[N] + 1;
      }
@@ -302,13 +281,12 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
  // Q_I,tmp_N = Q_0_N-1 \ Q_F,tmp_N
  for (int n = 0; n <= N - 1; n++) {
-    flag = 1;
+    bool flag = true;
-    for (int m = 0; m <= Q_Ftmp_N[N]; m++) {
+    for (int m = 0; m <= Q_Ftmp_N[N]; m++)
      if (Q_0_Nminus1[n] == Q_Ftmp_N[m]) {
-        flag = 0;
+        flag = false;
        break;
      }
-    }
    if (flag) {
      Q_Itmp_N[Q_Itmp_N[N] + 1] = Q_0_Nminus1[n];
      Q_Itmp_N[N]++;
@@ -317,7 +295,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
  // Q_I_N comprises (K+n_PC) most reliable bit indices in Q_I,tmp_N
  for (int n = 0; n <= (K + n_PC) - 1; n++) {
-    ind = Q_Itmp_N[N] + n - ((K + n_PC) - 1);
+    int ind = Q_Itmp_N[N] + n - ((K + n_PC) - 1);
    Q_I_N[n] = Q_Itmp_N[ind];
  }
@@ -330,13 +308,12 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
  // Q_F_N = Q_0_N-1 \ Q_I_N
  for (int n = 0; n <= N - 1; n++) {
-    flag = 1;
+    bool flag = true;
-    for (int m = 0; m <= (K + n_PC) - 1; m++) {
+    for (int m = 0; m <= (K + n_PC) - 1; m++)
      if (Q_0_Nminus1[n] == Q_I_N[m]) {
-        flag = 0;
+        flag = false;
        break;
      }
-    }
    if (flag) {
      Q_F_N[Q_F_N[N] + 1] = Q_0_Nminus1[n];
      Q_F_N[N]++;
@@ -361,11 +338,7 @@ void nr_polar_info_bit_pattern(uint8_t *ibp,
        break;
      }
    }
  }
-  free(Q_Ftmp_N);
-  free(Q_Itmp_N);
 }
@@ -391,22 +364,21 @@ void nr_polar_rate_matching_pattern(uint16_t *rmp,
 				    uint16_t N,
 				    uint16_t E)
 {
-  uint8_t i;
+  uint16_t d[N];
-  uint16_t *d, ind;
+  for (int m = 0; m < N; m++)
-  d = (uint16_t *)malloc(sizeof(uint16_t) * N);
+    d[m] = m;
-  uint16_t* y = calloc(N, sizeof(uint16_t));
+  uint16_t y[N];
+  memset(y, 0, sizeof(y));
-  for (int m=0; m<=N-1; m++) d[m]=m;
  for (int m=0; m<=N-1; m++){
-    i=floor((32*m)/N);
+    int i = floor((32 * m) / N);
    J[m] = (P_i_[i]*(N/32)) + (m%(N/32));
    y[m] = d[J[m]];
  }
  if (E>=N) { //repetition
    for (int k=0; k<=E-1; k++) {
-      ind = (k%N);
+      int ind = (k % N);
      rmp[k]=y[ind];
    }
  } else {
@@ -420,9 +392,6 @@ void nr_polar_rate_matching_pattern(uint16_t *rmp,
      }
    }
  }
-  free(d);
-  free(y);
 }
@@ -459,9 +428,9 @@ void nr_polar_rm_deinterleaving_cb(const int16_t *in, int16_t *out, const uint16
 {
  int T = ceil((sqrt(8 * E + 1) - 1) / 2);
  int v_tab[T][T];
+  memset(v_tab, 0, sizeof(v_tab));
  int k = 0;
  for (int i = 0; i < T; i++) {
-    memset(v_tab[i], 0, T * sizeof(int));
    for (int j = 0; j < T - i; j++) {
      if (k < E) {
        v_tab[i][j] = k + 1;
@@ -484,7 +453,7 @@ void nr_polar_rm_deinterleaving_cb(const int16_t *in, int16_t *out, const uint16
  }
  k = 0;
-  memset(out, 0, E * sizeof(int16_t));
+  memset(out, 0, E * sizeof(*out));
  for (int i = 0; i < T; i++) {
    for (int j = 0; j < T - i; j++) {
      if (v[i][j] != INT_MAX) {
@@ -508,12 +477,12 @@ void nr_polar_rate_matching_int16(int16_t *input,
  }
  if (E >= N) { // repetition
-    memset((void *)output, 0, N * sizeof(int16_t));
+    memset(output, 0, N * sizeof(*output));
    for (int i = 0; i <= E - 1; i++)
      output[rmp[i]] += input[i];
  } else {
    if ((K / (double)E) <= (7.0 / 16))
-      memset((void *)output, 0, N * sizeof(int16_t)); // puncturing
+      memset(output, 0, N * sizeof(*output)); // puncturing
    else { // shortening
      for (int i = 0; i <= N - 1; i++)
        output[i] = 32767; // instead of INFINITY, to prevent [-Woverflow]

--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_rate_match.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_rate_match.c
@@ -23,39 +23,34 @@
 #include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
 void nr_polar_rate_matching_pattern(uint16_t *rmp, uint16_t *J, const uint8_t *P_i_, uint16_t K, uint16_t N, uint16_t E){
+  int d[N];
-	uint8_t i;
+  int y[N];
-	uint16_t *d, *y, ind;
-	d = (uint16_t *)malloc(sizeof(uint16_t) * N);
+  for (int m = 0; m <= N - 1; m++)
-	y = (uint16_t *)malloc(sizeof(uint16_t) * N);
+    d[m] = m;
-	for (int m=0; m<=N-1; m++) d[m]=m;
+  for (int m = 0; m <= N - 1; m++) {
+    i = floor((32 * m) / N);
-	for (int m=0; m<=N-1; m++){
+    J[m] = (P_i_[i] * (N / 32)) + (m % (N / 32));
-		i=floor((32*m)/N);
+    y[m] = d[J[m]];
-		J[m] = (P_i_[i]*(N/32)) + (m%(N/32));
+  }
-		y[m] = d[J[m]];
-	}
+  if (E >= N) { // repetition
+    for (int k = 0; k <= E - 1; k++) {
-	if (E>=N) { //repetition
+      ind = (k % N);
-		for (int k=0; k<=E-1; k++) {
+      rmp[k] = y[ind];
-			ind = (k%N);
+    }
-			rmp[k]=y[ind];
+  } else {
-		}
+    if ((K / (double)E) <= (7.0 / 16)) { // puncturing
-	} else {
+      for (int k = 0; k <= E - 1; k++) {
-		if ( (K/(double)E) <= (7.0/16) ) { //puncturing
+        rmp[k] = y[k + N - E];
-			for (int k=0; k<=E-1; k++) {
+      }
-				rmp[k]=y[k+N-E];
+    } else { // shortening
-			}
+      for (int k = 0; k <= E - 1; k++) {
-		} else { //shortening
+        rmp[k] = y[k];
-			for (int k=0; k<=E-1; k++) {
+      }
-				rmp[k]=y[k];
+    }
-			}
+  }
-		}	
-	}	
-	free(d);
-	free(y);
 }

--- a/openair1/PHY/CODING/nr_polar_init.c
+++ b/openair1/PHY/CODING/nr_polar_init.c
@@ -48,10 +48,7 @@ static void nr_polar_delete_list(t_nrPolar_params * polarParams) {
    nr_polar_delete_list(polarParams->nextPtr);
  delete_decoder_tree(polarParams);
-  //From build_polar_tables()
+  // From build_polar_tables()
-  for (int n=0; n < polarParams->N; n++)
-    if (polarParams->G_N_tab[n])
-      free(polarParams->G_N_tab[n]);
  free(polarParams->G_N_tab);
  free(polarParams->rm_tab);
  if (polarParams->crc_generator_matrix)