added all optimizations except rate matching for N=512 polar-encoder.

c5c3c325 · Raymond Knopp · 6f38c94d · c5c3c325 · c5c3c325
Commit c5c3c325 authored Oct 26, 2018 by Raymond Knopp
2 changed files
--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_defs.h
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_defs.h
@@ -109,6 +109,8 @@ struct nrPolar_params {
  uint8_t **crc_generator_matrix; //G_P
  uint8_t **G_N;
+  uint64_t **G_N_tab;
+  uint64_t cprime_tab[8][256];
  uint32_t* crc256Table;
  uint8_t **extended_crc_generator_matrix;
  //lowercase: bits, Uppercase: Bits stored in bytes
@@ -125,7 +127,7 @@ struct nrPolar_params {
  uint8_t *nr_polar_B;
  uint8_t *nr_polar_U;
-decoder_tree_t tree;
+  decoder_tree_t tree;
 } __attribute__ ((__packed__));
 typedef struct nrPolar_params t_nrPolar_params;
 typedef t_nrPolar_params *t_nrPolar_paramsPtr;

--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_encoder.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_encoder.c
@@ -28,13 +28,14 @@
 * \email turker.yilmaz@eurecom.fr
 * \note
 * \warning
-*/
+ */
 //#define DEBUG_POLAR_ENCODER
 //#define DEBUG_POLAR_ENCODER_DCI
 //#define DEBUG_POLAR_ENCODER_TIMING
 #include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
+#include "assertions.h"
 //input  [a_31 a_30 ... a_0]
 //output [f_31 f_30 ... f_0] [f_63 f_62 ... f_32] ...
@@ -158,17 +159,17 @@ void polar_encoder_dci(uint32_t *in,
      ( polarParams->nr_polar_B[polarParams->payloadBits+8+i] + ((n_RNTI>>(15-i))&1) ) % 2;
  }
-/*	//(a to a')
+  /*	//(a to a')
 	nr_crc_bit2bit_uint32_8_t(in, polarParams->payloadBits, polarParams->nr_polar_aPrime);
 	//Parity bits computation (p)
 	polarParams->crcBit = crc24c(polarParams->nr_polar_aPrime, (polarParams->payloadBits+polarParams->crcParityBits));
-#ifdef DEBUG_POLAR_ENCODER_DCI
+	#ifdef DEBUG_POLAR_ENCODER_DCI
 	printf("[polar_encoder_dci] crc: 0x%08x\n", polarParams->crcBit);
 	for (int i=0; i<32; i++)
 	{
 	printf("%d\n",((polarParams->crcBit)>>i)&1);
 	}
-#endif
+	#endif
 	//(a to b)
 	//
 	// Bytewise operations
@@ -314,3 +315,89 @@ void polar_encoder_timing(uint32_t *in,
 	  (timeEncoderRateMatching.diff_now/(cpuFreqGHz*1000.0)),
 	  (timeEncoderByte2Bit.diff_now/(cpuFreqGHz*1000.0)));
 }
+/*
+ * Precomputes the lookup tables stored in polarParams:
+ *  - cprime_tab[byte][val]: 64-bit word obtained by moving each bit i of the
+ *    byte value `val`, taken as byte number `byte` of the 64-bit input word,
+ *    to bit position interleaving_pattern[8*byte+i] (table b -> c').
+ *  - G_N_tab[i]: row Q_I_N[i] of G_N (stored one bit per byte) packed into
+ *    N/64 64-bit words, bit j of the row landing in bit (j&63) of word j/64.
+ *
+ * Aborts through AssertFatal unless 33 <= K <= 64 and N == 512.
+ * G_N_tab rows must already be allocated by the caller (N/64 words each).
+ */
+void build_polar_tables(t_nrPolar_paramsPtr polarParams) {
+  // build table b -> c'
+  AssertFatal(polarParams->K > 32, "K = %d < 33, is not supported yet\n",polarParams->K);
+  AssertFatal(polarParams->K < 65, "K = %d > 64, is not supported yet\n",polarParams->K);
+  for (int byte=0;byte<8;byte++) {
+    for (int val=0;val<256;val++) {
+      // Accumulate in a local and store once, so the result does not depend on
+      // cprime_tab having been zeroed beforehand and the call is repeatable.
+      uint64_t entry = 0;
+      for (int i=0;i<8;i++) {
+        const int b_idx = (8*byte)+i;
+        // Only K input bits exist; do not index the pattern past them.
+        // NOTE(review): assumes interleaving_pattern holds K entries - confirm.
+        if (b_idx >= polarParams->K) {
+          break;
+        }
+        // Widen to 64 bits BEFORE shifting: the target position can be up to 63,
+        // and shifting a 32-bit int that far is undefined behaviour.
+        const uint64_t bit_i = (uint64_t)((val>>i)&1);
+        entry |= (bit_i<<polarParams->interleaving_pattern[b_idx]);
+      }
+      polarParams->cprime_tab[byte][val] = entry;
+    }
+  }
+  AssertFatal(polarParams->N==512,"N = %d, not done yet\n",polarParams->N);
+  // build G bit vectors for information bit positions and convert the bit as bytes tables in nr_polar_kronecker_power_matrices.c to 64 bit packed vectors.
+  // keep only rows of G which correspond to information/crc bits
+  for (int i=0;i<polarParams->K;i++) {
+    memset((void*)polarParams->G_N_tab[i],0,(polarParams->N/64)*sizeof(uint64_t));
+    for (int j=0;j<polarParams->N;j++) {
+      // The uint8_t matrix entry would be promoted to int; cast to uint64_t so
+      // that shifts by 32..63 are well defined and keep the upper bits.
+      polarParams->G_N_tab[i][j/64] |= ((uint64_t)polarParams->G_N[polarParams->Q_I_N[i]][j])<<(j&63);
+    }
+  }
+}
+/*
+ * Table-driven polar encoder core for 33 <= K <= 64 and N == 512.
+ * Relies on cprime_tab and G_N_tab having been filled by build_polar_tables().
+ *
+ * in           payload bits, packed LSB-first into bytes (byte k holds bits 8k..8k+7)
+ * out          currently NOT written - rate matching is not implemented yet
+ * bitlen       number of payload bits in `in`, must be in [1,63]
+ * crcmask      currently unused
+ * polarParams  polar code parameters and precomputed tables
+ *
+ * Steps: load payload A, append the CRC to form B, interleave B into c' by
+ * per-byte table lookup, then XOR together the packed G_N rows selected by
+ * the set bits of c' into the 512-bit vector D.
+ */
+void polar_encoder_fast(void *in,
+			void *out,
+			int bitlen,
+			void *crcmask,
+			t_nrPolar_paramsPtr polarParams) {
+  AssertFatal(polarParams->K > 32, "K = %d < 33, is not supported yet\n",polarParams->K);
+  AssertFatal(polarParams->K < 65, "K = %d > 64, is not supported yet\n",polarParams->K);
+  // D below is fixed at 8 words and every G_N_tab row is read as 8 words.
+  AssertFatal(polarParams->N==512,"N = %d, not done yet\n",polarParams->N);
+  // The 64-bit shifts by bitlen below are only defined for 0 <= bitlen < 64.
+  AssertFatal(bitlen > 0 && bitlen < 64, "bitlen = %d, must be in [1,63]\n",bitlen);
+  // Load the payload from the input buffer; A must not be read uninitialized.
+  const uint8_t *in_bytes = (const uint8_t*)in;
+  uint64_t A = 0;
+  for (int k=0;k<((bitlen+7)/8);k++) {
+    A |= ((uint64_t)in_bytes[k])<<(8*k);
+  }
+  // Drop any stray bits above the payload so they cannot collide with the CRC.
+  A &= (((uint64_t)1)<<bitlen)-1;
+  // append crc
+  // crc24c() works on a byte buffer plus a bit count, so hand it the input
+  // buffer rather than the integer value; widen before shifting into place.
+  // NOTE(review): which bits of crc24c()'s return word hold the 24 parity
+  // bits is not visible here - confirm whether an extra alignment shift is needed.
+  const uint64_t crc = (uint64_t)crc24c((uint8_t*)in,bitlen);
+  const uint64_t B = A | (crc<<bitlen);
+  // for each byte of B, lookup in corresponding table for 64-bit word corresponding to that byte and its position
+  uint64_t Cprime = 0;
+  for (int byte=0;byte<8;byte++) {
+    Cprime |= polarParams->cprime_tab[byte][(B>>(8*byte))&0xff];
+  }
+  // now do Gu product (here using 64-bit XORs, we can also do with SIMD after)
+  // here we're reading out the bits LSB -> MSB, is this correct w.r.t. 3GPP ?
+  uint64_t D[8] __attribute__((aligned(32))) = {0};
+  // All K information/CRC bits take part in the product, not just the payload bits.
+  for (int i=0;i<polarParams->K;i++) {
+    // Expand bit i of c' to a full-width mask: 0 => 0000...00, 1 => 1111...11
+    const uint64_t Cprime_i = -((Cprime>>i)&1);
+    for (int w=0;w<8;w++) {
+      D[w] ^= (Cprime_i & polarParams->G_N_tab[i][w]);
+    }
+  }
+  // Rate matching on the 8 64-bit D bit-strings should be performed more or less like
+  // The interleaving on the single 64-bit input in the first step. We just need 64 lookup tables I guess, and they will have large entries
+  // NOTE(review): until rate matching exists, D is discarded and out/crcmask are untouched.
+  (void)D;
+  (void)out;
+  (void)crcmask;
+}