LDPC SIMD optimizations and code generator (BG1)

Conflicts: cmake_targets/CMakeLists.txt openair1/PHY/CODING/TESTBENCH/ldpctest.c openair1/PHY/CODING/defs.h openair1/PHY/CODING/ldpc_generate_coefficient.c

LDPC SIMD optimizations and code generator (BG1)
Conflicts: cmake_targets/CMakeLists.txt openair1/PHY/CODING/TESTBENCH/ldpctest.c openair1/PHY/CODING/defs.h openair1/PHY/CODING/ldpc_generate_coefficient.c
468004ec · Raymond Knopp · Florian Kaltenberger · cea0369a · 468004ec · 468004ec
Commit 468004ec authored Feb 02, 2018 by Raymond Knopp Committed by Florian Kaltenberger Feb 05, 2018
18 changed files
--- a/cmake_targets/CMakeLists.txt
+++ b/cmake_targets/CMakeLists.txt
@@ -282,7 +282,7 @@ set(protobuf_generated_dir ${OPENAIR_BIN_DIR})

 # RRC
 ######
-add_list2_option(RRC_ASN1_VERSION "Rel10" "ASN.1 version of RRC interface" "Rel8" "Rel10" "CBA")
+add_list2_option(RRC_ASN1_VERSION "Rel14" "ASN.1 version of RRC interface" "Rel8" "Rel10" "CBA")

 if (${RRC_ASN1_VERSION} STREQUAL "Rel8")
  set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/EUTRA-RRC-Definitions-86.asn)
@@ -1089,13 +1089,12 @@ set(PHY_SRC
  ${OPENAIR1_DIR}/PHY/CODING/nr_segmentation.c
  ${OPENAIR1_DIR}/PHY/CODING/ldpc_decoder.c
  ${OPENAIR1_DIR}/PHY/CODING/ldpc_encoder.c
+  ${OPENAIR1_DIR}/PHY/CODING/ldpc_encoder2.c
  ${OPENAIR1_DIR}/PHY/CODING/ldpc_generate_coefficient.c
  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte.c
  ${OPENAIR1_DIR}/PHY/CODING/ccoding_byte_lte.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_sse.c
  ${OPENAIR1_DIR}/PHY/CODING/crc_byte.c
-  ${OPENAIR1_DIR}/PHY/CODING/ldpc_decoder.c
-  ${OPENAIR1_DIR}/PHY/CODING/ldpc_encoder.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
  ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c

--- a/cmake_targets/lte-simulators/CMakeLists.txt
+++ b/cmake_targets/lte-simulators/CMakeLists.txt
@@ -11,5 +11,6 @@ set(PBS_SIM False)
 set(PERFECT_CE True)
 set(NAS_UE False)
 set(MESSAGE_CHART_GENERATOR False)
+set(RRC_ASN1_VERSION "Rel14")

 include(${CMAKE_CURRENT_SOURCE_DIR}/../CMakeLists.txt)
--- a/openair1/PHY/CODING/TESTBENCH/ldpctest.c
+++ b/openair1/PHY/CODING/TESTBENCH/ldpctest.c
@@ -62,7 +62,8 @@ char quantize(double D,double x,unsigned char B)
 #define MAX_BLOCK_LENGTH 8448

 int test_ldpc(short No_iteration,
-              double rate,
+              int nom_rate,
+              int denom_rate,
              double SNR,
              unsigned char qbits,
              short block_length,
@@ -71,7 +72,7 @@ int test_ldpc(short No_iteration,
              unsigned int *crc_misses)
 {
  //clock initiate
-  time_stats_t time;
+  time_stats_t time,time_optim,tinput,tprep,tparity,toutput;
  opp_enabled=1;
  cpu_freq_GHz = get_cpu_freq_GHz();
  //short test_input[block_length];
@@ -79,6 +80,7 @@ int test_ldpc(short No_iteration,
  //short *c; //padded codeword
  short *esimated_output;
  unsigned char *channel_input;
+  unsigned char *channel_input_optim;
  double *channel_output;
  double *modulated_input;
  short *channel_output_fixed;
@@ -93,11 +95,15 @@ int test_ldpc(short No_iteration,
  // generate input block
  test_input=(unsigned char *)malloc(sizeof(unsigned char) * block_length/8);
  channel_input = (unsigned char *)malloc(sizeof(unsigned char) * 68*384);
+  channel_input_optim = (unsigned char *)malloc(sizeof(unsigned char) * 68*384);
  modulated_input = (double *)malloc(sizeof(double) * 68*384);
  channel_output  = (double *)malloc(sizeof(double) * 68*384);
-
  reset_meas(&time);
-
+  reset_meas(&time_optim);
+  reset_meas(&tinput);
+  reset_meas(&tprep);
+  reset_meas(&tparity);
+  reset_meas(&toutput);
  for (i=0; i<block_length/8; i++)
  {
    //test_input[i]=(unsigned char) rand();
@@ -130,6 +136,7 @@ int test_ldpc(short No_iteration,

  //find minimum value in all sets of lifting size
  Zc=0;
+
  for (i1=0; i1 < 51; i1++)
  {
    if (lift_size[i1] >= (double) block_length/Kb)
@@ -141,35 +148,47 @@ int test_ldpc(short No_iteration,
  }

  printf("BG %d, Zc %d, Kb %d\n",BG, Zc, Kb);
-
-    no_punctured_columns=(int)((nrows-2)*Zc+block_length-block_length/rate)/Zc;
-    //printf("%d\n",no_punctured_columns);
+  no_punctured_columns=(int)((nrows-2)*Zc+block_length-block_length*(1/((float)nom_rate/(float)denom_rate)))/Zc;
+  //  printf("puncture:%d\n",no_punctured_columns);
+  removed_bit=(nrows-no_punctured_columns-2) * Zc+block_length-(int)(block_length/((float)nom_rate/(float)denom_rate));
+  if (ntrials==0)
+    ldpc_encoder_orig(test_input,channel_input, block_length, nom_rate, denom_rate, 1);
  
  for (trial=0; trial < ntrials; trial++)
  {
+
    //// encoder
-    if (ntrials==1)
-      ldpc_encoder_orig(test_input, channel_input,block_length,rate,1);
-    else {
    start_meas(&time);
+
    if (BG==1)
-	ldpc_encoder(test_input, channel_input,block_length,rate);
+      ldpc_encoder(test_input, channel_input,block_length,nom_rate,denom_rate);
    else
-	ldpc_encoder_orig(test_input, channel_input,block_length,rate,0);
+      ldpc_encoder_orig(test_input, channel_input,block_length,nom_rate,denom_rate,0);
+    
    stop_meas(&time);
+    start_meas(&time_optim);
+    if (BG==1) {
+      ldpc_encoder_optim(test_input, channel_input_optim,block_length,nom_rate,denom_rate,&tinput,&tprep,&tparity,&toutput);
    }
-    //print_meas_now(&time, "", stdout);
+    stop_meas(&time_optim);
    
    
-    //  for (i=0;i<8448;i++)
+    for (i = 0; i < block_length+(nrows-no_punctured_columns) * Zc - removed_bit; i++)
+      if (channel_input[i]!=channel_input_optim[i]) printf("differ in pos %d (%d,%d)\n",i,
+							   channel_input[i],
+							   channel_input_optim[i]);
+    //print_meas_now(&time, "", stdout);
+
+    //for (i=0;i<25344;i++)
    //printf("channel_input[%d]=%d\n",i,channel_input[i]);
+
    //printf("%d ",channel_input[i]);

    if ((BG==2) && (Zc==128||Zc==256))
    {
      channel_output_fixed  = (short *)malloc( (Kb+nrows) * Zc*sizeof(short));
      memset(channel_output_fixed,0,(Kb+nrows) * Zc*sizeof(short));
-      removed_bit=(nrows-no_punctured_columns-2) * Zc+block_length-(int)(block_length/rate);
+      removed_bit=(nrows-no_punctured_columns-2) * Zc+block_length-(int)(block_length/((float)nom_rate/(float)denom_rate));
      //printf("removed_bit:%d\n",removed_bit);

      for (i = 2*Zc; i < (Kb+nrows-no_punctured_columns) * Zc-removed_bit; i++)
@@ -201,7 +220,7 @@ int test_ldpc(short No_iteration,
 #endif
      // decode the sequence
      // decoder supports BG2, Z=128 & 256
-      esimated_output=ldpc_decoder(channel_output_fixed, block_length, No_iteration, rate);
+      esimated_output=ldpc_decoder(channel_output_fixed, block_length, No_iteration, (double)((float)nom_rate/(float)denom_rate));

      //for (i=(Kb+nrows) * Zc-5;i<(Kb+nrows) * Zc;i++)
      //  printf("esimated_output[%d]=%d\n",i,esimated_output[i]);
@@ -218,31 +237,35 @@ int test_ldpc(short No_iteration,

      free(channel_output_fixed);
    }
-    else
-      if (trial==0)
+    else if (trial==0)
      printf("decoder is not supported\n");
  }

  print_meas(&time,"ldpc_encoder",NULL,NULL);
-
+  print_meas(&time_optim,"ldpc_encoder_optim",NULL,NULL);
+  print_meas(&tinput,"ldpc_encoder_optim(input)",NULL,NULL);
+  print_meas(&tprep,"ldpc_encoder_optim(prep)",NULL,NULL);
+  print_meas(&tparity,"ldpc_encoder_optim(parity)",NULL,NULL);
+  print_meas(&toutput,"ldpc_encoder_optim(output)",NULL,NULL);
  return *errors;
 }

 int main(int argc, char *argv[])
 {
-
  unsigned int errors,crc_misses;
  short block_length=22*384; // decoder supports length: 1201 -> 1280, 2401 -> 2560
  short No_iteration=25;
-  double rate=0.333;
+  //double rate=0.333;
+  int nom_rate=1;
+  int denom_rate=3;
  double SNR,SNR_lin;
  unsigned char qbits=4;
  unsigned int decoded_errors[100]; // initiate the size of matrix equivalent to size of SNR
  int c,i=0;
-  int n_trials = 100;
+  int n_trials = 1;
  randominit(0);

-  while ((c = getopt (argc, argv, "q:r:l:n:")) != -1)
+  while ((c = getopt (argc, argv, "q:r:s:l:n:")) != -1)
    switch (c)
    {
      case 'q':
@@ -250,7 +273,11 @@ int main(int argc, char *argv[])
        break;

      case 'r':
-        rate = atof(optarg);
+        nom_rate = atoi(optarg);
+        break;
+
+      case 's':
+        denom_rate = atoi(optarg);
        break;

      case 'l':
@@ -268,13 +295,14 @@ int main(int argc, char *argv[])
  printf("the decoder supports BG2, Kb=10, Z=128 & 256\n");
  printf(" range of blocklength: 1201 -> 1280, 2401 -> 2560\n");
  printf("block length %d: \n", block_length);
-  printf("rate: %f\n",rate);
+  printf("rate: %d/%d\n",nom_rate,denom_rate);

  for (SNR=-2.1; SNR<-2; SNR+=.1)
  {
    SNR_lin = pow(10,SNR/10);
    decoded_errors[i]=test_ldpc(No_iteration,
-                                rate,
+                                nom_rate,
+                                denom_rate,
                                SNR_lin,   // noise standard deviation
                                qbits,
                                block_length,   // block length bytes

--- a/openair1/PHY/CODING/defs.h
+++ b/openair1/PHY/CODING/defs.h
@@ -572,7 +572,10 @@ int16_t reverseBits(int32_t ,int32_t);
 void phy_viterbi_dot11(int8_t *,uint8_t *,uint16_t);

 short *ldpc_decoder(short *msgChannel,short block_length,short No_iteration,double rate);
-int ldpc_encoder(unsigned char *test_input,unsigned char* channel_input,short block_length,double rate);
-int ldpc_encoder_orig(unsigned char *test_input,unsigned char* channel_input,short block_length,double rate,unsigned char gen_code);
+int encode_parity_check_part(uint16_t *c,uint16_t *d, short BG,short Zc,short Kb);
+int ldpc_encoder(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate);
+int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,unsigned char gen_code);
+int ldpc_encoder_multi_segment(unsigned char **test_input,unsigned char **channel_input,short block_length,double rate,uint8_t n_segments);
+int ldpc_encoder_optim(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput);

 #endif
--- a/openair1/PHY/CODING/ldpc176_byte.c
+++ b/openair1/PHY/CODING/ldpc176_byte.c
--- a/openair1/PHY/CODING/ldpc192_byte.c
+++ b/openair1/PHY/CODING/ldpc192_byte.c
--- a/openair1/PHY/CODING/ldpc208_byte.c
+++ b/openair1/PHY/CODING/ldpc208_byte.c
--- a/openair1/PHY/CODING/ldpc224_byte.c
+++ b/openair1/PHY/CODING/ldpc224_byte.c
--- a/openair1/PHY/CODING/ldpc240_byte.c
+++ b/openair1/PHY/CODING/ldpc240_byte.c
--- a/openair1/PHY/CODING/ldpc256_byte.c
+++ b/openair1/PHY/CODING/ldpc256_byte.c
--- a/openair1/PHY/CODING/ldpc288_byte.c
+++ b/openair1/PHY/CODING/ldpc288_byte.c
--- a/openair1/PHY/CODING/ldpc320_byte.c
+++ b/openair1/PHY/CODING/ldpc320_byte.c
--- a/openair1/PHY/CODING/ldpc352_byte.c
+++ b/openair1/PHY/CODING/ldpc352_byte.c
--- a/openair1/PHY/CODING/ldpc384_byte.c
+++ b/openair1/PHY/CODING/ldpc384_byte.c
--- a/openair1/PHY/CODING/ldpc_encoder.c
+++ b/openair1/PHY/CODING/ldpc_encoder.c
@@ -5072,6 +5072,8 @@ int encode_parity_check_part(unsigned char *c,unsigned char *d, short BG,short Z
          // calculate each row in base graph
          //row: 0
          d[i2+0*Zc]=c2[307]^c2[76]^c2[205]^c2[276]^c2[787]^c2[1018]^c2[855]^c2[1586]^c2[1612]^c2[1864]^c2[2673]^c2[2377]^c2[2304]^c2[3360]^c2[3404]^c2[3347]^c2[4021]^c2[3984]^c2[4096]^c2[4824]^c2[4769]^c2[4807]^c2[5707]^c2[5643]^c2[5529]^c2[6475]^c2[6304]^c2[6200]^c2[7229]^c2[7090]^c2[6975]^c2[7968]^c2[7809]^c2[7812]^c2[8557]^c2[8743]^c2[8753]^c2[9233]^c2[9558]^c2[9447]^c2[10341]^c2[10184]^c2[10325]^c2[10969]^c2[10840]^c2[10964]^c2[11735]^c2[11619]^c2[11573]^c2[12394]^c2[12642]^c2[12592]^c2[13170]^c2[13187]^c2[13356]^c2[14066]^c2[14064]^c2[14095]^c2[14772]^c2[14923]^c2[14797]^c2[15690]^c2[15373]^c2[15399]^c2[16474]^c2[16240]^c2[16485];
+	  //	  if ((i2&31)==0) printf("\ni2 %d: ",i2>>5);
+	  //	  printf("%d,",d[i2]);
          //row: 1
          d[i2+1*Zc]=c2[307]^c2[308]^c2[77]^c2[206]^c2[277]^c2[787]^c2[788]^c2[1019]^c2[856]^c2[1586]^c2[1587]^c2[1613]^c2[1865]^c2[2673]^c2[2674]^c2[2378]^c2[2305]^c2[3361]^c2[3405]^c2[3348]^c2[4021]^c2[4022]^c2[3985]^c2[4097]^c2[4824]^c2[4825]^c2[4770]^c2[4808]^c2[5708]^c2[5644]^c2[5530]^c2[6476]^c2[6305]^c2[6201]^c2[7229]^c2[7230]^c2[7091]^c2[6976]^c2[7968]^c2[7969]^c2[7810]^c2[7813]^c2[8557]^c2[8558]^c2[8744]^c2[8754]^c2[9233]^c2[9234]^c2[9559]^c2[9448]^c2[10341]^c2[10342]^c2[10185]^c2[10326]^c2[10970]^c2[10841]^c2[10965]^c2[11735]^c2[11736]^c2[11620]^c2[11574]^c2[12394]^c2[12395]^c2[12643]^c2[12593]^c2[13171]^c2[13188]^c2[13357]^c2[14066]^c2[14067]^c2[14065]^c2[14096]^c2[14772]^c2[14773]^c2[14924]^c2[14798]^c2[15690]^c2[15691]^c2[15374]^c2[15400]^c2[16474]^c2[16475]^c2[16241]^c2[16486];
          //row: 2

--- a/openair1/PHY/CODING/ldpc_encoder2.c
+++ b/openair1/PHY/CODING/ldpc_encoder2.c
--- a/openair1/PHY/CODING/ldpc_generate_coefficient.c
+++ b/openair1/PHY/CODING/ldpc_generate_coefficient.c
--- a/openair1/PHY/LTE_TRANSPORT/dlsch_coding.c
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_coding.c
@@ -766,7 +766,7 @@ int dlsch_encoding(PHY_VARS_eNB *eNB,
 	  unsigned int Kr=0,Kr_bytes,r,r_offset=0,Kr_int=0;
  //  unsigned short m=dlsch->harq_processes[harq_pid]->mcs;
  uint8_t beamforming_mode=0;
-  double rate = 0.33;
+  //double rate = 0.33;

  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_ENCODING, VCD_FUNCTION_IN);

@@ -899,13 +899,13 @@ int dlsch_encoding(PHY_VARS_eNB *eNB,
    	Kr_int = Kr;

 #ifdef DEBUG_DLSCH_CODING
-      printf("start ldpc encoder B %d, rate %f\n",dlsch->harq_processes[harq_pid]->B,rate);
+      printf("start ldpc encoder B %d\n",dlsch->harq_processes[harq_pid]->B);
      printf("input %d %d %d %d %d \n", dlsch->harq_processes[harq_pid]->c[r][0], dlsch->harq_processes[harq_pid]->c[r][1], dlsch->harq_processes[harq_pid]->c[r][2],dlsch->harq_processes[harq_pid]->c[r][3], dlsch->harq_processes[harq_pid]->c[r][4]);
 #endif

      start_meas(te_stats);
      memset(dlsch->harq_processes[harq_pid]->d[r],0,(96+12+3+3*8448)*sizeof(uint8_t));
-      ldpc_encoder((unsigned char*)dlsch->harq_processes[harq_pid]->c[r],(unsigned char*)&dlsch->harq_processes[harq_pid]->d[r][96],Kr,rate);
+      ldpc_encoder((unsigned char*)dlsch->harq_processes[harq_pid]->c[r],(unsigned char*)&dlsch->harq_processes[harq_pid]->d[r][96],Kr,1,3);
      stop_meas(te_stats);

 #endif