Commit 449b79ea authored by Raymond Knopp's avatar Raymond Knopp Committed by Florian Kaltenberger

LDPC SIMD optmizations and code generator (BG1)

Conflicts:
	cmake_targets/CMakeLists.txt
	openair1/PHY/CODING/TESTBENCH/ldpctest.c
	openair1/PHY/CODING/defs.h
	openair1/PHY/CODING/ldpc_generate_coefficient.c

Conflicts:
	openair1/PHY/CODING/TESTBENCH/ldpctest.c
	openair1/PHY/CODING/defs.h
	openair1/PHY/LTE_TRANSPORT/dlsch_coding.c
parent 27e5258a
......@@ -281,7 +281,7 @@ set(protobuf_generated_dir ${OPENAIR_BIN_DIR})
# RRC
######
add_list2_option(RRC_ASN1_VERSION "Rel10" "ASN.1 version of RRC interface" "Rel8" "Rel10" "CBA")
add_list2_option(RRC_ASN1_VERSION "Rel14" "ASN.1 version of RRC interface" "Rel8" "Rel10" "CBA")
if (${RRC_ASN1_VERSION} STREQUAL "Rel8")
set (RRC_GRAMMAR ${OPENAIR2_DIR}/RRC/LITE/MESSAGES/asn1c/ASN1_files/EUTRA-RRC-Definitions-86.asn)
......@@ -1044,14 +1044,14 @@ set(PHY_SRC
${OPENAIR1_DIR}/PHY/CODING/lte_segmentation.c
${OPENAIR1_DIR}/PHY/CODING/nr_segmentation.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/ldpc_decoder.c
${OPENAIR1_DIR}/PHY/CODING/ldpc_encoder.c
${OPENAIR1_DIR}/PHY/CODING/ldpc_encoder2.c
${OPENAIR1_DIR}/PHY/CODING/ldpc_generate_coefficient.c
${OPENAIR1_DIR}/PHY/CODING/ccoding_byte.c
${OPENAIR1_DIR}/PHY/CODING/ccoding_byte_lte.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_sse.c
${OPENAIR1_DIR}/PHY/CODING/crc_byte.c
${OPENAIR1_DIR}/PHY/CODING/ldpc_decoder.c
${OPENAIR1_DIR}/PHY/CODING/ldpc_encoder.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c
......
......@@ -11,5 +11,6 @@ set(PBS_SIM False)
set(PERFECT_CE True)
set(NAS_UE False)
set(MESSAGE_CHART_GENERATOR False)
set(RRC_ASN1_VERSION "Rel14")
include(${CMAKE_CURRENT_SOURCE_DIR}/../CMakeLists.txt)
This diff is collapsed.
......@@ -571,8 +571,11 @@ uint32_t crcbit (uint8_t * ,
int16_t reverseBits(int32_t ,int32_t);
void phy_viterbi_dot11(int8_t *,uint8_t *,uint16_t);
//short *ldpc_decoder(short *msgChannel,short block_length,short No_iteration,double rate);
//short *ldpc_encoder(char *test_input,char* channel_input,short block_length,double rate);
int ldpc_encoder(unsigned char *test_input,unsigned char* channel_input,short block_length,double rate);
short *ldpc_decoder(short *msgChannel,short block_length,short No_iteration,double rate);
int encode_parity_check_part(uint16_t *c,uint16_t *d, short BG,short Zc,short Kb);
int ldpc_encoder(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate);
int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,unsigned char gen_code);
int ldpc_encoder_multi_segment(unsigned char **test_input,unsigned char **channel_input,short block_length,double rate,uint8_t n_segments);
int ldpc_encoder_optim(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput);
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -5071,6 +5071,8 @@ int encode_parity_check_part(uint16_t *c,uint16_t *d, short BG,short Zc,short Kb
// calculate each row in base graph
//row: 0
d[i2+0*Zc]=c2[307]^c2[76]^c2[205]^c2[276]^c2[787]^c2[1018]^c2[855]^c2[1586]^c2[1612]^c2[1864]^c2[2673]^c2[2377]^c2[2304]^c2[3360]^c2[3404]^c2[3347]^c2[4021]^c2[3984]^c2[4096]^c2[4824]^c2[4769]^c2[4807]^c2[5707]^c2[5643]^c2[5529]^c2[6475]^c2[6304]^c2[6200]^c2[7229]^c2[7090]^c2[6975]^c2[7968]^c2[7809]^c2[7812]^c2[8557]^c2[8743]^c2[8753]^c2[9233]^c2[9558]^c2[9447]^c2[10341]^c2[10184]^c2[10325]^c2[10969]^c2[10840]^c2[10964]^c2[11735]^c2[11619]^c2[11573]^c2[12394]^c2[12642]^c2[12592]^c2[13170]^c2[13187]^c2[13356]^c2[14066]^c2[14064]^c2[14095]^c2[14772]^c2[14923]^c2[14797]^c2[15690]^c2[15373]^c2[15399]^c2[16474]^c2[16240]^c2[16485];
// if ((i2&31)==0) printf("\ni2 %d: ",i2>>5);
// printf("%d,",d[i2]);
//row: 1
d[i2+1*Zc]=c2[307]^c2[308]^c2[77]^c2[206]^c2[277]^c2[787]^c2[788]^c2[1019]^c2[856]^c2[1586]^c2[1587]^c2[1613]^c2[1865]^c2[2673]^c2[2674]^c2[2378]^c2[2305]^c2[3361]^c2[3405]^c2[3348]^c2[4021]^c2[4022]^c2[3985]^c2[4097]^c2[4824]^c2[4825]^c2[4770]^c2[4808]^c2[5708]^c2[5644]^c2[5530]^c2[6476]^c2[6305]^c2[6201]^c2[7229]^c2[7230]^c2[7091]^c2[6976]^c2[7968]^c2[7969]^c2[7810]^c2[7813]^c2[8557]^c2[8558]^c2[8744]^c2[8754]^c2[9233]^c2[9234]^c2[9559]^c2[9448]^c2[10341]^c2[10342]^c2[10185]^c2[10326]^c2[10970]^c2[10841]^c2[10965]^c2[11735]^c2[11736]^c2[11620]^c2[11574]^c2[12394]^c2[12395]^c2[12643]^c2[12593]^c2[13171]^c2[13188]^c2[13357]^c2[14066]^c2[14067]^c2[14065]^c2[14096]^c2[14772]^c2[14773]^c2[14924]^c2[14798]^c2[15690]^c2[15691]^c2[15374]^c2[15400]^c2[16474]^c2[16475]^c2[16241]^c2[16486];
//row: 2
......
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <types.h>
#include "assertions.h"
#include "PHY/TOOLS/time_meas.h"
#include "ldpc384_byte.c"
#include "ldpc352_byte.c"
#include "ldpc320_byte.c"
#include "ldpc288_byte.c"
#include "ldpc256_byte.c"
#include "ldpc240_byte.c"
#include "ldpc224_byte.c"
#include "ldpc208_byte.c"
#include "ldpc192_byte.c"
void encode_parity_check_part_optim(unsigned char *c,unsigned char *d, short BG,short Zc,short Kb)
{
AssertFatal(BG==1,"BG %d is not supported yet\n",BG);
if (BG==1)
{
switch (Zc)
{
case 2: break;
case 3: break;
case 4: break;
case 5: break;
case 6: break;
case 7: break;
case 8: break;
case 9: break;
case 10: break;
case 11: break;
case 12: break;
case 13: break;
case 14: break;
case 15: break;
case 16: break;
case 18: break;
case 20: break;
case 22: break;
case 24: break;
case 26: break;
case 28: break;
case 30: break;
case 32: break;
case 36: break;
case 40: break;
case 44: break;
case 48: break;
case 52: break;
case 56: break;
case 60: break;
case 64: break;
case 72: break;
case 80: break;
case 88: break;
case 96: break;
case 104: break;
case 112: break;
case 120: break;
case 128: break;
case 144: break;
case 160: break;
// case 176: ldpc176_byte(c,d); break;
case 192: ldpc192_byte(c,d); break;
case 208: ldpc208_byte(c,d); break;
case 224: ldpc224_byte(c,d); break;
case 240: ldpc240_byte(c,d); break;
case 256: ldpc256_byte(c,d); break;
case 288: ldpc288_byte(c,d); break;
case 320: ldpc320_byte(c,d); break;
case 352: ldpc352_byte(c,d); break;
case 384: ldpc384_byte(c,d); break;
}
}
}
int ldpc_encoder_optim(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput)
{
short BG,Zc,Kb,nrows,ncols;
int i,i1;
int no_punctured_columns,removed_bit;
//Table of possible lifting sizes
short lift_size[51]= {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384};
int simd_size;
//determine number of bits in codeword
//if (block_length>3840)
//{
AssertFatal(block_length>3840,"Block length < 3840 not supported yet\n");
BG=1;
Kb = 22;
nrows=46; //parity check bits
ncols=22; //info bits
//}
/* else if (block_length<=3840)
{
BG=2;
nrows=42; //parity check bits
ncols=10; // info bits
if (block_length>640)
Kb = 10;
else if (block_length>560)
Kb = 9;
else if (block_length>192)
Kb = 8;
else
Kb = 6;
} */
//find minimum value in all sets of lifting size
Zc=0;
for (i1=0; i1 < 51; i1++)
{
if (lift_size[i1] >= (double) block_length/Kb)
{
Zc = lift_size[i1];
//printf("%d\n",Zc);
break;
}
}
AssertFatal(Zc>0,"no valid Zc found for block length %d\n",block_length);
if ((Zc&31) > 0) simd_size = 16;
else simd_size = 32;
unsigned char c[22*Zc] __attribute__((aligned(32))); //padded input, unpacked, max size
unsigned char d[46*Zc] __attribute__((aligned(32))); //coded parity part output, unpacked, max size
unsigned char c_extension[2*22*Zc*simd_size] __attribute__((aligned(32))); //double size matrix of c
// calculate number of punctured bits
no_punctured_columns=(int)((nrows-2)*Zc+block_length-block_length/((float)nom_rate/(float)denom_rate))/Zc;
removed_bit=(nrows-no_punctured_columns-2) * Zc+block_length-(int)(block_length/((float)nom_rate/(float)denom_rate));
// printf("%d\n",no_punctured_columns);
// printf("%d\n",removed_bit);
// unpack input
// memset(c,0,sizeof(unsigned char) * ncols * Zc);
start_meas(tinput);
for (i=0; i<block_length; i++)
{
//c[i] = test_input[i/8]<<(i%8);
//c[i]=c[i]>>7&1;
c[i]=(test_input[i/8]&(1<<(i&7)))>>(i&7);
}
stop_meas(tinput);
// extend matrix
start_meas(tprep);
for (i1=0; i1 < ncols; i1++)
{
memcpy(&c_extension[2*i1*Zc], &c[i1*Zc], Zc*sizeof(unsigned char));
memcpy(&c_extension[(2*i1+1)*Zc], &c[i1*Zc], Zc*sizeof(unsigned char));
}
for (i1=1;i1<simd_size;i1++) {
memcpy(&c_extension[(2*ncols*Zc*i1)], &c_extension[i1], (2*ncols*Zc*sizeof(unsigned char))-i1);
// memset(&c_extension[(2*ncols*Zc*i1)],0,i1);
/*
printf("shift %d: ",i1);
for (int j=0;j<64;j++) printf("%d ",c_extension[(2*ncols*Zc*i1)+j]);
printf("\n");
*/
}
stop_meas(tprep);
//parity check part
start_meas(tparity);
encode_parity_check_part_optim(c_extension, d, BG, Zc, Kb);
stop_meas(tparity);
start_meas(toutput);
// information part and puncture columns
memcpy(&channel_input[0], &c[2*Zc], (block_length-2*Zc)*sizeof(unsigned char));
memcpy(&channel_input[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(unsigned char));
stop_meas(toutput);
return 0;
}
......@@ -3,6 +3,7 @@
#include <stdio.h>
#include <string.h>
#include "Gen_shift_value.h"
#include "assertions.h"
short *choose_generator_matrix(short BG,short Zc)
{
......@@ -335,7 +336,7 @@ int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,sho
unsigned char channel_temp,temp;
short *Gen_shift_values, *no_shift_values, *pointer_shift_values;
short BG,Zc,Kb,nrows,ncols;
int i,i1,i2,i3,i4,i5,t,t1,temp_prime;
int i,i1,i2,i3,i4,i5,t,var,temp_prime;
int no_punctured_columns,removed_bit;
//Table of possible lifting sizes
short lift_size[51]= {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384};
......@@ -373,6 +374,9 @@ int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,sho
}
}
int nind=0;
int indlist[1000];
// load base graph of generator matrix
if (BG==1)
{
......@@ -402,9 +406,110 @@ int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,sho
// parity check part
for (i2=0; i2 < 1; i2++)
if (gen_code==1)
{
char fname[100];
sprintf(fname,"ldpc%d_byte.c",Zc);
FILE *fd=fopen(fname,"w");
int shift;
char data_type[100];
char xor_command[100];
int mask;
AssertFatal(fd!=NULL,"cannot open %s\n",fname);
fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
if ((Zc&31)==0) {
shift=5; // AVX2 - 256-bit SIMD
mask=31;
strcpy(data_type,"__m256i");
strcpy(xor_command,"_mm256_xor_si256");
}
else if ((Zc&15)==0) {
shift=4; // SSE4 - 128-bit SIMD
mask=15;
strcpy(data_type,"__m128i");
strcpy(xor_command,"_mm_xor_si128");
}
else if ((Zc&7)==0) {
shift=3; // MMX - 64-bit SIMD
mask=7;
strcpy(data_type,"__m64i");
strcpy(xor_command,"_mm_xor_si64");
}
else {
shift=0; // no SIMD
mask=0;
strcpy(data_type,"uint8_t");
strcpy(xor_command,"scalar_xor");
fprintf(fd,"#define scalar_xor(a,b) ((a)^(b))\n");
}
fprintf(fd,"// generated code for Zc=%d, byte encoding\n",Zc);
fprintf(fd,"static inline void ldpc%d_byte(uint8_t *c,uint8_t *d) {\n",Zc);
fprintf(fd," %s *csimd=(%s *)c,*dsimd=(%s *)d;\n\n",data_type,data_type,data_type);
fprintf(fd," %s *c2,*d2;\n\n",data_type);
fprintf(fd," int i2;\n");
fprintf(fd," for (i2=0; i2<%d; i2++) {\n",Zc>>shift);
for (i2=0; i2 < 1; i2++)
{
t=Kb*Zc+i2;
//rotate matrix here
for (i5=0; i5 < Kb; i5++)
{
temp = c[i5*Zc];
memmove(&c[i5*Zc], &c[i5*Zc+1], (Zc-1)*sizeof(unsigned char));
c[i5*Zc+Zc-1] = temp;
}
// calculate each row in base graph
fprintf(fd," c2=&csimd[i2];\n");
fprintf(fd," d2=&dsimd[i2];\n");
for (i1=0; i1 < nrows-no_punctured_columns; i1++)
{
channel_temp=0;
fprintf(fd,"\n//row: %d\n",i1);
fprintf(fd," d2[%d]=",(Zc*i1)>>shift);
nind=0;
for (i3=0; i3 < Kb; i3++)
{
temp_prime=i1 * ncols + i3;
for (i4=0; i4 < no_shift_values[temp_prime]; i4++)
{
var=(int)((i3*Zc + (Gen_shift_values[ pointer_shift_values[temp_prime]+i4 ]+1)%Zc)/Zc);
int index =var*2*Zc + (i3*Zc + (Gen_shift_values[ pointer_shift_values[temp_prime]+i4 ]+1)%Zc) % Zc;
indlist[nind++] = ((index&mask)*((2*Zc)>>shift)*Kb)+(index>>shift);
}
}
for (i4=0;i4<nind-1;i4++) {
fprintf(fd,"%s(c2[%d],",xor_command,indlist[i4]);
}
fprintf(fd,"c2[%d]",indlist[i4]);
for (i4=0;i4<nind-1;i4++) fprintf(fd,")");
fprintf(fd,";\n");
d[t+i1*Zc]=channel_temp;
//channel_input[t+i1*Zc]=channel_temp;
}
fprintf(fd," }\n}\n");
}
fclose(fd);
}
else if(gen_code==0)
{
t=Kb*Zc+i2;
for (i2=0; i2 < Zc; i2++) {
//t=Kb*Zc+i2;
//rotate matrix here
for (i5=0; i5 < Kb; i5++)
......@@ -435,9 +540,10 @@ int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,sho
}
if (gen_code)
printf("\n");
d[t+i1*Zc]=channel_temp;
d[i2+i1*Zc]=channel_temp;
//channel_input[t+i1*Zc]=channel_temp;
}
}
}
// information part and puncture columns
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment