Commit 09f13c21 authored by Florian Kaltenberger's avatar Florian Kaltenberger

Merge branch 'gNB_mwc18_integration' of...

Merge branch 'gNB_mwc18_integration' of https://gitlab.eurecom.fr/oai-nr/openairinterface5g into gNB_mwc18_integration

Conflicts:
	openair1/PHY/CODING/TESTBENCH/ldpctest.c
	openair1/PHY/CODING/defs.h
	openair1/PHY/CODING/ldpc_encoder2.c
parents fd08fe11 8aa3a637
...@@ -76,7 +76,7 @@ int test_ldpc(short No_iteration, ...@@ -76,7 +76,7 @@ int test_ldpc(short No_iteration,
opp_enabled=1; opp_enabled=1;
cpu_freq_GHz = get_cpu_freq_GHz(); cpu_freq_GHz = get_cpu_freq_GHz();
//short test_input[block_length]; //short test_input[block_length];
unsigned char *test_input[MAX_NUM_DLSCH_SEGMENTS]; unsigned char *test_input[MAX_NUM_DLSCH_SEGMENTS]={NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};;
//short *c; //padded codeword //short *c; //padded codeword
short *esimated_output; short *esimated_output;
unsigned char *channel_input[MAX_NUM_DLSCH_SEGMENTS]; unsigned char *channel_input[MAX_NUM_DLSCH_SEGMENTS];
...@@ -96,9 +96,9 @@ int test_ldpc(short No_iteration, ...@@ -96,9 +96,9 @@ int test_ldpc(short No_iteration,
// generate input block // generate input block
for(j=0;j<MAX_NUM_DLSCH_SEGMENTS;j++) { for(j=0;j<MAX_NUM_DLSCH_SEGMENTS;j++) {
test_input[j]=(unsigned char *)malloc(sizeof(unsigned char) * block_length/8); test_input[j]=(unsigned char *)malloc16(sizeof(unsigned char) * block_length/8);
channel_input[j] = (unsigned char *)malloc(sizeof(unsigned char) * 68*384); channel_input[j] = (unsigned char *)malloc16(sizeof(unsigned char) * 68*384);
channel_input_optim[j] = (unsigned char *)malloc(sizeof(unsigned char) * 68*384); channel_input_optim[j] = (unsigned char *)malloc16(sizeof(unsigned char) * 68*384);
} }
modulated_input = (double *)malloc(sizeof(double) * 68*384); modulated_input = (double *)malloc(sizeof(double) * 68*384);
channel_output = (double *)malloc(sizeof(double) * 68*384); channel_output = (double *)malloc(sizeof(double) * 68*384);
...@@ -175,7 +175,7 @@ int test_ldpc(short No_iteration, ...@@ -175,7 +175,7 @@ int test_ldpc(short No_iteration,
stop_meas(&time); stop_meas(&time);
start_meas(&time_optim); start_meas(&time_optim);
ldpc_encoder_optim(test_input,channel_input_optim,block_length,nom_rate,denom_rate,n_segments,&tinput,&tprep,&tparity,&toutput); ldpc_encoder_optim_8seg(test_input,channel_input_optim,block_length,nom_rate,denom_rate,n_segments,&tinput,&tprep,&tparity,&toutput);
stop_meas(&time_optim); stop_meas(&time_optim);
if (ntrials==1) if (ntrials==1)
......
...@@ -577,6 +577,6 @@ int encode_parity_check_part_orig(unsigned char *c,unsigned char *d, short BG,sh ...@@ -577,6 +577,6 @@ int encode_parity_check_part_orig(unsigned char *c,unsigned char *d, short BG,sh
int ldpc_encoder(unsigned char *test_input,unsigned char *channel_input,short block_length, double rate); int ldpc_encoder(unsigned char *test_input,unsigned char *channel_input,short block_length, double rate);
int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,unsigned char gen_code); int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,unsigned char gen_code);
int ldpc_encoder_multi_segment(unsigned char **test_input,unsigned char **channel_input,short block_length,double rate,uint8_t n_segments); int ldpc_encoder_multi_segment(unsigned char **test_input,unsigned char **channel_input,short block_length,double rate,uint8_t n_segments);
int ldpc_encoder_optim(unsigned char **test_input,unsigned char **channel_input,short block_length,int nom_rate,int denom_rate,int n_segments,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput); int ldpc_encoder_optim(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput);
int ldpc_encoder_optim_8seg(unsigned char **test_input,unsigned char **channel_input,short block_length,int nom_rate,int denom_rate,int n_segments,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput);
#endif #endif
...@@ -164,18 +164,132 @@ void encode_parity_check_part_optim(uint8_t *c,uint8_t *d, short BG,short Zc,sho ...@@ -164,18 +164,132 @@ void encode_parity_check_part_optim(uint8_t *c,uint8_t *d, short BG,short Zc,sho
} }
int ldpc_encoder_optim(unsigned char **test_input,unsigned char **channel_input,short block_length,int nom_rate,int denom_rate,int n_segments,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput)
int ldpc_encoder_optim(unsigned char *test_input,unsigned char *channel_input,short block_length,int nom_rate,int denom_rate,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput)
{ {
short BG,Zc,Kb,nrows,ncols; short BG,Zc,Kb,nrows,ncols;
int i,i1,j; int i,i1,j;
int no_punctured_columns,removed_bit; int no_punctured_columns,removed_bit;
//Table of possible lifting sizes //Table of possible lifting sizes
short lift_size[51]= {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384}; short lift_size[51]= {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384};
int simd_size; int simd_size;
char temp; char temp;
//determine number of bits in codeword
if (block_length>3840)
{
BG=1;
Kb = 22;
nrows=46; //parity check bits
ncols=22; //info bits
}
else if (block_length<=3840)
{
BG=2;
nrows=42; //parity check bits
ncols=10; // info bits
if (block_length>640)
Kb = 10;
else if (block_length>560)
Kb = 9;
else if (block_length>192)
Kb = 8;
else
Kb = 6;
}
//find minimum value in all sets of lifting size
Zc=0;
for (i1=0; i1 < 51; i1++)
{
if (lift_size[i1] >= (double) block_length/Kb)
{
Zc = lift_size[i1];
//printf("%d\n",Zc);
break;
}
}
AssertFatal(Zc>0,"no valid Zc found for block length %d\n",block_length);
if ((Zc&31) > 0) simd_size = 16;
else simd_size = 32;
unsigned char c[22*Zc] __attribute__((aligned(32))); //padded input, unpacked, max size
unsigned char d[46*Zc] __attribute__((aligned(32))); //coded parity part output, unpacked, max size
unsigned char c_extension[2*22*Zc*simd_size] __attribute__((aligned(32))); //double size matrix of c
// calculate number of punctured bits
no_punctured_columns=(int)((nrows-2)*Zc+block_length-block_length*3)/Zc;
removed_bit=(nrows-no_punctured_columns-2) * Zc+block_length-(int)(block_length*3);
// printf("%d\n",no_punctured_columns);
// printf("%d\n",removed_bit);
// unpack input
memset(c,0,sizeof(unsigned char) * ncols * Zc);
memset(d,0,sizeof(unsigned char) * nrows * Zc);
start_meas(tinput);
for (i=0; i<block_length; i++) {
c[i] = (test_input[i/8]&(1<<(i&7)))>>(i&7);
//printf("c(%d,%d)=%d\n",j,i,temp);
}
stop_meas(tinput);
if ((BG==1) || (BG==2 && Zc>64)) {
// extend matrix
start_meas(tprep);
for (i1=0; i1 < ncols; i1++)
{
memcpy(&c_extension[2*i1*Zc], &c[i1*Zc], Zc*sizeof(unsigned char));
memcpy(&c_extension[(2*i1+1)*Zc], &c[i1*Zc], Zc*sizeof(unsigned char));
}
for (i1=1;i1<simd_size;i1++) {
memcpy(&c_extension[(2*ncols*Zc*i1)], &c_extension[i1], (2*ncols*Zc*sizeof(unsigned char))-i1);
// memset(&c_extension[(2*ncols*Zc*i1)],0,i1);
/*
printf("shift %d: ",i1);
for (int j=0;j<64;j++) printf("%d ",c_extension[(2*ncols*Zc*i1)+j]);
printf("\n");
*/
}
stop_meas(tprep);
//parity check part
start_meas(tparity);
encode_parity_check_part_optim(c_extension, d, BG, Zc, Kb);
stop_meas(tparity);
}
else {
if (encode_parity_check_part_orig(c, d, BG, Zc, Kb, block_length)!=0) {
printf("Problem with encoder\n");
return(-1);
}
}
start_meas(toutput);
// information part and puncture columns
memcpy(&channel_input[0], &c[2*Zc], (block_length-2*Zc)*sizeof(unsigned char));
memcpy(&channel_input[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(unsigned char));
stop_meas(toutput);
return 0;
}
int ldpc_encoder_optim_8seg(unsigned char **test_input,unsigned char **channel_input,short block_length,int nom_rate,int denom_rate,int n_segments,time_stats_t *tinput,time_stats_t *tprep,time_stats_t *tparity,time_stats_t *toutput)
{
short BG,Zc,Kb,nrows,ncols;
int i,i1,j;
int no_punctured_columns,removed_bit;
//Table of possible lifting sizes
short lift_size[51]= {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384};
int simd_size;
AssertFatal(n_segments>0&&n_segments<=8,"0 < n_segments %d <= 8\n",n_segments); AssertFatal(n_segments>0&&n_segments<=8,"0 < n_segments %d <= 8\n",n_segments);
//determine number of bits in codeword //determine number of bits in codeword
...@@ -233,14 +347,44 @@ int ldpc_encoder_optim(unsigned char **test_input,unsigned char **channel_input, ...@@ -233,14 +347,44 @@ int ldpc_encoder_optim(unsigned char **test_input,unsigned char **channel_input,
memset(d,0,sizeof(unsigned char) * nrows * Zc); memset(d,0,sizeof(unsigned char) * nrows * Zc);
start_meas(tinput); start_meas(tinput);
#if 0
char temp;
for (i=0; i<block_length; i++) { for (i=0; i<block_length; i++) {
for (j=0; j<n_segments; j++) { for (j=0; j<n_segments; j++) {
temp = (test_input[j][i/8]&(1<<(i&7)))>>(i&7); temp = (test_input[j][i/8]&(1<<(i&7)))>>(i&7);
//printf("c(%d,%d)=%d\n",j,i,temp); //printf("c(%d,%d)=%d\n",j,i,temp);
c[i] |= (temp << j); c[i] |= (temp << j);
} }
//printf("c[%d]=%d\n",i,c[i]);
} }
#else
#ifdef __AVX2__
__m256i shufmask = _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
__m256i andmask = _mm256_set1_epi64x(0x8040201008040201); // every 8 bits -> 8 bytes, pattern repeats.
__m256i zero256 = _mm256_setzero_si256();
__m256i masks[8];
register __m256i c256;
masks[0] = _mm256_set1_epi8(0x1);
masks[1] = _mm256_set1_epi8(0x2);
masks[2] = _mm256_set1_epi8(0x4);
masks[3] = _mm256_set1_epi8(0x8);
masks[4] = _mm256_set1_epi8(0x10);
masks[5] = _mm256_set1_epi8(0x20);
masks[6] = _mm256_set1_epi8(0x40);
masks[7] = _mm256_set1_epi8(0x80);
for (i=0; i<block_length>>5; i++) {
c256 = _mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)test_input[0])[i]), shufmask),andmask),zero256),masks[0]);
for (j=1; j<n_segments; j++) {
c256 = _mm256_or_si256(_mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)test_input[j])[i]), shufmask),andmask),zero256),masks[j]),c256);
}
((__m256i *)c)[i] = c256;
}
#else
AssertFatal(1==0,"Need AVX2 for this\n");
#endif
#endif
stop_meas(tinput); stop_meas(tinput);
if ((BG==1) || (BG==2 && Zc>64)) { if ((BG==1) || (BG==2 && Zc>64)) {
...@@ -278,13 +422,34 @@ int ldpc_encoder_optim(unsigned char **test_input,unsigned char **channel_input, ...@@ -278,13 +422,34 @@ int ldpc_encoder_optim(unsigned char **test_input,unsigned char **channel_input,
memcpy(&channel_input[0], &c[2*Zc], (block_length-2*Zc)*sizeof(unsigned char)); memcpy(&channel_input[0], &c[2*Zc], (block_length-2*Zc)*sizeof(unsigned char));
memcpy(&channel_input[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(unsigned char)); memcpy(&channel_input[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(unsigned char));
*/ */
#if 0
for (i=0;i<(block_length-2*Zc);i++) for (i=0;i<(block_length-2*Zc);i++)
for (j=0; j<n_segments; j++) for (j=0; j<n_segments; j++)
channel_input[j][i] = (c[2*Zc+i]>>j)&1; channel_input[j][i] = (c[2*Zc+i]>>j)&1;
for (i=0;i<((nrows-no_punctured_columns) * Zc-removed_bit);i++) for (i=0;i<((nrows-no_punctured_columns) * Zc-removed_bit);i++)
for (j=0; j<n_segments; j++) for (j=0; j<n_segments; j++)
channel_input[j][block_length-2*Zc+i] = (d[i]>>j)&1; channel_input[j][block_length-2*Zc+i] = (d[i]>>j)&1;
#else
#ifdef __AVX2__
uint32_t l1 = (block_length-(2*Zc))>>5;
uint32_t l2 = ((nrows-no_punctured_columns) * Zc-removed_bit)>>5;
AssertFatal(((2*Zc)&31) == 0,"2*Zc needs to be a multiple of 32 for now\n");
AssertFatal(((block_length-(2*Zc))&31) == 0,"block_length-(2*Zc) needs to be a multiple of 32 for now\n");
__m256i *c256p = (__m256i *)&c[2*Zc];
__m256i *d256p = (__m256i *)&d[0];
// if (((block_length-(2*Zc))&31)>0) l1++;
for (i=0;i<l1;i++)
for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(c256p[i],j),masks[0]);
// if ((((nrows-no_punctured_columns) * Zc-removed_bit)&31)>0) l2++;
for (i1=0;i1<l2;i1++,i++)
for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(d256p[i1],j),masks[0]);
#else
AssertFatal(1==0,"Need AVX2 for now\n");
#endif
#endif
stop_meas(toutput); stop_meas(toutput);
return 0; return 0;
} }
...@@ -652,6 +652,7 @@ int dlsch_encoding_all(PHY_VARS_eNB *eNB, ...@@ -652,6 +652,7 @@ int dlsch_encoding_all(PHY_VARS_eNB *eNB,
time_stats_t *i_stats) time_stats_t *i_stats)
{ {
int encoding_return = 0; int encoding_return = 0;
/*
unsigned int L,C,B; unsigned int L,C,B;
B = dlsch->harq_processes[dlsch->harq_ids[subframe]]->B; B = dlsch->harq_processes[dlsch->harq_ids[subframe]]->B;
if(B<=6144) if(B<=6144)
...@@ -669,7 +670,7 @@ int dlsch_encoding_all(PHY_VARS_eNB *eNB, ...@@ -669,7 +670,7 @@ int dlsch_encoding_all(PHY_VARS_eNB *eNB,
} }
} }
if(0/*C >= 8 && get_nprocs()>8 && codingw*/)//one main three worker if(C >= 8 && get_nprocs()>8 && codingw)//one main three worker
{ {
encoding_return = encoding_return =
dlsch_encoding_2threads(eNB, dlsch_encoding_2threads(eNB,
...@@ -725,6 +726,7 @@ int dlsch_encoding_all(PHY_VARS_eNB *eNB, ...@@ -725,6 +726,7 @@ int dlsch_encoding_all(PHY_VARS_eNB *eNB,
} }
else else
{ {
*/
encoding_return = encoding_return =
dlsch_encoding(eNB, dlsch_encoding(eNB,
a, a,
...@@ -735,7 +737,7 @@ int dlsch_encoding_all(PHY_VARS_eNB *eNB, ...@@ -735,7 +737,7 @@ int dlsch_encoding_all(PHY_VARS_eNB *eNB,
rm_stats, rm_stats,
te_stats, te_stats,
i_stats); i_stats);
} //}
return encoding_return; return encoding_return;
} }
...@@ -918,7 +920,8 @@ int dlsch_encoding(PHY_VARS_eNB *eNB, ...@@ -918,7 +920,8 @@ int dlsch_encoding(PHY_VARS_eNB *eNB,
start_meas(te_stats); start_meas(te_stats);
//ldpc_encoder((unsigned char*)dlsch->harq_processes[harq_pid]->c[r],(unsigned char*)&dlsch->harq_processes[harq_pid]->d[r][96],Kr,1.0/3.0); //ldpc_encoder((unsigned char*)dlsch->harq_processes[harq_pid]->c[r],(unsigned char*)&dlsch->harq_processes[harq_pid]->d[r][96],Kr,1.0/3.0);
ldpc_encoder_optim((unsigned char*)dlsch->harq_processes[harq_pid]->c,d_tmp,Kr,1,3,dlsch->harq_processes[harq_pid]->C,NULL,NULL,NULL,NULL); //ldpc_encoder_optim((unsigned char*)dlsch->harq_processes[harq_pid]->c[r],(unsigned char*)&dlsch->harq_processes[harq_pid]->d[r][96],Kr,1,3,NULL,NULL,NULL,NULL);
ldpc_encoder_optim_8seg(dlsch->harq_processes[harq_pid]->c,d_tmp,Kr,1,3,dlsch->harq_processes[harq_pid]->C,NULL,NULL,NULL,NULL);
stop_meas(te_stats); stop_meas(te_stats);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment