Commit 918d1fd1 authored by Florian Kaltenberger's avatar Florian Kaltenberger

Merge branch 'polar-decoder-optimizations' into 'develop-nr'

Polar decoder optimizations

See merge request oai/openairinterface5g!413
parents 9b3fc8b3 76ebc5ad
......@@ -18,13 +18,17 @@ int main(int argc, char *argv[]) {
//Initiate timing. (Results depend on CPU Frequency. Therefore, might change due to performance variances during simulation.)
time_stats_t timeEncoder,timeDecoder;
time_stats_t polar_decoder_init,polar_rate_matching,decoding,bit_extraction,deinterleaving;
time_stats_t path_metric,sorting,update_LLR;
opp_enabled=1;
int decoder_int16=0;
int generate_optim_code=0;
cpu_freq_GHz = get_cpu_freq_GHz();
reset_meas(&timeEncoder);
reset_meas(&timeDecoder);
randominit(0);
crcTableInit();
//Default simulation values (Aim for iterations = 1000000.)
int itr, iterations = 1000, arguments, polarMessageType = 0; //0=PBCH, 1=DCI, -1=UCI
double SNRstart = -20.0, SNRstop = 0.0, SNRinc= 0.5; //dB
......@@ -36,7 +40,7 @@ int main(int argc, char *argv[]) {
double timeEncoderCumulative = 0, timeDecoderCumulative = 0;
uint8_t aggregation_level = 8, decoderListSize = 8, pathMetricAppr = 0;
while ((arguments = getopt (argc, argv, "s:d:f:m:i:l:a:h")) != -1)
while ((arguments = getopt (argc, argv, "s:d:f:m:i:l:a:h:qg")) != -1)
switch (arguments)
{
case 's':
......@@ -67,6 +71,17 @@ int main(int argc, char *argv[]) {
pathMetricAppr = (uint8_t) atoi(optarg);
break;
case 'q':
decoder_int16=1;
break;
case 'g':
generate_optim_code=1;
iterations=1;
SNRstart=-6.0;
SNRstop =-6.0;
decoder_int16=1;
break;
case 'h':
printf("./polartest -s SNRstart -d SNRinc -f SNRstop -m [0=PBCH|1=DCI|2=UCI] -i iterations -l decoderListSize -a pathMetricAppr\n");
exit(-1);
......@@ -136,7 +151,9 @@ int main(int argc, char *argv[]) {
uint8_t *encoderOutputByte = malloc(sizeof(uint8_t) * coderLength);
double *modulatedInput = malloc (sizeof(double) * coderLength); //channel input
double *channelOutput = malloc (sizeof(double) * coderLength); //add noise
int16_t *channelOutput_int16;
if (decoder_int16 == 1) channelOutput_int16 = (int16_t*)malloc (sizeof(int16_t) * coderLength);
t_nrPolar_paramsPtr nrPolar_params = NULL, currentPtr = NULL;
nr_polar_init(&nrPolar_params, polarMessageType, testLength, aggregation_level);
currentPtr = nr_polar_params(nrPolar_params, polarMessageType, testLength, aggregation_level);
......@@ -285,6 +302,7 @@ int main(int argc, char *argv[]) {
for (int i=0; i<currentPtr->payloadBits; i++) aPrioriArray[i] = NAN;
for (SNR = SNRstart; SNR <= SNRstop; SNR += SNRinc) {
printf("SNR %f\n",SNR);
SNR_lin = pow(10, SNR/10);
for (itr = 1; itr <= iterations; itr++) {
......@@ -318,6 +336,15 @@ int main(int argc, char *argv[]) {
modulatedInput[i]=(-1)/sqrt(2);
channelOutput[i] = modulatedInput[i] + (gaussdouble(0.0,1.0) * (1/sqrt(2*SNR_lin)));
if (decoder_int16==1) {
if (channelOutput[i] > 15) channelOutput_int16[i] = 127;
else if (channelOutput[i] < -16) channelOutput_int16[i] = -128;
else channelOutput_int16[i] = (int16_t) (8*channelOutput[i]);
}
}
start_meas(&timeDecoder);
......@@ -327,12 +354,19 @@ int main(int argc, char *argv[]) {
NR_POLAR_DECODER_LISTSIZE,
aPrioriArray,
NR_POLAR_DECODER_PATH_METRIC_APPROXIMATION);*/
decoderState = polar_decoder_aPriori(channelOutput,
estimatedOutput,
currentPtr,
NR_POLAR_DECODER_LISTSIZE,
NR_POLAR_DECODER_PATH_METRIC_APPROXIMATION,
aPrioriArray);
if (decoder_int16==0)
decoderState = polar_decoder_aPriori(channelOutput,
estimatedOutput,
currentPtr,
NR_POLAR_DECODER_LISTSIZE,
NR_POLAR_DECODER_PATH_METRIC_APPROXIMATION,
aPrioriArray);
else
decoderState = polar_decoder_int16(channelOutput_int16,
estimatedOutput,
currentPtr);
stop_meas(&timeDecoder);
/*printf("testInput: [0]->0x%08x\n", testInput[0]);
printf("estimatedOutput: [0]->0x%08x\n", estimatedOutput[0]);*/
......
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
#include "nrPolar_tools/nr_polar_defs.h"
#include "nrPolar_tools/nr_polar_pbch_defs.h"
#include "nrPolar_tools/nr_polar_uci_defs.h"
void nr_polar_init(t_nrPolar_params* polarParams, int messageType) {
uint32_t poly6 = 0x84000000; // 1000100000... -> D^6+D^5+1
uint32_t poly11 = 0x63200000; //11000100001000... -> D^11+D^10+D^9+D^5+1
//uint32_t poly16 = 0x81080000; //100000010000100... - > D^16+D^12+D^5+1
//uint32_t poly24a = 0x864cfb00; //100001100100110011111011 -> D^24+D^23+D^18+D^17+D^14+D^11+D^10+D^7+D^6+D^5+D^4+D^3+D+1
//uint32_t poly24b = 0x80006300; //100000000000000001100011 -> D^24+D^23+D^6+D^5+D+1
uint32_t poly24c = 0xB2B11700; //101100101011000100010111 -> D^24...
if (messageType == 0) { //DCI
} else if (messageType == 1) { //PBCH
polarParams->n_max = NR_POLAR_PBCH_N_MAX;
polarParams->i_il = NR_POLAR_PBCH_I_IL;
polarParams->i_seg = NR_POLAR_PBCH_I_SEG;
polarParams->n_pc = NR_POLAR_PBCH_N_PC;
polarParams->n_pc_wm = NR_POLAR_PBCH_N_PC_WM;
polarParams->i_bil = NR_POLAR_PBCH_I_BIL;
polarParams->payloadBits = NR_POLAR_PBCH_PAYLOAD_BITS;
polarParams->encoderLength = NR_POLAR_PBCH_E;
polarParams->crcParityBits = NR_POLAR_PBCH_CRC_PARITY_BITS;
polarParams->K = polarParams->payloadBits + polarParams->crcParityBits; // Number of bits to encode.
polarParams->N = nr_polar_output_length(polarParams->K, polarParams->encoderLength, polarParams->n_max);
polarParams->n = log2(polarParams->N);
polarParams->crc_generator_matrix=crc24c_generator_matrix(polarParams->payloadBits);
polarParams->crc_polynomial = poly24c;
polarParams->G_N = nr_polar_kronecker_power_matrices(polarParams->n);
//polar_encoder vectors:
polarParams->nr_polar_crc = malloc(sizeof(uint8_t) * polarParams->crcParityBits);
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K);
polarParams->nr_polar_d = malloc(sizeof(uint8_t) * polarParams->N);
//Polar Coding vectors
polarParams->nr_polar_u = malloc(sizeof(uint8_t) * polarParams->N); //Decoder: nr_polar_uHat
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_cHat
polarParams->nr_polar_b = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_bHat
polarParams->decoder_kernel = NULL;//polar_decoder_K56_N512_E864;
} else if (messageType == 2) { //UCI
polarParams->payloadBits = NR_POLAR_PUCCH_PAYLOAD_BITS; //A depends on what they carry...
polarParams->encoderLength = NR_POLAR_PUCCH_E ; //E depends on other standards 6.3.1.4
if (polarParams->payloadBits <= 11) //Ref. 38-212, Section 6.3.1.2.2
polarParams->crcParityBits = 0; //K=A
else //Ref. 38-212, Section 6.3.1.2.1
{
if (polarParams->payloadBits < 20)
polarParams->crcParityBits = NR_POLAR_PUCCH_CRC_PARITY_BITS_SHORT;
else
polarParams->crcParityBits = NR_POLAR_PUCCH_CRC_PARITY_BITS_LONG;
if (polarParams->payloadBits >= 360 && polarParams->encoderLength >= 1088)
polarParams->i_seg = NR_POLAR_PUCCH_I_SEG_LONG; // -> C=2
else
polarParams->i_seg = NR_POLAR_PUCCH_I_SEG_SHORT; // -> C=1
}
polarParams->K = polarParams->payloadBits + polarParams->crcParityBits; // Number of bits to encode.
//K_r = K/C ; C = I_seg+1
if((polarParams->K)/(polarParams->i_seg+1)>=18 && (polarParams->K)/(polarParams->i_seg+1)<=25) //Ref. 38-212, Section 6.3.1.3.1
{
polarParams->n_max = NR_POLAR_PUCCH_N_MAX;
polarParams->i_il =NR_POLAR_PUCCH_I_IL;
polarParams->n_pc = NR_POLAR_PUCCH_N_PC_SHORT;
if( (polarParams->encoderLength - polarParams->K)/(polarParams->i_seg + 1) + 3 > 192 )
polarParams->n_pc_wm = NR_POLAR_PUCCH_N_PC_WM_LONG;
else
polarParams->n_pc_wm = NR_POLAR_PUCCH_N_PC_WM_SHORT;
}
if( (polarParams->K)/(polarParams->i_seg + 1) > 30 ) //Ref. 38-212, Section 6.3.1.3.1
{
polarParams->n_max = NR_POLAR_PUCCH_N_MAX;
polarParams->i_il =NR_POLAR_PUCCH_I_IL;
polarParams->n_pc = NR_POLAR_PUCCH_N_PC_LONG;
polarParams->n_pc_wm = NR_POLAR_PUCCH_N_PC_WM_LONG;
}
polarParams->i_bil = NR_POLAR_PUCCH_I_BIL; //Ref. 38-212, Section 6.3.1.4.1
polarParams->N = nr_polar_output_length(polarParams->K, polarParams->encoderLength, polarParams->n_max);
polarParams->n = log2(polarParams->N);
if((polarParams->payloadBits) <= 19)
{
polarParams->crc_generator_matrix=crc6_generator_matrix(polarParams->payloadBits);
polarParams->crc_polynomial = poly6;
}
else
{
polarParams->crc_generator_matrix=crc11_generator_matrix(polarParams->payloadBits);
polarParams->crc_polynomial = poly11;
}
polarParams->G_N = nr_polar_kronecker_power_matrices(polarParams->n);
//polar_encoder vectors:
polarParams->nr_polar_crc = malloc(sizeof(uint8_t) * polarParams->crcParityBits);
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K);
polarParams->nr_polar_d = malloc(sizeof(uint8_t) * polarParams->N);
//Polar Coding vectors
polarParams->nr_polar_u = malloc(sizeof(uint8_t) * polarParams->N); //Decoder: nr_polar_uHat
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_cHat
polarParams->nr_polar_b = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_bHat
}
polarParams->crcCorrectionBits = NR_POLAR_CRC_ERROR_CORRECTION_BITS;
polarParams->crc256Table = malloc(sizeof(uint32_t)*256);
crcTable256Init(polarParams->crc_polynomial, polarParams->crc256Table);
polarParams->Q_0_Nminus1 = nr_polar_sequence_pattern(polarParams->n);
polarParams->interleaving_pattern = malloc(sizeof(uint16_t) * polarParams->K);
nr_polar_interleaving_pattern(polarParams->K, polarParams->i_il, polarParams->interleaving_pattern);
polarParams->rate_matching_pattern = malloc(sizeof(uint16_t) * polarParams->encoderLength);
uint16_t *J = malloc(sizeof(uint16_t) * polarParams->N);
nr_polar_rate_matching_pattern(polarParams->rate_matching_pattern, J,
nr_polar_subblock_interleaver_pattern, polarParams->K, polarParams->N, polarParams->encoderLength);
polarParams->information_bit_pattern = malloc(sizeof(uint8_t) * polarParams->N);
polarParams->Q_I_N = malloc(sizeof(int16_t) * (polarParams->K + polarParams->n_pc));
polarParams->Q_F_N = malloc(sizeof(int16_t) * (polarParams->N+1)); // Last element shows the final array index assigned a value.
polarParams->Q_PC_N = malloc(sizeof(int16_t) * (polarParams->n_pc));
for (int i=0; i<=polarParams->N; i++) polarParams->Q_F_N[i] = -1; // Empty array.
nr_polar_info_bit_pattern(polarParams->information_bit_pattern,
polarParams->Q_I_N, polarParams->Q_F_N, J, polarParams->Q_0_Nminus1,
polarParams->K, polarParams->N, polarParams->encoderLength, polarParams->n_pc);
polarParams->channel_interleaver_pattern = malloc(sizeof(uint16_t) * polarParams->encoderLength);
nr_polar_channel_interleaver_pattern(polarParams->channel_interleaver_pattern,
polarParams->i_bil, polarParams->encoderLength);
polarParams->extended_crc_generator_matrix = malloc(polarParams->K * sizeof(uint8_t *)); //G_P3
uint8_t tempECGM[polarParams->K][polarParams->crcParityBits];
for (int i = 0; i < polarParams->K; i++){
polarParams->extended_crc_generator_matrix[i] = malloc(polarParams->crcParityBits * sizeof(uint8_t));
}
for (int i=0; i<polarParams->payloadBits; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
tempECGM[i][j]=polarParams->crc_generator_matrix[i][j];
}
}
for (int i=polarParams->payloadBits; i<polarParams->K; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
if( (i-polarParams->payloadBits) == j ){
tempECGM[i][j]=1;
} else {
tempECGM[i][j]=0;
}
}
}
for (int i=0; i<polarParams->K; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
polarParams->extended_crc_generator_matrix[i][j]=tempECGM[polarParams->interleaving_pattern[i]][j];
}
}
build_decoder_tree(polarParams);
printf("decoder tree nodes %d\n",polarParams->tree.num_nodes);
free(J);
}
......@@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* For more information about the OpenAirInterface (OAI) Software Alliance
* contact@openairinterface.org
*/
......@@ -38,6 +38,7 @@
#include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
int8_t polar_decoder(
double *input,
uint8_t *out,
......@@ -285,7 +286,6 @@ int8_t polar_decoder_aPriori(double *input,
uint8_t pathMetricAppr,
double *aPrioriPayload)
{
uint8_t ***bit = nr_alloc_uint8_t_3D_array(polarParams->N, (polarParams->n+1), 2*listSize);
uint8_t **bitUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
uint8_t **llrUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
......@@ -355,6 +355,7 @@ int8_t polar_decoder_aPriori(double *input,
uint8_t listIndex[2*listSize], copyIndex;
for (uint16_t currentBit=0; currentBit<polarParams->N; currentBit++){
updateLLR(llr, llrUpdated, bit, bitUpdated, currentListSize, currentBit, 0, polarParams->N, (polarParams->n+1), pathMetricAppr);
if (polarParams->information_bit_pattern[currentBit]==0) { //Frozen bit.
updatePathMetric(pathMetric, llr, currentListSize, 0, currentBit, pathMetricAppr); //approximation=0 --> 11b, approximation=1 --> 12
......@@ -528,6 +529,7 @@ int8_t polar_decoder_aPriori(double *input,
*/
nr_byte2bit_uint8_32_t(polarParams->nr_polar_A, polarParams->payloadBits, out);
return(0);
}
......@@ -1034,3 +1036,42 @@ int8_t polar_decoder_dci(double *input,
nr_byte2bit_uint8_32_t(polarParams->nr_polar_A, polarParams->payloadBits, out);
return(0);
}
int8_t polar_decoder_int16(int16_t *input,
uint8_t *out,
t_nrPolar_params *polarParams)
{
int16_t d_tilde[polarParams->N];// = malloc(sizeof(double) * polarParams->N);
nr_polar_rate_matching_int16(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength);
for (int i=0;i<polarParams->N;i++) {
if (d_tilde[i]<-128) d_tilde[i]=-128;
else if (d_tilde[i]>127) d_tilde[i]=128;
}
memcpy((void*)&polarParams->tree.root->alpha[0],(void*)&d_tilde[0],sizeof(int16_t)*polarParams->N);
/*
* SCL polar decoder.
*/
generic_polar_decoder(polarParams,polarParams->tree.root);
//Extract the information bits (û to ĉ)
nr_polar_info_bit_extraction(polarParams->nr_polar_U, polarParams->nr_polar_CPrime, polarParams->information_bit_pattern, polarParams->N);
//Deinterleaving (ĉ to b)
nr_polar_deinterleaver(polarParams->nr_polar_CPrime, polarParams->nr_polar_B, polarParams->interleaving_pattern, polarParams->K);
//Remove the CRC (â)
for (int j = 0; j < polarParams->payloadBits; j++) polarParams->nr_polar_A[j]=polarParams->nr_polar_B[j];
nr_byte2bit_uint8_32_t(polarParams->nr_polar_A, polarParams->payloadBits, out);
return(0);
}
......@@ -31,6 +31,10 @@
*/
#include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
#include "PHY/sse_intrin.h"
#include "PHY/impl_defs_top.h"
//#define DEBUG_NEW_IMPL
void updateLLR(double ***llr,
uint8_t **llrU,
......@@ -56,8 +60,9 @@ void updateLLR(double ***llr,
computeLLR(llr, row, col, i, offset, approximation);
}
}
llrU[row][col]=1;
// printf("LLR (a %f, b %f): llr[%d][%d] %f\n",32*a,32*b,col,row,32*llr[col][row]);
}
void updateBit(uint8_t ***bit,
......@@ -68,6 +73,7 @@ void updateBit(uint8_t ***bit,
uint16_t xlen,
uint8_t ylen)
{
uint16_t offset = ( xlen/(pow(2,(ylen-col))) );
for (uint8_t i=0; i<listSize; i++) {
......@@ -101,6 +107,7 @@ void updatePathMetric(double *pathMetric,
}
}
void updatePathMetric2(double *pathMetric,
double ***llr,
uint8_t listSize,
......@@ -110,6 +117,7 @@ void updatePathMetric2(double *pathMetric,
double *tempPM = malloc(sizeof(double) * listSize);
for (int i=0; i < listSize; i++) tempPM[i]=pathMetric[i];
uint8_t bitValue = 0;
if (appr) { //eq. (12)
for (uint8_t i = 0; i < listSize; i++) {
......@@ -132,7 +140,9 @@ void updatePathMetric2(double *pathMetric,
free(tempPM);
}
}
void computeLLR(double ***llr,
uint16_t row,
......@@ -152,6 +162,7 @@ void computeLLR(double ***llr,
llr[row][col][i] = log((exp(a + b) + 1) / (exp(a) + exp(b)));
}
}
void updateCrcChecksum(uint8_t **crcChecksum,
......@@ -179,3 +190,347 @@ void updateCrcChecksum2(uint8_t **crcChecksum,
}
}
}
decoder_node_t *new_decoder_node(int first_leaf_index,int level) {
decoder_node_t *node=(decoder_node_t *)malloc(sizeof(decoder_node_t));
node->first_leaf_index=first_leaf_index;
node->level=level;
node->Nv = 1<<level;
node->leaf = 0;
node->left=(decoder_node_t *)NULL;
node->right=(decoder_node_t *)NULL;
node->all_frozen=0;
node->alpha = (int16_t*)malloc16(node->Nv*sizeof(int16_t));
node->beta = (int16_t*)malloc16(node->Nv*sizeof(int16_t));
memset((void*)node->beta,-1,node->Nv*sizeof(int16_t));
return(node);
}
decoder_node_t *add_nodes(int level,int first_leaf_index,t_nrPolar_params *pp) {
int all_frozen_below=1;
int Nv = 1<<level;
decoder_node_t *new_node = new_decoder_node(first_leaf_index,level);
#ifdef DEBUG_NEW_IMPL
printf("New node %d order %d, level %d\n",pp->tree.num_nodes,Nv,level);
pp->tree.num_nodes++;
#endif
if (level==0) {
#ifdef DEBUG_NEW_IMPL
printf("leaf %d (%s)\n",first_leaf_index,pp->information_bit_pattern[first_leaf_index]==1 ? "information or crc" : "frozen");
#endif
new_node->leaf=1;
new_node->all_frozen = pp->information_bit_pattern[first_leaf_index]==0 ? 1 : 0;
return new_node; // this is a leaf node
}
for (int i=0;i<Nv;i++) {
if (pp->information_bit_pattern[i+first_leaf_index]>0) all_frozen_below=0;
}
if (all_frozen_below==0) new_node->left=add_nodes(level-1,first_leaf_index,pp);
else {
#ifdef DEBUG_NEW_IMPL
printf("aggregating frozen bits %d ... %d at level %d (%s)\n",first_leaf_index,first_leaf_index+Nv-1,level,((first_leaf_index/Nv)&1)==0?"left":"right");
#endif
new_node->leaf=1;
new_node->all_frozen=1;
}
if (all_frozen_below==0) new_node->right=add_nodes(level-1,first_leaf_index+(Nv/2),pp);
return(new_node);
}
void build_decoder_tree(t_nrPolar_params *pp) {
pp->tree.num_nodes=0;
pp->tree.root = add_nodes(pp->n,0,pp);
}
#if defined(__arm__) || defined(__aarch64__)
// translate 1-1 SIMD functions from SSE to NEON
#define __m128i int16x8_t
#define __m64 int8x8_t
#define _mm_abs_epi16(a) vabsq_s16(a)
#define _mm_min_epi16(a,b) vminq_s16(a,b)
#define _mm_subs_epi16(a,b) vsubq_s16(a,b)
#define _mm_abs_pi16(a) vabs_s16(a)
#define _mm_min_pi16(a,b) vmin_s16(a,b)
#define _mm_subs_pi16(a,b) vsub_s16(a,b)
#endif
void applyFtoleft(t_nrPolar_params *pp,decoder_node_t *node) {
int16_t *alpha_v=node->alpha;
int16_t *alpha_l=node->left->alpha;
int16_t *betal = node->left->beta;
int16_t a,b,absa,absb,maska,maskb,minabs;
#ifdef DEBUG_NEW_IMPL
printf("applyFtoleft %d, Nv %d (level %d,node->left (leaf %d, AF %d))\n",node->first_leaf_index,node->Nv,node->level,node->left->leaf,node->left->all_frozen);
for (int i=0;i<node->Nv;i++) printf("i%d (frozen %d): alpha_v[i] = %d\n",i,1-pp->information_bit_pattern[node->first_leaf_index+i],alpha_v[i]);
#endif
if (node->left->all_frozen == 0) {
#if defined(__AVX2__)
int avx2mod = (node->Nv/2)&15;
if (avx2mod == 0) {
__m256i a256,b256,absa256,absb256,minabs256;
int avx2len = node->Nv/2/16;
// printf("avx2len %d\n",avx2len);
for (int i=0;i<avx2len;i++) {
a256 =((__m256i*)alpha_v)[i];
b256 =((__m256i*)alpha_v)[i+avx2len];
absa256 =_mm256_abs_epi16(a256);
absb256 =_mm256_abs_epi16(b256);
minabs256 =_mm256_min_epi16(absa256,absb256);
((__m256i*)alpha_l)[i] =_mm256_sign_epi16(minabs256,_mm256_sign_epi16(a256,b256));
}
}
else if (avx2mod == 8) {
__m128i a128,b128,absa128,absb128,minabs128;
a128 =*((__m128i*)alpha_v);
b128 =((__m128i*)alpha_v)[1];
absa128 =_mm_abs_epi16(a128);
absb128 =_mm_abs_epi16(b128);
minabs128 =_mm_min_epi16(absa128,absb128);
*((__m128i*)alpha_l) =_mm_sign_epi16(minabs128,_mm_sign_epi16(a128,b128));
}
else if (avx2mod == 4) {
__m64 a64,b64,absa64,absb64,minabs64;
a64 =*((__m64*)alpha_v);
b64 =((__m64*)alpha_v)[1];
absa64 =_mm_abs_pi16(a64);
absb64 =_mm_abs_pi16(b64);
minabs64 =_mm_min_pi16(absa64,absb64);
*((__m64*)alpha_l) =_mm_sign_pi16(minabs64,_mm_sign_pi16(a64,b64));
}
else
#else
int sse4mod = (node->Nv/2)&7;
int sse4len = node->Nv/2/8;
#if defined(__arm__) || defined(__aarch64__)
int16x8_t signatimesb,comp1,comp2,negminabs128;
int16x8_t zero=vdupq_n_s16(0);
#endif
if (sse4mod == 0) {
for (int i=0;i<sse4len;i++) {
__m128i a128,b128,absa128,absb128,minabs128;
int sse4len = node->Nv/2/8;
a128 =*((__m128i*)alpha_v);
b128 =((__m128i*)alpha_v)[1];
absa128 =_mm_abs_epi16(a128);
absb128 =_mm_abs_epi16(b128);
minabs128 =_mm_min_epi16(absa128,absb128);
#if defined(__arm__) || defined(__aarch64__)
// unfortunately no direct equivalent to _mm_sign_epi16
signatimesb=vxorrq_s16(a128,b128);
comp1=vcltq_s16(signatimesb,zero);
comp2=vcgeq_s16(signatimesb,zero);
negminabs128=vnegq_s16(minabs128);
*((__m128i*)alpha_l) =vorrq_s16(vandq_s16(minabs128,comp0),vandq_s16(negminabs128,comp1));
#else
*((__m128i*)alpha_l) =_mm_sign_epi16(minabs128,_mm_sign_epi16(a128,b128));
#endif
}
}
else if (sse4mod == 4) {
__m64 a64,b64,absa64,absb64,minabs64;
a64 =*((__m64*)alpha_v);
b64 =((__m64*)alpha_v)[1];
absa64 =_mm_abs_pi16(a64);
absb64 =_mm_abs_pi16(b64);
minabs64 =_mm_min_pi16(absa64,absb64);
#if defined(__arm__) || defined(__aarch64__)
AssertFatal(1==0,"Need to do this still for ARM\n");
#else
*((__m64*)alpha_l) =_mm_sign_pi16(minabs64,_mm_sign_epi16(a64,b64));
#endif
}
else
#endif
{ // equvalent scalar code to above, activated only on non x86/ARM architectures
for (int i=0;i<node->Nv/2;i++) {
a=alpha_v[i];
b=alpha_v[i+(node->Nv/2)];
maska=a>>15;
maskb=b>>15;
absa=(a+maska)^maska;
absb=(b+maskb)^maskb;
minabs = absa<absb ? absa : absb;
alpha_l[i] = (maska^maskb)==0 ? minabs : -minabs;
// printf("alphal[%d] %d (%d,%d)\n",i,alpha_l[i],a,b);
}
}
if (node->Nv == 2) { // apply hard decision on left node
betal[0] = (alpha_l[0]>0) ? -1 : 1;
#ifdef DEBUG_NEW_IMPL
printf("betal[0] %d (%p)\n",betal[0],&betal[0]);
#endif
pp->nr_polar_U[node->first_leaf_index] = (1+betal[0])>>1;
#ifdef DEBUG_NEW_IMPL
printf("Setting bit %d to %d (LLR %d)\n",node->first_leaf_index,(betal[0]+1)>>1,alpha_l[0]);
#endif
}
}
}
void applyGtoright(t_nrPolar_params *pp,decoder_node_t *node) {
int16_t *alpha_v=node->alpha;
int16_t *alpha_r=node->right->alpha;
int16_t *betal = node->left->beta;
int16_t *betar = node->right->beta;
#ifdef DEBUG_NEW_IMPL
printf("applyGtoright %d, Nv %d (level %d), (leaf %d, AF %d)\n",node->first_leaf_index,node->Nv,node->level,node->right->leaf,node->right->all_frozen);
#endif
if (node->right->all_frozen == 0) {
#if defined(__AVX2__)
int avx2mod = (node->Nv/2)&15;
if (avx2mod == 0) {
int avx2len = node->Nv/2/16;
for (int i=0;i<avx2len;i++) {
((__m256i *)alpha_r)[i] =
_mm256_subs_epi16(((__m256i *)alpha_v)[i+avx2len],
_mm256_sign_epi16(((__m256i *)alpha_v)[i],
((__m256i *)betal)[i]));
}
}
else if (avx2mod == 8) {
((__m128i *)alpha_r)[0] = _mm_subs_epi16(((__m128i *)alpha_v)[1],_mm_sign_epi16(((__m128i *)alpha_v)[0],((__m128i *)betal)[0]));
}
else if (avx2mod == 4) {
((__m64 *)alpha_r)[0] = _mm_subs_pi16(((__m64 *)alpha_v)[1],_mm_sign_pi16(((__m64 *)alpha_v)[0],((__m64 *)betal)[0]));
}
else
#else
int sse4mod = (node->Nv/2)&7;
if (sse4mod == 0) {
int sse4len = node->Nv/2/8;
for (int i=0;i<sse4len;i++) {
#if defined(__arm__) || defined(__aarch64__)
((int16x8_t *)alpha_r)[0] = vsubq_s16(((int16x8_t *)alpha_v)[1],vmulq_epi16(((int16x8_t *)alpha_v)[0],((int16x8_t *)betal)[0]));
#else
((__m128i *)alpha_r)[0] = _mm_subs_epi16(((__m128i *)alpha_v)[1],_mm_sign_epi16(((__m128i *)alpha_v)[0],((__m128i *)betal)[0]));
#endif
}
}
else if (sse4mod == 4) {
#if defined(__arm__) || defined(__aarch64__)
((int16x4_t *)alpha_r)[0] = vsub_s16(((int16x4_t *)alpha_v)[1],vmul_epi16(((int16x4_t *)alpha_v)[0],((int16x4_t *)betal)[0]));
#else
((__m64 *)alpha_r)[0] = _mm_subs_pi16(((__m64 *)alpha_v)[1],_mm_sign_pi16(((__64 *)alpha_v)[0],((__m64 *)betal)[0]));
#endif
}
else
#endif
{// equvalent scalar code to above, activated only on non x86/ARM architectures
for (int i=0;i<node->Nv/2;i++) {
alpha_r[i] = alpha_v[i+(node->Nv/2)] - (betal[i]*alpha_v[i]);
}
}
if (node->Nv == 2) { // apply hard decision on right node
betar[0] = (alpha_r[0]>0) ? -1 : 1;
pp->nr_polar_U[node->first_leaf_index+1] = (1+betar[0])>>1;
#ifdef DEBUG_NEW_IMPL
printf("Setting bit %d to %d (LLR %d)\n",node->first_leaf_index+1,(betar[0]+1)>>1,alpha_r[0]);
#endif
}
}
}
int16_t all1[16] = {1,1,1,1,
1,1,1,1,
1,1,1,1,
1,1,1,1};
void computeBeta(t_nrPolar_params *pp,decoder_node_t *node) {
int16_t *betav = node->beta;
int16_t *betal = node->left->beta;
int16_t *betar = node->right->beta;
#ifdef DEBUG_NEW_IMPL
printf("Computing beta @ level %d first_leaf_index %d (all_frozen %d)\n",node->level,node->first_leaf_index,node->left->all_frozen);
#endif
if (node->left->all_frozen==0) { // if left node is not aggregation of frozen bits
#if defined(__AVX2__)
int avx2mod = (node->Nv/2)&15;
register __m256i allones=*((__m256i*)all1);
if (avx2mod == 0) {
int avx2len = node->Nv/2/16;
for (int i=0;i<avx2len;i++) {
((__m256i*)betav)[i] = _mm256_or_si256(_mm256_cmpeq_epi16(((__m256i*)betar)[i],
((__m256i*)betal)[i]),allones);
}
}
else if (avx2mod == 8) {
((__m128i*)betav)[0] = _mm_or_si128(_mm_cmpeq_epi16(((__m128i*)betar)[0],
((__m128i*)betal)[0]),*((__m128i*)all1));
}
else if (avx2mod == 4) {
((__m64*)betav)[0] = _mm_or_si64(_mm_cmpeq_pi16(((__m64*)betar)[0],
((__m64*)betal)[0]),*((__m64*)all1));
}
else
#else
int avx2mod = (node->Nv/2)&15;
if (ssr4mod == 0) {
int ssr4len = node->Nv/2/8;
register __m128i allones=*((__m128i*)all1);
for (int i=0;i<sse4len;i++) {
((__m256i*)betav)[i] = _mm_or_si128(_mm_cmpeq_epi16(((__m128i*)betar)[i],
((__m128i*)betal)[i]),allones));
}
}
else if (sse4mod == 4) {
((__m64*)betav)[0] = _mm_or_si64(_mm_cmpeq_pi16(((__m64*)betar)[0],
((__m64*)betal)[0]),*((__m64*)all1));
}
else
#endif
{
for (int i=0;i<node->Nv/2;i++) {
betav[i] = (betal[i] != betar[i]) ? 1 : -1;
}
}
}
else memcpy((void*)&betav[0],betar,(node->Nv/2)*sizeof(int16_t));
memcpy((void*)&betav[node->Nv/2],betar,(node->Nv/2)*sizeof(int16_t));
}
void generic_polar_decoder(t_nrPolar_params *pp,decoder_node_t *node) {
// Apply F to left
applyFtoleft(pp,node);
// if left is not a leaf recurse down to the left
if (node->left->leaf==0) generic_polar_decoder(pp,node->left);
applyGtoright(pp,node);
if (node->right->leaf==0) generic_polar_decoder(pp,node->right);
computeBeta(pp,node);
}
......@@ -56,6 +56,27 @@
static const uint8_t nr_polar_subblock_interleaver_pattern[32] = { 0, 1, 2, 4, 3, 5, 6, 7, 8, 16, 9, 17, 10, 18, 11, 19, 12, 20, 13, 21, 14, 22, 15, 23, 24, 25, 26, 28, 27, 29, 30, 31 };
#define Nmax 1024
#define nmax 10
typedef struct decoder_node_t_s {
struct decoder_node_t_s *left;
struct decoder_node_t_s *right;
int level;
int leaf;
int Nv;
int first_leaf_index;
int all_frozen;
int16_t *alpha;
int16_t *beta;
} decoder_node_t;
typedef struct decoder_tree_t_s {
decoder_node_t *root;
int num_nodes;
} decoder_tree_t;
struct nrPolar_params {
//messageType: 0=PBCH, 1=DCI, -1=UCI
int idx; //idx = (messageType * messageLength * aggregation_prime);
......@@ -89,7 +110,7 @@ struct nrPolar_params {
uint8_t **crc_generator_matrix; //G_P
uint8_t **G_N;
uint32_t* crc256Table;
uint8_t **extended_crc_generator_matrix;
//lowercase: bits, Uppercase: Bits stored in bytes
//polar_encoder vectors
uint8_t *nr_polar_crc;
......@@ -103,6 +124,8 @@ struct nrPolar_params {
uint8_t *nr_polar_CPrime;
uint8_t *nr_polar_B;
uint8_t *nr_polar_U;
decoder_tree_t tree;
} __attribute__ ((__packed__));
typedef struct nrPolar_params t_nrPolar_params;
typedef t_nrPolar_params *t_nrPolar_paramsPtr;
......@@ -151,6 +174,8 @@ int8_t polar_decoder_dci(double *input,
uint8_t pathMetricAppr,
uint16_t n_RNTI);
void generic_polar_decoder(t_nrPolar_params *,decoder_node_t *);
void nr_polar_init(t_nrPolar_paramsPtr *polarParams,
int8_t messageType,
uint16_t messageLength,
......@@ -196,6 +221,8 @@ void nr_polar_rate_matching(double *input,
uint16_t N,
uint16_t E);
void nr_polar_rate_matching_int16(int16_t *input, int16_t *output, uint16_t *rmp, uint16_t K, uint16_t N, uint16_t E);
void nr_polar_interleaving_pattern(uint16_t K,
uint8_t I_IL,
uint16_t *PI_k_);
......
......@@ -111,6 +111,32 @@ double ***nr_alloc_double_3D_array(uint16_t xlen, uint16_t ylen, uint16_t zlen)
return output;
}
double **nr_alloc_double_2D_array(uint16_t xlen, uint16_t ylen) {
double **output;
int i, j;
if ((output = malloc(xlen * sizeof(*output))) == NULL) {
perror("[nr_alloc_double_3D_array] Problem at 1D allocation");
return NULL;
}
for (i = 0; i < xlen; i++)
output[i] = NULL;
for (i = 0; i < xlen; i++)
if ((output[i] = malloc(ylen * sizeof *output[i])) == NULL) {
perror("[nr_alloc_double_2D_array] Problem at 2D allocation");
nr_free_double_2D_array(output, xlen);
return NULL;
}
for (i = 0; i < xlen; i++)
for (j = 0; j < ylen; j++)
output[i][j] = 0;
return output;
}
uint8_t **nr_alloc_uint8_t_2D_array(uint16_t xlen, uint16_t ylen) {
uint8_t **output;
int i, j;
......@@ -136,7 +162,7 @@ uint8_t **nr_alloc_uint8_t_2D_array(uint16_t xlen, uint16_t ylen) {
return output;
}
void nr_free_uint8_t_3D_array(uint8_t ***input, uint16_t xlen, uint16_t ylen) {
void nr_free_double_3D_array(double ***input, uint16_t xlen, uint16_t ylen) {
int i, j;
for (i = 0; i < xlen; i++) {
......@@ -148,12 +174,7 @@ void nr_free_uint8_t_3D_array(uint8_t ***input, uint16_t xlen, uint16_t ylen) {
free(input);
}
void nr_free_uint8_t_2D_array(uint8_t **input, uint16_t xlen) {
for (int i = 0; i < xlen; i++) free(input[i]);
free(input);
}
void nr_free_double_3D_array(double ***input, uint16_t xlen, uint16_t ylen) {
void nr_free_uint8_t_3D_array(uint8_t ***input, uint16_t xlen, uint16_t ylen) {
int i, j;
for (i = 0; i < xlen; i++) {
......@@ -165,15 +186,29 @@ void nr_free_double_3D_array(double ***input, uint16_t xlen, uint16_t ylen) {
free(input);
}
void nr_free_uint8_t_2D_array(uint8_t **input, uint16_t xlen) {
for (int i = 0; i < xlen; i++) free(input[i]);
free(input);
}
void nr_free_double_2D_array(double **input, uint16_t xlen) {
int i;
for (i = 0; i < xlen; i++) {
free(input[i]);
}
free(input);
}
// Modified Bubble Sort.
void nr_sort_asc_double_1D_array_ind(double *matrix, uint8_t *ind, uint8_t len) {
uint8_t swaps;
int swaps;
double temp;
uint8_t tempInd;
int tempInd;
for (uint8_t i = 0; i < len; i++) {
for (int i = 0; i < len; i++) {
swaps = 0;
for (uint8_t j = 0; j < (len - i) - 1; j++) {
for (int j = 0; j < (len - i) - 1; j++) {
if (matrix[j] > matrix[j + 1]) {
temp = matrix[j];
matrix[j] = matrix[j + 1];
......@@ -190,3 +225,28 @@ void nr_sort_asc_double_1D_array_ind(double *matrix, uint8_t *ind, uint8_t len)
break;
}
}
void nr_sort_asc_int16_1D_array_ind(int32_t *matrix, int *ind, int len) {
int swaps;
int16_t temp;
int tempInd;
for (int i = 0; i < len; i++) {
swaps = 0;
for (int j = 0; j < (len - i) - 1; j++) {
if (matrix[j] > matrix[j + 1]) {
temp = matrix[j];
matrix[j] = matrix[j + 1];
matrix[j + 1] = temp;
tempInd = ind[j];
ind[j] = ind[j + 1];
ind[j + 1] = tempInd;
swaps++;
}
}
if (swaps == 0)
break;
}
}
......@@ -313,3 +313,25 @@ void nr_polar_rate_matching(double *input, double *output, uint16_t *rmp, uint16
}
}
void nr_polar_rate_matching_int16(int16_t *input, int16_t *output, uint16_t *rmp, uint16_t K, uint16_t N, uint16_t E){
if (E>=N) { //repetition
for (int i=0; i<=N-1; i++) output[i]=0;
for (int i=0; i<=E-1; i++){
output[rmp[i]]+=input[i];
}
} else {
if ( (K/(double)E) <= (7.0/16) ) { //puncturing
for (int i=0; i<=N-1; i++) output[i]=0;
} else { //shortening
for (int i=0; i<=N-1; i++) output[i]=INFINITY;
}
for (int i=0; i<=E-1; i++){
output[rmp[i]]=input[i];
}
}
}
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
#include <math.h>
#include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
void nr_polar_rate_matching_pattern(uint16_t *rmp, uint16_t *J, const uint8_t *P_i_, uint16_t K, uint16_t N, uint16_t E){
uint8_t i;
uint16_t *d, *y, ind;
d = (uint16_t *)malloc(sizeof(uint16_t) * N);
y = (uint16_t *)malloc(sizeof(uint16_t) * N);
for (int m=0; m<=N-1; m++) d[m]=m;
for (int m=0; m<=N-1; m++){
i=floor((32*m)/N);
J[m] = (P_i_[i]*(N/32)) + (m%(N/32));
y[m] = d[J[m]];
}
if (E>=N) { //repetition
for (int k=0; k<=E-1; k++) {
ind = (k%N);
rmp[k]=y[ind];
}
} else {
if ( (K/(double)E) <= (7.0/16) ) { //puncturing
for (int k=0; k<=E-1; k++) {
rmp[k]=y[k+N-E];
}
} else { //shortening
for (int k=0; k<=E-1; k++) {
rmp[k]=y[k];
}
}
}
free(d);
free(y);
}
void nr_polar_rate_matching(double *input, double *output, uint16_t *rmp, uint16_t K, uint16_t N, uint16_t E){
if (E>=N) { //repetition
for (int i=0; i<=N-1; i++) output[i]=0;
for (int i=0; i<=E-1; i++){
output[rmp[i]]+=input[i];
}
} else {
if ( (K/(double)E) <= (7.0/16) ) { //puncturing
for (int i=0; i<=N-1; i++) output[i]=0;
} else { //shortening
for (int i=0; i<=N-1; i++) output[i]=INFINITY;
}
for (int i=0; i<=E-1; i++){
output[rmp[i]]=input[i];
}
}
}
void nr_polar_rate_matching_int8(int16_t *input, int16_t *output, uint16_t *rmp, uint16_t K, uint16_t N, uint16_t E){
if (E>=N) { //repetition
for (int i=0; i<=N-1; i++) output[i]=0;
for (int i=0; i<=E-1; i++){
output[rmp[i]]+=input[i];
}
} else {
if ( (K/(double)E) <= (7.0/16) ) { //puncturing
for (int i=0; i<=N-1; i++) output[i]=0;
} else { //shortening
for (int i=0; i<=N-1; i++) output[i]=INFINITY;
}
for (int i=0; i<=E-1; i++){
output[rmp[i]]=input[i];
}
}
}
......@@ -147,6 +147,9 @@ void nr_polar_init(t_nrPolar_paramsPtr *polarParams,
free(J);
build_decoder_tree(newPolarInitNode);
printf("decoder tree nodes %d\n",newPolarInitNode->tree.num_nodes);
} else {
AssertFatal(1 == 0, "[nr_polar_init] New t_nrPolar_paramsPtr could not be created");
}
......
......@@ -215,7 +215,7 @@ int nr_generate_pbch(NR_gNB_PBCH *pbch,
uint8_t nushift;
uint8_t *xbyte = pbch->pbch_a;
memset((void*) xbyte, 0, 1);
uint8_t pbch_a_b[32];
//uint8_t pbch_a_b[32];
LOG_I(PHY, "PBCH generation started\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment