/*
 * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The OpenAirInterface Software Alliance licenses this file to You under
 * the OAI Public License, Version 1.1  (the "License"); you may not use this file
 * except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.openairinterface.org/?page_id=698
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *-------------------------------------------------------------------------------
 * For more information about the OpenAirInterface (OAI) Software Alliance:
 *      contact@openairinterface.org
 */

/*!\file PHY/CODING/nrPolar_tools/nr_polar_decoding_tools.c
 * \brief Polar decoding support routines: SCL LLR/bit/path-metric updates and the recursive SC decoder tree
 * \author Turker Yilmaz
 * \date 2018
 * \version 0.1
 * \company EURECOM
 * \email turker.yilmaz@eurecom.fr
 * \note
 * \warning
*/

#include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
#include "PHY/sse_intrin.h"
#include "PHY/impl_defs_top.h"

//#define DEBUG_NEW_IMPL 1

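/*
 * updateLLR()/updateBit() recursively fill the (xlen x ylen) LLR and bit arrays used by the
 * SC list (SCL) decoder. Each entry in column col is computed from column col+1, and the
 * llrU/bitU flags mark entries that are already up to date so that each one is computed
 * only once per decoded bit.
 */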
void updateLLR(double ***llr,
               uint8_t **llrU,
               uint8_t ***bit,
               uint8_t **bitU,
               uint8_t listSize,
               uint16_t row,
               uint16_t col,
               uint16_t xlen,
               uint8_t ylen)
{
	uint16_t offset = (xlen/(pow(2,(ylen-col-1))));
	for (uint8_t i=0; i<listSize; i++) {
		if (( (row) % (2*offset) ) >= offset ) {
			if (bitU[row-offset][col]==0) updateBit(bit, bitU, listSize, (row-offset), col, xlen, ylen);
			if (llrU[row-offset][col+1]==0) updateLLR(llr, llrU, bit, bitU, listSize, (row-offset), (col+1), xlen, ylen);
			if (llrU[row][col+1]==0) updateLLR(llr, llrU, bit, bitU, listSize, row, (col+1), xlen, ylen);
			llr[row][col][i] = (pow((-1),bit[row-offset][col][i])*llr[row-offset][col+1][i]) + llr[row][col+1][i];
		} else {
			if (llrU[row][col+1]==0) updateLLR(llr, llrU, bit, bitU, listSize, row, (col+1), xlen, ylen);
			if (llrU[row+offset][col+1]==0) updateLLR(llr, llrU, bit, bitU, listSize, (row+offset), (col+1), xlen, ylen);
			computeLLR(llr, row, col, i, offset);
		}
	}

	llrU[row][col]=1;

	//	printf("LLR (a %f, b %f): llr[%d][%d] %f\n",32*a,32*b,col,row,32*llr[col][row]);
}

void updateBit(uint8_t ***bit,
			   uint8_t **bitU,
			   uint8_t listSize,
			   uint16_t row,
			   uint16_t col,
			   uint16_t xlen,
			   uint8_t ylen)
{
	uint16_t offset = ( xlen/(pow(2,(ylen-col))) );

	for (uint8_t i=0; i<listSize; i++) {
		if (( (row) % (2*offset) ) >= offset ) {
			if (bitU[row][col-1]==0) updateBit(bit, bitU, listSize, row, (col-1), xlen, ylen);
			bit[row][col][i] = bit[row][col-1][i];
		} else {
			if (bitU[row][col-1]==0) updateBit(bit, bitU, listSize, row, (col-1), xlen, ylen);
			if (bitU[row+offset][col-1]==0) updateBit(bit, bitU, listSize, (row+offset), (col-1), xlen, ylen);
			bit[row][col][i] = ( (bit[row][col-1][i]+bit[row+offset][col-1][i]) % 2);
		}
	}

	bitU[row][col]=1;
}

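/*
 * Path-metric update for the SCL decoder (the "eq. (11b)" referenced below):
 *   PM_i += log(1 + exp((2u-1) * LLR_i)),
 * i.e. a path is penalised when the hypothesised bit u disagrees with the sign of its LLR
 * (under the convention that a positive LLR favours u = 0).
 */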
void updatePathMetric(double *pathMetric,
                      double ***llr,
                      uint8_t listSize,
                      uint8_t bitValue,
                      uint16_t row)
{
	int8_t multiplier = (2*bitValue) - 1;
	for (uint8_t i=0; i<listSize; i++)
		pathMetric[i] += log ( 1 + exp(multiplier*llr[row][0][i]) ) ; //eq. (11b)
}

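/*
 * updatePathMetric2(): same metric update as updatePathMetric(), but applied to a duplicated
 * list: entries [0, listSize) are updated assuming bit value 0 and entries [listSize, 2*listSize)
 * assuming bit value 1, so pathMetric must have room for 2*listSize entries.
 */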
void updatePathMetric2(double *pathMetric,
                       double ***llr,
                       uint8_t listSize,
                       uint16_t row)
{
	double *tempPM = malloc(sizeof(double) * listSize);
	memcpy(tempPM, pathMetric, (sizeof(double) * listSize));

	uint8_t bitValue = 0;
	int8_t multiplier = (2 * bitValue) - 1;
	for (uint8_t i = 0; i < listSize; i++)
		pathMetric[i] += log(1 + exp(multiplier * llr[row][0][i])); //eq. (11b)

	bitValue = 1;
	multiplier = (2 * bitValue) - 1;
	for (uint8_t i = listSize; i < 2*listSize; i++)
		pathMetric[i] = tempPM[(i-listSize)] + log(1 + exp(multiplier * llr[row][0][(i-listSize)])); //eq. (11b)

	free(tempPM);
}

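/*
 * computeLLR(): exact f (check-node) update of the SC decoder, the "eq. (8a)" referenced below:
 *   L = log((1 + exp(a+b)) / (exp(a) + exp(b))) = 2*atanh(tanh(a/2)*tanh(b/2)).
 */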
void computeLLR(double ***llr,
                uint16_t row,
                uint16_t col,
                uint8_t i,
                uint16_t offset)
{
	double a = llr[row][col + 1][i];
	double b = llr[row + offset][col + 1][i];
	llr[row][col][i] = log((exp(a + b) + 1) / (exp(a) + exp(b))); //eq. (8a)
}

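/*
 * updateCrcChecksum()/updateCrcChecksum2(): incremental CRC update for CRC-aided SCL decoding.
 * Row i2 of the CRC generator matrix crcGen is added (mod 2) to the running checksum of every
 * list entry, presumably invoked by the caller when the corresponding information bit is
 * decided as 1. The "2" variant writes the result into the second half of a duplicated list.
 */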
void updateCrcChecksum(uint8_t **crcChecksum,
					   uint8_t **crcGen,
					   uint8_t listSize,
					   uint32_t i2,
					   uint8_t len)
{
	for (uint8_t i = 0; i < listSize; i++) {
		for (uint8_t j = 0; j < len; j++) {
			crcChecksum[j][i] = ( (crcChecksum[j][i] + crcGen[i2][j]) % 2 );
		}
	}
}

void updateCrcChecksum2(uint8_t **crcChecksum,
						uint8_t **crcGen,
						uint8_t listSize,
						uint32_t i2,
						uint8_t len)
{
	for (uint8_t i = 0; i < listSize; i++) {
		for (uint8_t j = 0; j < len; j++) {
			crcChecksum[j][i+listSize] = ( (crcChecksum[j][i] + crcGen[i2][j]) % 2 );
		}
	}
}



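/*
 * Recursive SC decoder over a pruned binary tree (simplified/fast-SC style): each node covers
 * Nv = 2^level consecutive leaves and holds per-node LLRs (alpha) and hard decisions (beta),
 * both stored as int16_t with decisions in the -1/+1 domain.
 */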
decoder_node_t *new_decoder_node(int first_leaf_index, int level) {

  decoder_node_t *node=(decoder_node_t *)malloc(sizeof(decoder_node_t));

  node->first_leaf_index=first_leaf_index;
  node->level=level;
  node->Nv = 1<<level;
  node->leaf = 0;
  node->left=(decoder_node_t *)NULL;
  node->right=(decoder_node_t *)NULL;
  node->all_frozen=0;
  node->alpha  = (int16_t*)malloc16(node->Nv*sizeof(int16_t));
  node->beta   = (int16_t*)malloc16(node->Nv*sizeof(int16_t));
  memset((void*)node->beta,-1,node->Nv*sizeof(int16_t));
  
  return(node);
}

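/*
 * add_nodes(): builds the decoder tree recursively. Subtrees whose leaves are all frozen bits
 * are not expanded; they are collapsed into a single leaf marked all_frozen, which the decoder
 * functions below can then skip.
 */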
decoder_node_t *add_nodes(int level, int first_leaf_index, t_nrPolar_params *polarParams) {

  int all_frozen_below = 1;
  int Nv = 1<<level;
  decoder_node_t *new_node = new_decoder_node(first_leaf_index, level);
#ifdef DEBUG_NEW_IMPL
  printf("New node %d order %d, level %d\n",polarParams->tree.num_nodes,Nv,level);
#endif
  polarParams->tree.num_nodes++;

  if (level==0) {
#ifdef DEBUG_NEW_IMPL
    printf("leaf %d (%s)\n", first_leaf_index, polarParams->information_bit_pattern[first_leaf_index]==1 ? "information or crc" : "frozen");
#endif
    new_node->leaf=1;
    new_node->all_frozen = polarParams->information_bit_pattern[first_leaf_index]==0 ? 1 : 0;
    return new_node; // this is a leaf node
  }

  for (int i=0;i<Nv;i++) {
    if (polarParams->information_bit_pattern[i+first_leaf_index]>0)
      all_frozen_below=0;
  }

  if (all_frozen_below==0)
    new_node->left=add_nodes(level-1, first_leaf_index, polarParams);
  else {
#ifdef DEBUG_NEW_IMPL
    printf("aggregating frozen bits %d ... %d at level %d (%s)\n",first_leaf_index,first_leaf_index+Nv-1,level,((first_leaf_index/Nv)&1)==0?"left":"right");
#endif
    new_node->leaf=1;
    new_node->all_frozen=1;
  }

  if (all_frozen_below==0)
    new_node->right=add_nodes(level-1,first_leaf_index+(Nv/2),polarParams);

#ifdef DEBUG_NEW_IMPL
  printf("new_node (%d): first_leaf_index %d, left %p, right %p\n",Nv,first_leaf_index,new_node->left,new_node->right);
#endif

  return(new_node);
}

void build_decoder_tree(t_nrPolar_params *polarParams)
{
  polarParams->tree.num_nodes=0;
  polarParams->tree.root = add_nodes(polarParams->n,0,polarParams);
#ifdef DEBUG_NEW_IMPL
  printf("root : left %p, right %p\n",polarParams->tree.root->left,polarParams->tree.root->right);
#endif
}

#if defined(__arm__) || defined(__aarch64__)
// translate 1-1 SIMD functions from SSE to NEON
#define __m128i int16x8_t
#define __m64 int16x4_t
#define _mm_abs_epi16(a) vabsq_s16(a)
#define _mm_min_epi16(a,b) vminq_s16(a,b)
#define _mm_subs_epi16(a,b) vqsubq_s16(a,b) // saturating subtract, matching SSE semantics
#define _mm_abs_pi16(a) vabs_s16(a)
#define _mm_min_pi16(a,b) vmin_s16(a,b)
#define _mm_subs_pi16(a,b) vqsub_s16(a,b) // saturating subtract, matching SSE semantics
#endif

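/*
 * applyFtoleft(): hardware-friendly min-sum form of the f (check-node) update,
 *   alpha_l[i] = sign(alpha_v[i]) * sign(alpha_v[i+Nv/2]) * min(|alpha_v[i]|, |alpha_v[i+Nv/2]|),
 * vectorised with AVX2/SSE/NEON when the block size allows, with a scalar fallback.
 */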
void applyFtoleft(const t_nrPolar_params *pp, decoder_node_t *node) {
  int16_t *alpha_v=node->alpha;
  int16_t *alpha_l=node->left->alpha;
  int16_t *betal = node->left->beta;
  int16_t a,b,absa,absb,maska,maskb,minabs;

#ifdef DEBUG_NEW_IMPL
  printf("applyFtoleft %d, Nv %d (level %d,node->left (leaf %d, AF %d))\n",node->first_leaf_index,node->Nv,node->level,node->left->leaf,node->left->all_frozen);

  for (int i=0;i<node->Nv;i++) printf("i%d (frozen %d): alpha_v[i] = %d\n",i,1-pp->information_bit_pattern[node->first_leaf_index+i],alpha_v[i]);
#endif

  if (node->left->all_frozen == 0) {
#if defined(__AVX2__)
    int avx2mod = (node->Nv/2)&15;
    if (avx2mod == 0) {
      __m256i a256,b256,absa256,absb256,minabs256;
      int avx2len = node->Nv/2/16;

      //      printf("avx2len %d\n",avx2len);
      for (int i=0;i<avx2len;i++) {
	a256       =((__m256i*)alpha_v)[i];
	b256       =((__m256i*)alpha_v)[i+avx2len];
	absa256    =_mm256_abs_epi16(a256);
	absb256    =_mm256_abs_epi16(b256);
	minabs256  =_mm256_min_epi16(absa256,absb256);
	((__m256i*)alpha_l)[i] =_mm256_sign_epi16(minabs256,_mm256_sign_epi16(a256,b256));
      }
    }
    else if (avx2mod == 8) {
      __m128i a128,b128,absa128,absb128,minabs128;
      a128       =*((__m128i*)alpha_v);
      b128       =((__m128i*)alpha_v)[1];
      absa128    =_mm_abs_epi16(a128);
      absb128    =_mm_abs_epi16(b128);
      minabs128  =_mm_min_epi16(absa128,absb128);
      *((__m128i*)alpha_l) =_mm_sign_epi16(minabs128,_mm_sign_epi16(a128,b128));
    }
    else if (avx2mod == 4) {
      __m64 a64,b64,absa64,absb64,minabs64;
      a64       =*((__m64*)alpha_v);
      b64       =((__m64*)alpha_v)[1];
      absa64    =_mm_abs_pi16(a64);
      absb64    =_mm_abs_pi16(b64);
      minabs64  =_mm_min_pi16(absa64,absb64);
      *((__m64*)alpha_l) =_mm_sign_pi16(minabs64,_mm_sign_pi16(a64,b64));
    }
    else
#else
    int sse4mod = (node->Nv/2)&7;
    int sse4len = node->Nv/2/8;
#if defined(__arm__) || defined(__aarch64__)
    int16x8_t signatimesb,comp1,comp2,negminabs128;
    int16x8_t zero=vdupq_n_s16(0);
#endif

    if (sse4mod == 0) {
      for (int i=0;i<sse4len;i++) {
	__m128i a128,b128,absa128,absb128,minabs128;

	a128       =((__m128i*)alpha_v)[i];          // block i, mirroring the AVX2 loop above
	b128       =((__m128i*)alpha_v)[i+sse4len];
	absa128    =_mm_abs_epi16(a128);
	absb128    =_mm_abs_epi16(b128);
	minabs128  =_mm_min_epi16(absa128,absb128);
#if defined(__arm__) || defined(__aarch64__)
	// unfortunately no direct equivalent to _mm_sign_epi16
	signatimesb=veorq_s16(a128,b128);
	comp1=vreinterpretq_s16_u16(vcltq_s16(signatimesb,zero));
	comp2=vreinterpretq_s16_u16(vcgeq_s16(signatimesb,zero));
	negminabs128=vnegq_s16(minabs128);
	((__m128i*)alpha_l)[i] =vorrq_s16(vandq_s16(minabs128,comp2),vandq_s16(negminabs128,comp1));
#else
	((__m128i*)alpha_l)[i] =_mm_sign_epi16(minabs128,_mm_sign_epi16(a128,b128));
#endif
      }
    }
    else if (sse4mod == 4) {
      __m64 a64,b64,absa64,absb64,minabs64;
      a64       =*((__m64*)alpha_v);
      b64       =((__m64*)alpha_v)[1];
      absa64    =_mm_abs_pi16(a64);
      absb64    =_mm_abs_pi16(b64);
      minabs64  =_mm_min_pi16(absa64,absb64);
#if defined(__arm__) || defined(__aarch64__)
      AssertFatal(1==0,"Need to do this still for ARM\n");
#else
      *((__m64*)alpha_l) =_mm_sign_pi16(minabs64,_mm_sign_pi16(a64,b64));
#endif
    }

    else
#endif
    { // equivalent scalar code to above, activated only on non x86/ARM architectures
      for (int i=0;i<node->Nv/2;i++) {
    	  a=alpha_v[i];
    	  b=alpha_v[i+(node->Nv/2)];
    	  maska=a>>15;
    	  maskb=b>>15;
    	  absa=(a+maska)^maska;
    	  absb=(b+maskb)^maskb;
    	  minabs = absa<absb ? absa : absb;
    	  alpha_l[i] = (maska^maskb)==0 ? minabs : -minabs;
    	  //	printf("alphal[%d] %d (%d,%d)\n",i,alpha_l[i],a,b);
    	  }
    }
    if (node->Nv == 2) { // apply hard decision on left node
      betal[0] = (alpha_l[0]>0) ? -1 : 1;
#ifdef DEBUG_NEW_IMPL
      printf("betal[0] %d (%p)\n",betal[0],&betal[0]);
#endif
      pp->nr_polar_U[node->first_leaf_index] = (1+betal[0])>>1;
#ifdef DEBUG_NEW_IMPL
      printf("Setting bit %d to %d (LLR %d)\n",node->first_leaf_index,(betal[0]+1)>>1,alpha_l[0]);
#endif
    }
  }
}

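/*
 * applyGtoright(): g (variable-node) update using the left child's hard decisions betal
 * (kept in the -1/+1 domain): alpha_r[i] = alpha_v[i+Nv/2] - betal[i]*alpha_v[i],
 * vectorised with AVX2/SSE/NEON when the block size allows, with a scalar fallback.
 */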
void applyGtoright(const t_nrPolar_params *pp,decoder_node_t *node) {

  int16_t *alpha_v=node->alpha;
  int16_t *alpha_r=node->right->alpha;
  int16_t *betal = node->left->beta;
  int16_t *betar = node->right->beta;

#ifdef DEBUG_NEW_IMPL
  printf("applyGtoright %d, Nv %d (level %d), (leaf %d, AF %d)\n",node->first_leaf_index,node->Nv,node->level,node->right->leaf,node->right->all_frozen);
#endif
  
  if (node->right->all_frozen == 0) {  
#if defined(__AVX2__) 
    int avx2mod = (node->Nv/2)&15;
    if (avx2mod == 0) {
      int avx2len = node->Nv/2/16;
      
      for (int i=0;i<avx2len;i++) {
	((__m256i *)alpha_r)[i] = 
	  _mm256_subs_epi16(((__m256i *)alpha_v)[i+avx2len],
			    _mm256_sign_epi16(((__m256i *)alpha_v)[i],
					      ((__m256i *)betal)[i]));	
      }
    }
    else if (avx2mod == 8) {
      ((__m128i *)alpha_r)[0] = _mm_subs_epi16(((__m128i *)alpha_v)[1],_mm_sign_epi16(((__m128i *)alpha_v)[0],((__m128i *)betal)[0]));	
    }
    else if (avx2mod == 4) {
      ((__m64 *)alpha_r)[0] = _mm_subs_pi16(((__m64 *)alpha_v)[1],_mm_sign_pi16(((__m64 *)alpha_v)[0],((__m64 *)betal)[0]));
    }
    else
#else
    int sse4mod = (node->Nv/2)&7;

    if (sse4mod == 0) {
      int sse4len = node->Nv/2/8;

      for (int i=0;i<sse4len;i++) {
#if defined(__arm__) || defined(__aarch64__)
	((int16x8_t *)alpha_r)[i] = vsubq_s16(((int16x8_t *)alpha_v)[i+sse4len],vmulq_s16(((int16x8_t *)alpha_v)[i],((int16x8_t *)betal)[i]));
#else
	((__m128i *)alpha_r)[i] = _mm_subs_epi16(((__m128i *)alpha_v)[i+sse4len],_mm_sign_epi16(((__m128i *)alpha_v)[i],((__m128i *)betal)[i]));
#endif
      }
    }
    else if (sse4mod == 4) {
#if defined(__arm__) || defined(__aarch64__)
      ((int16x4_t *)alpha_r)[0] = vsub_s16(((int16x4_t *)alpha_v)[1],vmul_s16(((int16x4_t *)alpha_v)[0],((int16x4_t *)betal)[0]));
#else
      ((__m64 *)alpha_r)[0] = _mm_subs_pi16(((__m64 *)alpha_v)[1],_mm_sign_pi16(((__m64 *)alpha_v)[0],((__m64 *)betal)[0]));
#endif
    }
    else
#endif
      {// equivalent scalar code to above, activated only on non x86/ARM architectures or Nv=1,2
	for (int i=0;i<node->Nv/2;i++) {
	  alpha_r[i] = alpha_v[i+(node->Nv/2)] - (betal[i]*alpha_v[i]);
	}
      }
    if (node->Nv == 2) { // apply hard decision on right node
      betar[0] = (alpha_r[0]>0) ? -1 : 1;
      pp->nr_polar_U[node->first_leaf_index+1] = (1+betar[0])>>1;
#ifdef DEBUG_NEW_IMPL
      printf("Setting bit %d to %d (LLR %d)\n",node->first_leaf_index+1,(betar[0]+1)>>1,alpha_r[0]);
#endif
    }
  }
}

int16_t all1[16] __attribute__((aligned(32))) = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; // aligned for the vector loads below

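/*
 * computeBeta(): combines the children's hard decisions into this node's beta, still in the
 * -1/+1 domain: the lower half is betal XOR betar (equal -> -1, different -> +1) and the upper
 * half is a copy of betar, mirroring the polar encoder's (u XOR v, v) butterfly.
 */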
void computeBeta(const t_nrPolar_params *pp,decoder_node_t *node) {

  int16_t *betav = node->beta;
  int16_t *betal = node->left->beta;
  int16_t *betar = node->right->beta;
#ifdef DEBUG_NEW_IMPL
  printf("Computing beta @ level %d first_leaf_index %d (all_frozen %d)\n",node->level,node->first_leaf_index,node->left->all_frozen);
#endif
  if (node->left->all_frozen==0) { // if left node is not aggregation of frozen bits
#if defined(__AVX2__)
    int avx2mod = (node->Nv/2)&15;
    register __m256i allones=*((__m256i*)all1);
    if (avx2mod == 0) {
      int avx2len = node->Nv/2/16;
      for (int i=0;i<avx2len;i++) {
	((__m256i*)betav)[i] = _mm256_or_si256(_mm256_cmpeq_epi16(((__m256i*)betar)[i],
								  ((__m256i*)betal)[i]),allones);
      }
    }
    else if (avx2mod == 8) {
      ((__m128i*)betav)[0] = _mm_or_si128(_mm_cmpeq_epi16(((__m128i*)betar)[0],
							  ((__m128i*)betal)[0]),*((__m128i*)all1));
    }
    else if (avx2mod == 4) {
      ((__m64*)betav)[0] = _mm_or_si64(_mm_cmpeq_pi16(((__m64*)betar)[0],
						      ((__m64*)betal)[0]),*((__m64*)all1));
    }
    else
#else
    int sse4mod = (node->Nv/2)&7;

    if (sse4mod == 0) {
      int sse4len = node->Nv/2/8;
      register __m128i allones=*((__m128i*)all1);
      for (int i=0;i<sse4len;i++) {
	((__m128i*)betav)[i] = _mm_or_si128(_mm_cmpeq_epi16(((__m128i*)betar)[i], ((__m128i*)betal)[i]),allones);
      }
    }
    else if (sse4mod == 4) {
      ((__m64*)betav)[0] = _mm_or_si64(_mm_cmpeq_pi16(((__m64*)betar)[0], ((__m64*)betal)[0]),*((__m64*)all1));
    }
    else
#endif
      {
	for (int i=0;i<node->Nv/2;i++) {
	  betav[i] = (betal[i] != betar[i]) ? 1 : -1;
	}
      }
  }
  else memcpy((void*)&betav[0],betar,(node->Nv/2)*sizeof(int16_t));
  memcpy((void*)&betav[node->Nv/2],betar,(node->Nv/2)*sizeof(int16_t));
}

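/*
 * generic_polar_decoder(): recursive successive-cancellation pass over the pruned decoder tree,
 * in the classic f -> recurse left -> g -> recurse right -> combine (beta) order; the decided
 * bits are written into pp->nr_polar_U by applyFtoleft()/applyGtoright().
 */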
void generic_polar_decoder(const t_nrPolar_params *pp,decoder_node_t *node) {

  // Apply F to left
  applyFtoleft(pp, node);
  // if left is not a leaf recurse down to the left
  if (node->left->leaf==0)
    generic_polar_decoder(pp, node->left);

  applyGtoright(pp, node);
  if (node->right->leaf==0) generic_polar_decoder(pp, node->right);

  computeBeta(pp, node);
}