Commit 1cb484f1 authored by frtabu

fix more trivial cppcheck errors and warnings

parent d171e18c
...@@ -164,9 +164,10 @@ int processoption(paramdef_t *cfgoptions, char *value) { ...@@ -164,9 +164,10 @@ int processoption(paramdef_t *cfgoptions, char *value) {
*/ */
int config_check_unknown_cmdlineopt(char *prefix) { int config_check_unknown_cmdlineopt(char *prefix) {
int unknowndetected=0; int unknowndetected=0;
char testprefix[CONFIG_MAXOPTLENGTH]=""; char testprefix[CONFIG_MAXOPTLENGTH];
int finalcheck = 0; int finalcheck = 0;
memset(testpref,0,sizeof(testprefix));
if (prefix != NULL) { if (prefix != NULL) {
if (strcmp(prefix,CONFIG_CHECKALLSECTIONS) == 0) if (strcmp(prefix,CONFIG_CHECKALLSECTIONS) == 0)
finalcheck = 1; finalcheck = 1;
......
...@@ -270,6 +270,7 @@ int pnf_param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_pnf_param_re ...@@ -270,6 +270,7 @@ int pnf_param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_pnf_param_re
for(int i = 0; i < resp->pnf_phy.number_of_phys; ++i) for(int i = 0; i < resp->pnf_phy.number_of_phys; ++i)
{ {
phy_info phy; phy_info phy;
memset(phy,0,sizeof(phy));
phy.index = resp->pnf_phy.phy[i].phy_config_index; phy.index = resp->pnf_phy.phy[i].phy_config_index;
printf("[VNF] (PHY:%d) phy_config_idx:%d\n", i, resp->pnf_phy.phy[i].phy_config_index); printf("[VNF] (PHY:%d) phy_config_idx:%d\n", i, resp->pnf_phy.phy[i].phy_config_index);
...@@ -287,6 +288,7 @@ int pnf_param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_pnf_param_re ...@@ -287,6 +288,7 @@ int pnf_param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_pnf_param_re
for(int i = 0; i < resp->pnf_rf.number_of_rfs; ++i) { for(int i = 0; i < resp->pnf_rf.number_of_rfs; ++i) {
rf_info rf; rf_info rf;
memset(rf,0,sizeof(rf));
rf.index = resp->pnf_rf.rf[i].rf_config_index; rf.index = resp->pnf_rf.rf[i].rf_config_index;
printf("[VNF] (RF:%d) rf_config_idx:%d\n", i, resp->pnf_rf.rf[i].rf_config_index); printf("[VNF] (RF:%d) rf_config_idx:%d\n", i, resp->pnf_rf.rf[i].rf_config_index);
...@@ -897,7 +899,7 @@ int param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_param_response_t ...@@ -897,7 +899,7 @@ int param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_param_response_t
// for now just 1 // for now just 1
printf("[VNF] %d.%d pnf p7 %s:%d timing %d %d %d %d\n", p5_idx, phy->id, phy->remote_addr, phy->remote_port, p7_vnf->timing_window, p7_vnf->periodic_timing_period, p7_vnf->aperiodic_timing_enabled, p7_vnf->periodic_timing_period); printf("[VNF] %d.%d pnf p7 %s:%d timing %u %u %u %u\n", p5_idx, phy->id, phy->remote_addr, phy->remote_port, p7_vnf->timing_window, p7_vnf->periodic_timing_period, p7_vnf->aperiodic_timing_enabled, p7_vnf->periodic_timing_period);
req->header.message_id = NFAPI_CONFIG_REQUEST; req->header.message_id = NFAPI_CONFIG_REQUEST;
req->header.phy_id = phy->id; req->header.phy_id = phy->id;
...@@ -919,7 +921,7 @@ int param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_param_response_t ...@@ -919,7 +921,7 @@ int param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_param_response_t
req->nfapi_config.timing_window.tl.tag = NFAPI_NFAPI_TIMING_WINDOW_TAG; req->nfapi_config.timing_window.tl.tag = NFAPI_NFAPI_TIMING_WINDOW_TAG;
req->nfapi_config.timing_window.value = p7_vnf->timing_window; req->nfapi_config.timing_window.value = p7_vnf->timing_window;
printf("[VNF] Timing window:%d\n", p7_vnf->timing_window); printf("[VNF] Timing window:%u\n", p7_vnf->timing_window);
req->num_tlv++; req->num_tlv++;
if(p7_vnf->periodic_timing_enabled || p7_vnf->aperiodic_timing_enabled) { if(p7_vnf->periodic_timing_enabled || p7_vnf->aperiodic_timing_enabled) {
......
...@@ -26,9 +26,9 @@ ...@@ -26,9 +26,9 @@
date: 09.2012 date: 09.2012
*/ */
#ifndef TC_MAIN #ifndef TC_MAIN
#include "coding_defs.h" #include "coding_defs.h"
#else #else
#include <stdint.h> #include <stdint.h>
#endif #endif
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
...@@ -66,11 +66,11 @@ struct treillis { ...@@ -66,11 +66,11 @@ struct treillis {
union { union {
uint8x8_t systematic_andp1_64[3]; uint8x8_t systematic_andp1_64[3];
char systematic_andp1_8[24]; char systematic_andp1_8[24];
}__attribute__((aligned(64))); } __attribute__((aligned(64)));
union { union {
uint8x8_t parity2_64[3]; uint8x8_t parity2_64[3];
char parity2_8[24]; char parity2_8[24];
}__attribute__((aligned(64))); } __attribute__((aligned(64)));
int exit_state; int exit_state;
}; };
#endif #endif
...@@ -79,23 +79,20 @@ struct treillis all_treillis[8][256]; ...@@ -79,23 +79,20 @@ struct treillis all_treillis[8][256];
int all_treillis_initialized=0; int all_treillis_initialized=0;
/*
 * Advance the 3-bit encoder state by one input bit and return one output bit.
 *
 * input : bit to encode; only its least-significant bit influences the
 *         returned value and the new state (higher bits are masked away).
 * state : in/out state, rewritten masked to 3 bits (0..7).
 *         NOTE(review): the incoming value is presumably already in 0..7;
 *         bit 3 of a larger value would leak into the new state - confirm
 *         against callers.
 *
 * Returns the output bit (0 or 1), computed from the state as it was on entry.
 *
 * NOTE(review): by its name this is presumably the recursive systematic
 * convolutional constituent encoder of the 3GPP LTE turbo code - confirm
 * against the specification.
 */
static inline unsigned char threegpplte_rsc(unsigned char input,unsigned char *state) {
  unsigned char output;
  /* The output must be derived before *state is overwritten below. */
  output = (input ^ (*state>>2) ^ (*state>>1))&1;
  *state = (((input<<2)^(*state>>1))^((*state>>1)<<2)^((*state)<<2))&7;
  return(output);
}
/*
 * Emit one pair of termination bits from the current state, then shift the
 * state right by one position.
 *
 * x     : out, receives (state bit 0) XOR (state bit 1).
 * z     : out, receives (state bit 2) XOR (state bit 0).
 * state : in/out, replaced by its value shifted right by one bit.
 *
 * Both output bits are computed from the state as it was on entry; the
 * shift is applied last. The three pointers are expected to refer to
 * distinct objects, because *state is re-read after *z has been written.
 *
 * NOTE(review): by its name this presumably implements the trellis
 * termination step of the 3GPP LTE turbo encoder - confirm against the
 * specification.
 */
static inline void threegpplte_rsc_termination(unsigned char *x,unsigned char *z,unsigned char *state) {
  *z = ((*state>>2) ^ (*state)) &1;
  *x = ((*state) ^ (*state>>1)) &1;
  *state = (*state)>>1;
}
static void treillis_table_init(void) static void treillis_table_init(void) {
{
//struct treillis t[][]=all_treillis; //struct treillis t[][]=all_treillis;
//t=memalign(16,sizeof(struct treillis)*8*256); //t=memalign(16,sizeof(struct treillis)*8*256);
int i, j,b; int i, j,b;
...@@ -114,8 +111,8 @@ static void treillis_table_init(void) ...@@ -114,8 +111,8 @@ static void treillis_table_init(void)
all_treillis[i][j].systematic_andp1_8[b*3]= (j&(1<<(7-b)))>>(7-b); all_treillis[i][j].systematic_andp1_8[b*3]= (j&(1<<(7-b)))>>(7-b);
v=threegpplte_rsc( all_treillis[i][j].systematic_andp1_8[b*3] , v=threegpplte_rsc( all_treillis[i][j].systematic_andp1_8[b*3] ,
&current_state); &current_state);
all_treillis[i][j].systematic_andp1_8[b*3+1]=v; // for the yparity1 all_treillis[i][j].systematic_andp1_8[b*3+1]=v; // for the yparity1
// all_treillis[i][j].parity1_8[b*3+1]=v; // for the yparity1 // all_treillis[i][j].parity1_8[b*3+1]=v; // for the yparity1
all_treillis[i][j].parity2_8[b*3+2]=v; // for the yparity2 all_treillis[i][j].parity2_8[b*3+2]=v; // for the yparity2
} }
...@@ -128,14 +125,12 @@ static void treillis_table_init(void) ...@@ -128,14 +125,12 @@ static void treillis_table_init(void)
} }
char interleave_compact_byte(short * base_interleaver,unsigned char * input, unsigned char * output, int n) char interleave_compact_byte(short *base_interleaver,unsigned char *input, unsigned char *output, int n) {
{
char expandInput[768*8] __attribute__((aligned(32))); char expandInput[768*8] __attribute__((aligned(32)));
int i,loop=n>>4; int i,loop=n>>4;
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
#ifndef __AVX2__ #ifndef __AVX2__
__m128i *i_128=(__m128i *)input, *o_128=(__m128i*)expandInput; __m128i *i_128=(__m128i *)input, *o_128=(__m128i *)expandInput;
__m128i tmp1, tmp2, tmp3, tmp4; __m128i tmp1, tmp2, tmp3, tmp4;
__m128i BIT_MASK = _mm_set_epi8( 0b00000001, __m128i BIT_MASK = _mm_set_epi8( 0b00000001,
0b00000010, 0b00000010,
...@@ -153,42 +148,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -153,42 +148,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
0b00100000, 0b00100000,
0b01000000, 0b01000000,
0b10000000); 0b10000000);
#else #else
__m256i *i_256=(__m256i *)input, *o_256=(__m256i*)expandInput; __m256i *i_256=(__m256i *)input, *o_256=(__m256i *)expandInput;
__m256i tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; __m256i tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
__m256i BIT_MASK = _mm256_set_epi8( 0b00000001, __m256i BIT_MASK = _mm256_set_epi8( 0b00000001,
0b00000010, 0b00000010,
0b00000100, 0b00000100,
0b00001000, 0b00001000,
0b00010000, 0b00010000,
0b00100000, 0b00100000,
0b01000000, 0b01000000,
0b10000000, 0b10000000,
0b00000001, 0b00000001,
0b00000010, 0b00000010,
0b00000100, 0b00000100,
0b00001000, 0b00001000,
0b00010000, 0b00010000,
0b00100000, 0b00100000,
0b01000000, 0b01000000,
0b10000000, 0b10000000,
0b00000001, 0b00000001,
0b00000010, 0b00000010,
0b00000100, 0b00000100,
0b00001000, 0b00001000,
0b00010000, 0b00010000,
0b00100000, 0b00100000,
0b01000000, 0b01000000,
0b10000000, 0b10000000,
0b00000001, 0b00000001,
0b00000010, 0b00000010,
0b00000100, 0b00000100,
0b00001000, 0b00001000,
0b00010000, 0b00010000,
0b00100000, 0b00100000,
0b01000000, 0b01000000,
0b10000000); 0b10000000);
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
uint8x16_t *i_128=(uint8x16_t *)input, *o_128=(uint8x16_t *)expandInput; uint8x16_t *i_128=(uint8x16_t *)input, *o_128=(uint8x16_t *)expandInput;
...@@ -196,40 +190,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -196,40 +190,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
uint16x8_t tmp3; uint16x8_t tmp3;
uint32x4_t tmp4; uint32x4_t tmp4;
uint8x16_t and_tmp; uint8x16_t and_tmp;
uint8x16_t BIT_MASK = { 0b10000000, uint8x16_t BIT_MASK = { 0b10000000,
0b01000000, 0b01000000,
0b00100000, 0b00100000,
0b00010000, 0b00010000,
0b00001000, 0b00001000,
0b00000100, 0b00000100,
0b00000010, 0b00000010,
0b00000001, 0b00000001,
0b10000000, 0b10000000,
0b01000000, 0b01000000,
0b00100000, 0b00100000,
0b00010000, 0b00010000,
0b00001000, 0b00001000,
0b00000100, 0b00000100,
0b00000010, 0b00000010,
0b00000001}; 0b00000001
};
#endif #endif
#ifndef __AVX2__ #ifndef __AVX2__
if ((n&15) > 0) if ((n&15) > 0)
loop++; loop++;
#else #else
loop=n>>5; loop=n>>5;
if ((n&31) > 0) if ((n&31) > 0)
loop++; loop++;
#endif
#endif
for (i=0; i<loop ; i++ ) { for (i=0; i<loop ; i++ ) {
// int cur_byte=i<<3; // int cur_byte=i<<3;
// for (b=0;b<8;b++) // for (b=0;b<8;b++)
// expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b); // expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b);
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
#ifndef __AVX2__ #ifndef __AVX2__
tmp1=_mm_load_si128(i_128++); // tmp1 = B0,B1,...,B15 tmp1=_mm_load_si128(i_128++); // tmp1 = B0,B1,...,B15
...@@ -237,29 +232,22 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -237,29 +232,22 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3 tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3
tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1 tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK); *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);
tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3 tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7 tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7
tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5 tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7 tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
tmp2=_mm_unpackhi_epi8(tmp1,tmp1); // tmp2 = B8,B8,B9,B9,...,B15,B15 tmp2=_mm_unpackhi_epi8(tmp1,tmp1); // tmp2 = B8,B8,B9,B9,...,B15,B15
tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B8,B8,B8,B9,B9,B9,B9,B10,B10,B10,B10,B11,B11,B11,B11 tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B8,B8,B8,B9,B9,B9,B9,B10,B10,B10,B10,B11,B11,B11,B11
tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B8,B8,B8,B8,B8,B8,B8,B8,B9,B9,B9,B9,B9,B9,B9,B9 tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B8,B8,B8,B8,B8,B8,B8,B8,B9,B9,B9,B9,B9,B9,B9,B9
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B10,B10,B10,B10,B10,B10,B10,B10,B11,B11,B11,B11,B11,B11,B11,B11 tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B10,B10,B10,B10,B10,B10,B10,B10,B11,B11,B11,B11,B11,B11,B11,B11
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12,B12,B12,B12,B13,B13,B13,B13,B14,B14,B14,B14,B15,B15,B15,B15 tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12,B12,B12,B12,B13,B13,B13,B13,B14,B14,B14,B14,B15,B15,B15,B15
tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12,B12,B12,B12,B12,B12,B12,B12,B13,B13,B13,B13,B13,B13,B13,B13 tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12,B12,B12,B12,B12,B12,B12,B12,B13,B13,B13,B13,B13,B13,B13,B13
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B14,B14,B14,B14,B14,B14,B14,B14,B15,B15,B15,B15,B15,B15,B15,B15 tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B14,B14,B14,B14,B14,B14,B14,B14,B15,B15,B15,B15,B15,B15,B15,B15
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
#else #else
...@@ -281,7 +269,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -281,7 +269,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
//print_bytes2("out",(uint8_t*)o_256); //print_bytes2("out",(uint8_t*)o_256);
o_256[4]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);; o_256[4]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
//print_bytes2("out",(uint8_t*)(o_256+4)); //print_bytes2("out",(uint8_t*)(o_256+4));
tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7,B20,B20,B20,B20,...,B23,B23,B23,B23 tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7,B20,B20,B20,B20,...,B23,B23,B23,B23
tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5,B20,B20...,B21..,B21 tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5,B20,B20...,B21..,B21
tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7,B22...,B22,B23,...,B23 tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7,B22...,B22,B23,...,B23
...@@ -297,7 +284,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -297,7 +284,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
//print_bytes2("out",(uint8_t*)(o_256+1)); //print_bytes2("out",(uint8_t*)(o_256+1));
o_256[5]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);; o_256[5]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
//print_bytes2("out",(uint8_t*)(o_256+4)); //print_bytes2("out",(uint8_t*)(o_256+4));
tmp2=_mm256_unpackhi_epi8(tmp1,tmp1); // tmp2 = B8 B9 B10 B11 B12 B13 B14 B15 B25 B26 B27 B28 B29 B30 B31 tmp2=_mm256_unpackhi_epi8(tmp1,tmp1); // tmp2 = B8 B9 B10 B11 B12 B13 B14 B15 B25 B26 B27 B28 B29 B30 B31
tmp3=_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B9,B10,B11,B26,B27,B28,B29 tmp3=_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B9,B10,B11,B26,B27,B28,B29
tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B8,B9,B26,B27 tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B8,B9,B26,B27
...@@ -314,11 +300,10 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -314,11 +300,10 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
//print_bytes2("out",(uint8_t*)(o_256+2)); //print_bytes2("out",(uint8_t*)(o_256+2));
o_256[6]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);; o_256[6]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
//print_bytes2("out",(uint8_t*)(o_256+4)); //print_bytes2("out",(uint8_t*)(o_256+4));
tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12 B13 B14 B15 B28 B29 B30 B31 tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12 B13 B14 B15 B28 B29 B30 B31
tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12 B13 B28 B29 tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12 B13 B28 B29
tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 = B14 B15 B30 B31 tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 = B14 B15 B30 B31
tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1); // tmp6 = B12 B13 B14 B15 tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1); // tmp6 = B12 B13 B14 B15
tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0); // tmp7 = B28 B29 B30 B31 tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0); // tmp7 = B28 B29 B30 B31
//print_bytes2("tmp2",(uint8_t*)&tmp2); //print_bytes2("tmp2",(uint8_t*)&tmp2);
//print_bytes2("tmp3",(uint8_t*)&tmp3); //print_bytes2("tmp3",(uint8_t*)&tmp3);
...@@ -330,48 +315,35 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -330,48 +315,35 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
//print_bytes2("out",(uint8_t*)(o_256+3)); //print_bytes2("out",(uint8_t*)(o_256+3));
o_256[7]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);; o_256[7]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
//print_bytes2("out",(uint8_t*)(o_256+7)); //print_bytes2("out",(uint8_t*)(o_256+7));
o_256+=8; o_256+=8;
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
tmp1=vld1q_u8((uint8_t*)i_128); tmp1=vld1q_u8((uint8_t *)i_128);
//print_bytes("tmp1:",(uint8_t*)&tmp1); //print_bytes("tmp1:",(uint8_t*)&tmp1);
uint8x16x2_t temp1 = vzipq_u8(tmp1,tmp1); uint8x16x2_t temp1 = vzipq_u8(tmp1,tmp1);
tmp2 = temp1.val[0]; tmp2 = temp1.val[0];
uint16x8x2_t temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2); uint16x8x2_t temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2);
tmp3 = temp2.val[0]; tmp3 = temp2.val[0];
uint32x4x2_t temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3); uint32x4x2_t temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3);
tmp4 = temp3.val[0]; tmp4 = temp3.val[0];
//print_bytes("tmp4:",(uint8_t*)&tmp4); //print_bytes("tmp4:",(uint8_t*)&tmp4);
*o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //1 *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //1
//print_bytes("o:",(uint8_t*)(o_128-1)); //print_bytes("o:",(uint8_t*)(o_128-1));
tmp4 = temp3.val[1]; tmp4 = temp3.val[1];
//print_bytes("tmp4:",(uint8_t*)&tmp4); //print_bytes("tmp4:",(uint8_t*)&tmp4);
*o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //2 *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //2
//print_bytes("o:",(uint8_t*)(o_128-1)); //print_bytes("o:",(uint8_t*)(o_128-1));
tmp3 = temp2.val[1]; tmp3 = temp2.val[1];
temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3); temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3);
tmp4 = temp3.val[0]; tmp4 = temp3.val[0];
//print_bytes("tmp4:",(uint8_t*)&tmp4); //print_bytes("tmp4:",(uint8_t*)&tmp4);
*o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //3 *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //3
//print_bytes("o:",(uint8_t*)(o_128-1)); //print_bytes("o:",(uint8_t*)(o_128-1));
tmp4 = temp3.val[1]; tmp4 = temp3.val[1];
//print_bytes("tmp4:",(uint8_t*)&tmp4); //print_bytes("tmp4:",(uint8_t*)&tmp4);
*o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //4 *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //4
//and_tmp = vandq_u8((uint8x16_t)tmp4,BIT_MASK); print_bytes("and:",and_tmp); //and_tmp = vandq_u8((uint8x16_t)tmp4,BIT_MASK); print_bytes("and:",and_tmp);
//print_bytes("o:",(uint8_t*)(o_128-1)); //print_bytes("o:",(uint8_t*)(o_128-1));
temp1 = vzipq_u8(tmp1,tmp1); temp1 = vzipq_u8(tmp1,tmp1);
tmp2 = temp1.val[1]; tmp2 = temp1.val[1];
temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2); temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2);
...@@ -379,52 +351,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -379,52 +351,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3); temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3);
tmp4 = temp3.val[0]; tmp4 = temp3.val[0];
//print_bytes("tmp4:",(uint8_t*)&tmp4); //print_bytes("tmp4:",(uint8_t*)&tmp4);
*o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //5 *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //5
//print_bytes("o:",(uint8_t*)(o_128-1)); //print_bytes("o:",(uint8_t*)(o_128-1));
tmp4 = temp3.val[1]; tmp4 = temp3.val[1];
//print_bytes("tmp4:",(uint8_t*)&tmp4); //print_bytes("tmp4:",(uint8_t*)&tmp4);
*o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //6 *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //6
//print_bytes("o:",(uint8_t*)(o_128-1)); //print_bytes("o:",(uint8_t*)(o_128-1));
temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2); temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2);
tmp3 = temp2.val[1]; tmp3 = temp2.val[1];
temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3); temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3);
tmp4 = temp3.val[0]; tmp4 = temp3.val[0];
//print_bytes("tmp4:",(uint8_t*)&tmp4); //print_bytes("tmp4:",(uint8_t*)&tmp4);
*o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //7 *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //7
//print_bytes("o:",(uint8_t*)(o_128-1)); //print_bytes("o:",(uint8_t*)(o_128-1));
tmp4 = temp3.val[1]; tmp4 = temp3.val[1];
//print_bytes("tmp4:",(uint8_t*)&tmp4); //print_bytes("tmp4:",(uint8_t*)&tmp4);
*o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //7 *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //7
//print_bytes("o:",(uint8_t*)(o_128-1)); //print_bytes("o:",(uint8_t*)(o_128-1));
i_128++; i_128++;
#endif #endif
} }
short * ptr_intl=base_interleaver; short *ptr_intl=base_interleaver;
#if defined(__x86_64) || defined(__i386__) #if defined(__x86_64) || defined(__i386__)
#ifndef __AVX2__ #ifndef __AVX2__
__m128i tmp; __m128i tmp;
uint16_t *systematic2_ptr=(uint16_t *) output; uint16_t *systematic2_ptr=(uint16_t *) output;
#else #else
__m256i tmp; __m256i tmp;
uint32_t *systematic2_ptr=(uint32_t *) output; uint32_t *systematic2_ptr=(uint32_t *) output;
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
uint8x16_t tmp; uint8x16_t tmp;
const uint8_t __attribute__ ((aligned (16))) _Powers[16]= const uint8_t __attribute__ ((aligned (16))) _Powers[16]=
{ 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 }; { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 };
// Set the powers of 2 (do it once for all, if applicable)
// Set the powers of 2 (do it once for all, if applicable)
uint8x16_t Powers= vld1q_u8(_Powers); uint8x16_t Powers= vld1q_u8(_Powers);
uint8_t *systematic2_ptr=(uint8_t *) output; uint8_t *systematic2_ptr=(uint8_t *) output;
#endif #endif
...@@ -435,8 +396,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -435,8 +396,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
#endif #endif
for ( i=0; i< input_length_words ; i ++ ) { for ( i=0; i< input_length_words ; i ++ ) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
#ifndef __AVX2__ #ifndef __AVX2__
tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],7); tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],7);
...@@ -465,7 +424,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -465,7 +424,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],2); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],2);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],1); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],1);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],0); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],0);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+7); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+7);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+6); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+6);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+5); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+5);
...@@ -474,7 +432,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -474,7 +432,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+2); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+2);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+1); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+1);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+0); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+0);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+7); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+7);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+6); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+6);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+5); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+5);
...@@ -483,7 +440,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -483,7 +440,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+2); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+2);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+1); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+1);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+0); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+0);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+7); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+7);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+6); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+6);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+5); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+5);
...@@ -492,7 +448,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -492,7 +448,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+2); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+2);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+1); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+1);
tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+0); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+0);
*systematic2_ptr++=(unsigned int)_mm256_movemask_epi8(tmp); *systematic2_ptr++=(unsigned int)_mm256_movemask_epi8(tmp);
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
...@@ -512,11 +467,10 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -512,11 +467,10 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+2); tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+2);
tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+1); tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+1);
tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+0); tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+0);
// Compute the mask from the input // Compute the mask from the input
uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))); uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers))));
vst1q_lane_u8(systematic2_ptr++, (uint8x16_t)Mask, 0); vst1q_lane_u8(systematic2_ptr++, (uint8x16_t)Mask, 0);
vst1q_lane_u8(systematic2_ptr++, (uint8x16_t)Mask, 8); vst1q_lane_u8(systematic2_ptr++, (uint8x16_t)Mask, 8);
#endif #endif
} }
...@@ -537,14 +491,12 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns ...@@ -537,14 +491,12 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns
void threegpplte_turbo_encoder_sse(unsigned char *input, void threegpplte_turbo_encoder_sse(unsigned char *input,
unsigned short input_length_bytes, unsigned short input_length_bytes,
unsigned char *output, unsigned char *output,
unsigned char F) unsigned char F) {
{
int i; int i;
unsigned char *x; unsigned char *x;
unsigned char state0=0,state1=0; unsigned char state0=0,state1=0;
unsigned short input_length_bits = input_length_bytes<<3; unsigned short input_length_bits = input_length_bytes<<3;
short * base_interleaver; short *base_interleaver;
if ( all_treillis_initialized == 0 ) { if ( all_treillis_initialized == 0 ) {
treillis_table_init(); treillis_table_init();
...@@ -560,15 +512,12 @@ void threegpplte_turbo_encoder_sse(unsigned char *input, ...@@ -560,15 +512,12 @@ void threegpplte_turbo_encoder_sse(unsigned char *input,
base_interleaver=il_tb+f1f2mat[i].beg_index; base_interleaver=il_tb+f1f2mat[i].beg_index;
} }
unsigned char systematic2[768] __attribute__((aligned(32))); unsigned char systematic2[768] __attribute__((aligned(32)));
interleave_compact_byte(base_interleaver,input,systematic2,input_length_bytes); interleave_compact_byte(base_interleaver,input,systematic2,input_length_bytes);
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m64 *ptr_output=(__m64*) output; __m64 *ptr_output=(__m64 *) output;
#elif defined(__arm__) #elif defined(__arm__)
uint8x8_t *ptr_output=(uint8x8_t*)output; uint8x8_t *ptr_output=(uint8x8_t *)output;
#endif #endif
unsigned char cur_s1, cur_s2; unsigned char cur_s1, cur_s2;
int code_rate; int code_rate;
...@@ -582,54 +531,45 @@ void threegpplte_turbo_encoder_sse(unsigned char *input, ...@@ -582,54 +531,45 @@ void threegpplte_turbo_encoder_sse(unsigned char *input,
/* /*
*ptr_output++ = _mm_add_pi8(all_treillis[state0][cur_s1].systematic_64[code_rate], *ptr_output++ = _mm_add_pi8(all_treillis[state0][cur_s1].systematic_64[code_rate],
_mm_add_pi8(all_treillis[state0][cur_s1].parity1_64[code_rate], _mm_add_pi8(all_treillis[state0][cur_s1].parity1_64[code_rate],
all_treillis[state1][cur_s2].parity2_64[code_rate])); all_treillis[state1][cur_s2].parity2_64[code_rate]));
*/ */
*ptr_output++ = _mm_add_pi8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate], *ptr_output++ = _mm_add_pi8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate],
all_treillis[state1][cur_s2].parity2_64[code_rate]); all_treillis[state1][cur_s2].parity2_64[code_rate]);
#elif defined(__arm__) #elif defined(__arm__)
*ptr_output++ = vadd_u8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate], *ptr_output++ = vadd_u8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate],
all_treillis[state0][cur_s1].parity2_64[code_rate]); all_treillis[state0][cur_s1].parity2_64[code_rate]);
#endif #endif
} }
state0=all_treillis[state0][cur_s1].exit_state; state0=all_treillis[state0][cur_s1].exit_state;
state1=all_treillis[state1][cur_s2].exit_state; state1=all_treillis[state1][cur_s2].exit_state;
} }
x=output+(input_length_bits*3); x=output+(input_length_bits*3);
// Trellis termination // Trellis termination
threegpplte_rsc_termination(&x[0],&x[1],&state0); threegpplte_rsc_termination(&x[0],&x[1],&state0);
#ifdef DEBUG_TURBO_ENCODER #ifdef DEBUG_TURBO_ENCODER
printf("term: x0 %d, x1 %d, state0 %d\n",x[0],x[1],state0); printf("term: x0 %u, x1 %u, state0 %d\n",x[0],x[1],state0);
#endif //DEBUG_TURBO_ENCODER #endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination(&x[2],&x[3],&state0); threegpplte_rsc_termination(&x[2],&x[3],&state0);
#ifdef DEBUG_TURBO_ENCODER #ifdef DEBUG_TURBO_ENCODER
printf("term: x0 %d, x1 %d, state0 %d\n",x[2],x[3],state0); printf("term: x0 %u, x1 %u, state0 %d\n",x[2],x[3],state0);
#endif //DEBUG_TURBO_ENCODER #endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination(&x[4],&x[5],&state0); threegpplte_rsc_termination(&x[4],&x[5],&state0);
#ifdef DEBUG_TURBO_ENCODER #ifdef DEBUG_TURBO_ENCODER
printf("term: x0 %d, x1 %d, state0 %d\n",x[4],x[5],state0); printf("term: x0 %u, x1 %u, state0 %d\n",x[4],x[5],state0);
#endif //DEBUG_TURBO_ENCODER #endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination(&x[6],&x[7],&state1); threegpplte_rsc_termination(&x[6],&x[7],&state1);
#ifdef DEBUG_TURBO_ENCODER #ifdef DEBUG_TURBO_ENCODER
printf("term: x0 %d, x1 %d, state1 %d\n",x[6],x[7],state1); printf("term: x0 %u, x1 %u, state1 %d\n",x[6],x[7],state1);
#endif //DEBUG_TURBO_ENCODER #endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination(&x[8],&x[9],&state1); threegpplte_rsc_termination(&x[8],&x[9],&state1);
#ifdef DEBUG_TURBO_ENCODER #ifdef DEBUG_TURBO_ENCODER
printf("term: x0 %d, x1 %d, state1 %d\n",x[8],x[9],state1); printf("term: x0 %u, x1 %u, state1 %d\n",x[8],x[9],state1);
#endif //DEBUG_TURBO_ENCODER #endif //DEBUG_TURBO_ENCODER
threegpplte_rsc_termination(&x[10],&x[11],&state1); threegpplte_rsc_termination(&x[10],&x[11],&state1);
#ifdef DEBUG_TURBO_ENCODER #ifdef DEBUG_TURBO_ENCODER
printf("term: x0 %d, x1 %d, state1 %d\n",x[10],x[11],state1); printf("term: x0 %u, x1 %u, state1 %d\n",x[10],x[11],state1);
#endif //DEBUG_TURBO_ENCODER #endif //DEBUG_TURBO_ENCODER
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
_mm_empty(); _mm_empty();
...@@ -638,32 +578,31 @@ void threegpplte_turbo_encoder_sse(unsigned char *input, ...@@ -638,32 +578,31 @@ void threegpplte_turbo_encoder_sse(unsigned char *input,
} }
void init_encoder_sse (void) { void init_encoder_sse (void) {
treillis_table_init(); treillis_table_init();
} }
/* function which will be called by the shared lib loader, to check shared lib version /* function which will be called by the shared lib loader, to check shared lib version
against main exec version. version mismatch no considered as fatal (interfaces not supposed to change) against main exec version. version mismatch no considered as fatal (interfaces not supposed to change)
*/ */
int coding_checkbuildver(char * mainexec_buildversion, char ** shlib_buildversion) int coding_checkbuildver(char *mainexec_buildversion, char **shlib_buildversion) {
{
#ifndef PACKAGE_VERSION #ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "standalone built: " __DATE__ __TIME__ #define PACKAGE_VERSION "standalone built: " __DATE__ __TIME__
#endif #endif
*shlib_buildversion = PACKAGE_VERSION; *shlib_buildversion = PACKAGE_VERSION;
if (strcmp(mainexec_buildversion, *shlib_buildversion) != 0) {
fprintf(stderr,"[CODING] shared lib version %s, doesn't match main version %s, compatibility should be checked\n", if (strcmp(mainexec_buildversion, *shlib_buildversion) != 0) {
mainexec_buildversion,*shlib_buildversion); fprintf(stderr,"[CODING] shared lib version %s, doesn't match main version %s, compatibility should be checked\n",
} mainexec_buildversion,*shlib_buildversion);
return 0; }
return 0;
} }
#ifdef TC_MAIN #ifdef TC_MAIN
#define INPUT_LENGTH 20 #define INPUT_LENGTH 20
#define F1 21 #define F1 21
#define F2 120 #define F2 120
int main(int argc,char **argv) int main(int argc,char **argv) {
{
unsigned char input[INPUT_LENGTH+32],state,state2; unsigned char input[INPUT_LENGTH+32],state,state2;
unsigned char output[12+(3*(INPUT_LENGTH<<3))],x,z; unsigned char output[12+(3*(INPUT_LENGTH<<3))],x,z;
int i; int i;
...@@ -680,28 +619,27 @@ int main(int argc,char **argv) ...@@ -680,28 +619,27 @@ int main(int argc,char **argv)
printf("\n"); printf("\n");
for (state=0; state<8; state++) { for (state=0; state<8; state++) {
state2=state; state2=state;
threegpplte_rsc_termination(&x,&z,&state2); threegpplte_rsc_termination(&x,&z,&state2);
printf("Termination: (%d->%d) : (%d,%d)\n",state,state2,x,z); printf("Termination: (%d->%d) : (%d,%d)\n",state,state2,x,z);
} }
memset((void*)input,0,INPUT_LENGTH+16); memset((void *)input,0,INPUT_LENGTH+16);
for (i=0; i<INPUT_LENGTH; i++) { for (i=0; i<INPUT_LENGTH; i++) {
input[i] = i*219; input[i] = i*219;
printf("Input %d : %d\n",i,input[i]); printf("Input %d : %u\n",i,input[i]);
} }
threegpplte_turbo_encoder_sse(&input[0], threegpplte_turbo_encoder_sse(&input[0],
INPUT_LENGTH, INPUT_LENGTH,
&output[0], &output[0],
0); 0);
for (i=0; i<12+(INPUT_LENGTH*24); i++)
printf("%u",output[i]);
for (i=0;i<12+(INPUT_LENGTH*24);i++)
printf("%d",output[i]);
printf("\n"); printf("\n");
return(0); return(0);
} }
......
...@@ -38,33 +38,33 @@ ...@@ -38,33 +38,33 @@
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG #ifndef TEST_DEBUG
#include "PHY/defs.h" #include "PHY/defs.h"
#include "PHY/CODING/defs.h" #include "PHY/CODING/defs.h"
#include "PHY/CODING/lte_interleaver_inline.h" #include "PHY/CODING/lte_interleaver_inline.h"
#else #else
#include "defs.h" #include "defs.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#endif #endif
#define SHUFFLE16(a,b,c,d,e,f,g,h) _mm_set_epi8(h==-1?-1:h*2+1, \ #define SHUFFLE16(a,b,c,d,e,f,g,h) _mm_set_epi8(h==-1?-1:h*2+1, \
h==-1?-1:h*2, \ h==-1?-1:h*2, \
g==-1?-1:g*2+1, \ g==-1?-1:g*2+1, \
g==-1?-1:g*2, \ g==-1?-1:g*2, \
f==-1?-1:f*2+1, \ f==-1?-1:f*2+1, \
f==-1?-1:f*2, \ f==-1?-1:f*2, \
e==-1?-1:e*2+1, \ e==-1?-1:e*2+1, \
e==-1?-1:e*2, \ e==-1?-1:e*2, \
d==-1?-1:d*2+1, \ d==-1?-1:d*2+1, \
d==-1?-1:d*2, \ d==-1?-1:d*2, \
c==-1?-1:c*2+1, \ c==-1?-1:c*2+1, \
c==-1?-1:c*2, \ c==-1?-1:c*2, \
b==-1?-1:b*2+1, \ b==-1?-1:b*2+1, \
b==-1?-1:b*2, \ b==-1?-1:b*2, \
a==-1?-1:a*2+1, \ a==-1?-1:a*2+1, \
a==-1?-1:a*2); a==-1?-1:a*2);
...@@ -75,44 +75,40 @@ ...@@ -75,44 +75,40 @@
#ifdef LLR8 #ifdef LLR8
typedef int8_t llr_t; // internal decoder LLR data is 8-bit fixed typedef int8_t llr_t; // internal decoder LLR data is 8-bit fixed
typedef int8_t channel_t; typedef int8_t channel_t;
#define MAX 64 #define MAX 64
#else #else
typedef int16_t llr_t; // internal decoder LLR data is 16-bit fixed typedef int16_t llr_t; // internal decoder LLR data is 16-bit fixed
typedef int16_t channel_t; typedef int16_t channel_t;
#define MAX 256 #define MAX 256
#endif #endif
void log_map (llr_t* systematic,channel_t* y_parity, llr_t* m11, llr_t* m10, llr_t *alpha, llr_t *beta, llr_t* ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,int offset8_flag, void log_map (llr_t *systematic,channel_t *y_parity, llr_t *m11, llr_t *m10, llr_t *alpha, llr_t *beta, llr_t *ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,int offset8_flag,
time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats); time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats);
void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic, channel_t* y_parity, unsigned short frame_length,unsigned char term_flag); void compute_gamma(llr_t *m11,llr_t *m10,llr_t *systematic, channel_t *y_parity, unsigned short frame_length,unsigned char term_flag);
void compute_alpha(llr_t*alpha,llr_t *beta, llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F); void compute_alpha(llr_t *alpha,llr_t *beta, llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F);
void compute_beta(llr_t*alpha, llr_t* beta,llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F,int offset8_flag); void compute_beta(llr_t *alpha, llr_t *beta,llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F,int offset8_flag);
void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m11,llr_t* m10,llr_t* extrinsic, llr_t* ap, unsigned short frame_length); void compute_ext(llr_t *alpha,llr_t *beta,llr_t *m11,llr_t *m10,llr_t *extrinsic, llr_t *ap, unsigned short frame_length);
void print_bytes(char *s, __m128i *x) void print_bytes(char *s, __m128i *x) {
{
int8_t *tempb = (int8_t *)x; int8_t *tempb = (int8_t *)x;
printf("%s : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s, printf("%s : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s,
tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7], tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7],
tempb[8],tempb[9],tempb[10],tempb[11],tempb[12],tempb[13],tempb[14],tempb[15]); tempb[8],tempb[9],tempb[10],tempb[11],tempb[12],tempb[13],tempb[14],tempb[15]);
} }
void log_map(llr_t* systematic, void log_map(llr_t *systematic,
channel_t* y_parity, channel_t *y_parity,
llr_t* m11, llr_t *m11,
llr_t* m10, llr_t *m10,
llr_t *alpha, llr_t *alpha,
llr_t *beta, llr_t *beta,
llr_t* ext, llr_t *ext,
unsigned short frame_length, unsigned short frame_length,
unsigned char term_flag, unsigned char term_flag,
unsigned char F, unsigned char F,
...@@ -120,13 +116,10 @@ void log_map(llr_t* systematic, ...@@ -120,13 +116,10 @@ void log_map(llr_t* systematic,
time_stats_t *alpha_stats, time_stats_t *alpha_stats,
time_stats_t *beta_stats, time_stats_t *beta_stats,
time_stats_t *gamma_stats, time_stats_t *gamma_stats,
time_stats_t *ext_stats) time_stats_t *ext_stats) {
{
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
msg("log_map, frame_length %d\n",frame_length); msg("log_map, frame_length %d\n",frame_length);
#endif #endif
start_meas(gamma_stats) ; start_meas(gamma_stats) ;
compute_gamma(m11,m10,systematic,y_parity,frame_length,term_flag) ; compute_gamma(m11,m10,systematic,y_parity,frame_length,term_flag) ;
stop_meas(gamma_stats); stop_meas(gamma_stats);
...@@ -139,19 +132,15 @@ void log_map(llr_t* systematic, ...@@ -139,19 +132,15 @@ void log_map(llr_t* systematic,
start_meas(ext_stats) ; start_meas(ext_stats) ;
compute_ext(alpha,beta,m11,m10,ext,systematic,frame_length) ; compute_ext(alpha,beta,m11,m10,ext,systematic,frame_length) ;
stop_meas(ext_stats); stop_meas(ext_stats);
} }
void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, void compute_gamma(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity,
unsigned short frame_length,unsigned char term_flag) unsigned short frame_length,unsigned char term_flag) {
{
int k,K1; int k,K1;
__m128i *systematic128 = (__m128i *)systematic; __m128i *systematic128 = (__m128i *)systematic;
__m128i *y_parity128 = (__m128i *)y_parity; __m128i *y_parity128 = (__m128i *)y_parity;
__m128i *m10_128 = (__m128i *)m10; __m128i *m10_128 = (__m128i *)m10;
__m128i *m11_128 = (__m128i *)m11; __m128i *m11_128 = (__m128i *)m11;
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
msg("compute_gamma, %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length); msg("compute_gamma, %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length);
#endif #endif
...@@ -159,7 +148,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -159,7 +148,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
K1=frame_length>>3; K1=frame_length>>3;
for (k=0; k<K1; k++) { for (k=0; k<K1; k++) {
m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k],y_parity128[k]),1); m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k],y_parity128[k]),1);
m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k],y_parity128[k]),1); m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k],y_parity128[k]),1);
/* /*
...@@ -206,13 +194,11 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -206,13 +194,11 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
(int16_t)_mm_extract_epi16(m10_128[k],6), (int16_t)_mm_extract_epi16(m10_128[k],6),
(int16_t)_mm_extract_epi16(m10_128[k],7)); (int16_t)_mm_extract_epi16(m10_128[k],7));
*/ */
} }
// Termination // Termination
m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k+term_flag],y_parity128[k]),1); m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k+term_flag],y_parity128[k]),1);
m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k+term_flag],y_parity128[k]),1); m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k+term_flag],y_parity128[k]),1);
// printf("gamma (term): %d,%d, %d,%d, %d,%d\n",m11[k<<3],m10[k<<3],m11[1+(k<<3)],m10[1+(k<<3)],m11[2+(k<<3)],m10[2+(k<<3)]); // printf("gamma (term): %d,%d, %d,%d, %d,%d\n",m11[k<<3],m10[k<<3],m11[1+(k<<3)],m10[1+(k<<3)],m11[2+(k<<3)],m10[2+(k<<3)]);
#else #else
register __m128i sl,sh,ypl,yph; //K128=_mm_set1_epi8(-128); register __m128i sl,sh,ypl,yph; //K128=_mm_set1_epi8(-128);
...@@ -231,7 +217,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -231,7 +217,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
// m10_128[k] = _mm_subs_epi8(systematic128[k],y_parity128[k]); // m10_128[k] = _mm_subs_epi8(systematic128[k],y_parity128[k]);
// m11_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k],K128),_mm_add_epi8(y_parity128[k],K128)),K128); // m11_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k],K128),_mm_add_epi8(y_parity128[k],K128)),K128);
// m10_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k],K128),_mm_add_epi8(_mm_sign_epi8(y_parity128[k],K128),K128)),K128); // m10_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k],K128),_mm_add_epi8(_mm_sign_epi8(y_parity128[k],K128),K128)),K128);
/* /*
printf("gamma %d: s %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n", printf("gamma %d: s %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",
k, k,
...@@ -309,7 +294,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -309,7 +294,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
} }
// Termination // Termination
sl = _mm_cvtepi8_epi16(systematic128[k+term_flag]); sl = _mm_cvtepi8_epi16(systematic128[k+term_flag]);
sh = _mm_cvtepi8_epi16(_mm_srli_si128(systematic128[k],8)); sh = _mm_cvtepi8_epi16(_mm_srli_si128(systematic128[k],8));
ypl = _mm_cvtepi8_epi16(y_parity128[k+term_flag]); ypl = _mm_cvtepi8_epi16(y_parity128[k+term_flag]);
...@@ -318,7 +302,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -318,7 +302,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
_mm_srai_epi16(_mm_adds_epi16(sh,yph),1)); _mm_srai_epi16(_mm_adds_epi16(sh,yph),1));
m10_128[k] = _mm_packs_epi16(_mm_srai_epi16(_mm_subs_epi16(sl,ypl),1), m10_128[k] = _mm_packs_epi16(_mm_srai_epi16(_mm_subs_epi16(sl,ypl),1),
_mm_srai_epi16(_mm_subs_epi16(sh,yph),1)); _mm_srai_epi16(_mm_subs_epi16(sh,yph),1));
// m11_128[k] = _mm_adds_epi8(systematic128[k+term_flag],y_parity128[k]); // m11_128[k] = _mm_adds_epi8(systematic128[k+term_flag],y_parity128[k]);
// m10_128[k] = _mm_subs_epi8(systematic128[k+term_flag],y_parity128[k]); // m10_128[k] = _mm_subs_epi8(systematic128[k+term_flag],y_parity128[k]);
// m11_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k+term_flag],K128),_mm_add_epi8(y_parity128[k],K128)),K128); // m11_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k+term_flag],K128),_mm_add_epi8(y_parity128[k],K128)),K128);
...@@ -383,20 +366,17 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -383,20 +366,17 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
#endif #endif
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
} }
#define L 40 #define L 40
void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned short frame_length,unsigned char F) void compute_alpha(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F) {
{
int k,l,l2,K1,rerun_flag=0; int k,l,l2,K1,rerun_flag=0;
__m128i *alpha128=(__m128i *)alpha,*alpha_ptr; __m128i *alpha128=(__m128i *)alpha,*alpha_ptr;
__m128i a0,a1,a2,a3,a4,a5,a6,a7,*m11p,*m10p; __m128i a0,a1,a2,a3,a4,a5,a6,a7,*m11p,*m10p;
__m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
__m128i new0,new1,new2,new3,new4,new5,new6,new7; __m128i new0,new1,new2,new3,new4,new5,new6,new7;
__m128i alpha_max; __m128i alpha_max;
#ifndef LLR8 #ifndef LLR8
l2 = L>>3; l2 = L>>3;
K1 = (frame_length>>3); K1 = (frame_length>>3);
...@@ -439,19 +419,16 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -439,19 +419,16 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
} }
alpha_ptr = &alpha128[0]; alpha_ptr = &alpha128[0];
m11p = (__m128i *)m_11;
m11p = (__m128i*)m_11; m10p = (__m128i *)m_10;
m10p = (__m128i*)m_10;
for (k=0; for (k=0;
k<l; k<l;
k++) { k++) {
a1=_mm_load_si128(&alpha_ptr[1]); a1=_mm_load_si128(&alpha_ptr[1]);
a3=_mm_load_si128(&alpha_ptr[3]); a3=_mm_load_si128(&alpha_ptr[3]);
a5=_mm_load_si128(&alpha_ptr[5]); a5=_mm_load_si128(&alpha_ptr[5]);
a7=_mm_load_si128(&alpha_ptr[7]); a7=_mm_load_si128(&alpha_ptr[7]);
m_b0 = _mm_adds_epi16(a1,*m11p); // m11 m_b0 = _mm_adds_epi16(a1,*m11p); // m11
m_b4 = _mm_subs_epi16(a1,*m11p); // m00=-m11 m_b4 = _mm_subs_epi16(a1,*m11p); // m00=-m11
m_b1 = _mm_subs_epi16(a3,*m10p); // m01=-m10 m_b1 = _mm_subs_epi16(a3,*m10p); // m01=-m10
...@@ -460,12 +437,10 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -460,12 +437,10 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
m_b6 = _mm_subs_epi16(a5,*m10p); // m01=-m10 m_b6 = _mm_subs_epi16(a5,*m10p); // m01=-m10
m_b3 = _mm_subs_epi16(a7,*m11p); // m00=-m11 m_b3 = _mm_subs_epi16(a7,*m11p); // m00=-m11
m_b7 = _mm_adds_epi16(a7,*m11p); // m11 m_b7 = _mm_adds_epi16(a7,*m11p); // m11
a0=_mm_load_si128(&alpha_ptr[0]); a0=_mm_load_si128(&alpha_ptr[0]);
a2=_mm_load_si128(&alpha_ptr[2]); a2=_mm_load_si128(&alpha_ptr[2]);
a4=_mm_load_si128(&alpha_ptr[4]); a4=_mm_load_si128(&alpha_ptr[4]);
a6=_mm_load_si128(&alpha_ptr[6]); a6=_mm_load_si128(&alpha_ptr[6]);
new0 = _mm_subs_epi16(a0,*m11p); // m00=-m11 new0 = _mm_subs_epi16(a0,*m11p); // m00=-m11
new4 = _mm_adds_epi16(a0,*m11p); // m11 new4 = _mm_adds_epi16(a0,*m11p); // m11
new1 = _mm_adds_epi16(a2,*m10p); // m10 new1 = _mm_adds_epi16(a2,*m10p); // m10
...@@ -474,7 +449,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -474,7 +449,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
new6 = _mm_adds_epi16(a4,*m10p); // m10 new6 = _mm_adds_epi16(a4,*m10p); // m10
new3 = _mm_adds_epi16(a6,*m11p); // m11 new3 = _mm_adds_epi16(a6,*m11p); // m11
new7 = _mm_subs_epi16(a6,*m11p); // m00=-m11 new7 = _mm_subs_epi16(a6,*m11p); // m00=-m11
a0 = _mm_max_epi16(m_b0,new0); a0 = _mm_max_epi16(m_b0,new0);
a1 = _mm_max_epi16(m_b1,new1); a1 = _mm_max_epi16(m_b1,new1);
a2 = _mm_max_epi16(m_b2,new2); a2 = _mm_max_epi16(m_b2,new2);
...@@ -483,7 +457,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -483,7 +457,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
a5 = _mm_max_epi16(m_b5,new5); a5 = _mm_max_epi16(m_b5,new5);
a6 = _mm_max_epi16(m_b6,new6); a6 = _mm_max_epi16(m_b6,new6);
a7 = _mm_max_epi16(m_b7,new7); a7 = _mm_max_epi16(m_b7,new7);
alpha_max = _mm_max_epi16(a0,a1); alpha_max = _mm_max_epi16(a0,a1);
alpha_max = _mm_max_epi16(alpha_max,a2); alpha_max = _mm_max_epi16(alpha_max,a2);
alpha_max = _mm_max_epi16(alpha_max,a3); alpha_max = _mm_max_epi16(alpha_max,a3);
...@@ -491,7 +464,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -491,7 +464,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha_max = _mm_max_epi16(alpha_max,a5); alpha_max = _mm_max_epi16(alpha_max,a5);
alpha_max = _mm_max_epi16(alpha_max,a6); alpha_max = _mm_max_epi16(alpha_max,a6);
alpha_max = _mm_max_epi16(alpha_max,a7); alpha_max = _mm_max_epi16(alpha_max,a7);
alpha_ptr+=8; alpha_ptr+=8;
m11p++; m11p++;
m10p++; m10p++;
...@@ -503,7 +475,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -503,7 +475,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha_ptr[5] = _mm_subs_epi16(a5,alpha_max); alpha_ptr[5] = _mm_subs_epi16(a5,alpha_max);
alpha_ptr[6] = _mm_subs_epi16(a6,alpha_max); alpha_ptr[6] = _mm_subs_epi16(a6,alpha_max);
alpha_ptr[7] = _mm_subs_epi16(a7,alpha_max); alpha_ptr[7] = _mm_subs_epi16(a7,alpha_max);
} }
/* /*
...@@ -981,9 +952,7 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -981,9 +952,7 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
*/ */
#else #else
if (rerun_flag == 0) { if (rerun_flag == 0) {
alpha128[0] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,0); alpha128[0] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,0);
alpha128[1] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); alpha128[1] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
alpha128[2] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); alpha128[2] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
...@@ -992,8 +961,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -992,8 +961,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha128[5] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); alpha128[5] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
alpha128[6] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); alpha128[6] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
alpha128[7] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); alpha128[7] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
} else { } else {
alpha128[0] = _mm_slli_si128(alpha128[(K1<<3)],1); alpha128[0] = _mm_slli_si128(alpha128[(K1<<3)],1);
alpha128[1] = _mm_slli_si128(alpha128[1+(K1<<3)],1); alpha128[1] = _mm_slli_si128(alpha128[1+(K1<<3)],1);
...@@ -1025,15 +992,12 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -1025,15 +992,12 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
print_bytes("a6:",&alpha_ptr[6]); print_bytes("a6:",&alpha_ptr[6]);
print_bytes("a7:",&alpha_ptr[7]); print_bytes("a7:",&alpha_ptr[7]);
*/ */
m11p = (__m128i *)m_11;
m11p = (__m128i*)m_11; m10p = (__m128i *)m_10;
m10p = (__m128i*)m_10;
for (k=0; for (k=0;
k<l; k<l;
k++) { k++) {
m_b0 = _mm_adds_epi8(alpha_ptr[1],*m11p); // m11 m_b0 = _mm_adds_epi8(alpha_ptr[1],*m11p); // m11
m_b4 = _mm_subs_epi8(alpha_ptr[1],*m11p); // m00=-m11 m_b4 = _mm_subs_epi8(alpha_ptr[1],*m11p); // m00=-m11
m_b1 = _mm_subs_epi8(alpha_ptr[3],*m10p); // m01=-m10 m_b1 = _mm_subs_epi8(alpha_ptr[3],*m10p); // m01=-m10
...@@ -1042,7 +1006,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -1042,7 +1006,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
m_b6 = _mm_subs_epi8(alpha_ptr[5],*m10p); // m01=-m10 m_b6 = _mm_subs_epi8(alpha_ptr[5],*m10p); // m01=-m10
m_b3 = _mm_subs_epi8(alpha_ptr[7],*m11p); // m00=-m11 m_b3 = _mm_subs_epi8(alpha_ptr[7],*m11p); // m00=-m11
m_b7 = _mm_adds_epi8(alpha_ptr[7],*m11p); // m11 m_b7 = _mm_adds_epi8(alpha_ptr[7],*m11p); // m11
new0 = _mm_subs_epi8(alpha_ptr[0],*m11p); // m00=-m11 new0 = _mm_subs_epi8(alpha_ptr[0],*m11p); // m00=-m11
new4 = _mm_adds_epi8(alpha_ptr[0],*m11p); // m11 new4 = _mm_adds_epi8(alpha_ptr[0],*m11p); // m11
new1 = _mm_adds_epi8(alpha_ptr[2],*m10p); // m10 new1 = _mm_adds_epi8(alpha_ptr[2],*m10p); // m10
...@@ -1051,7 +1014,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -1051,7 +1014,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
new6 = _mm_adds_epi8(alpha_ptr[4],*m10p); // m10 new6 = _mm_adds_epi8(alpha_ptr[4],*m10p); // m10
new3 = _mm_adds_epi8(alpha_ptr[6],*m11p); // m11 new3 = _mm_adds_epi8(alpha_ptr[6],*m11p); // m11
new7 = _mm_subs_epi8(alpha_ptr[6],*m11p); // m00=-m11 new7 = _mm_subs_epi8(alpha_ptr[6],*m11p); // m00=-m11
alpha_ptr += 8; alpha_ptr += 8;
m11p++; m11p++;
m10p++; m10p++;
...@@ -1063,8 +1025,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -1063,8 +1025,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha_ptr[5] = _mm_max_epi8(m_b5,new5); alpha_ptr[5] = _mm_max_epi8(m_b5,new5);
alpha_ptr[6] = _mm_max_epi8(m_b6,new6); alpha_ptr[6] = _mm_max_epi8(m_b6,new6);
alpha_ptr[7] = _mm_max_epi8(m_b7,new7); alpha_ptr[7] = _mm_max_epi8(m_b7,new7);
// compute and subtract maxima // compute and subtract maxima
alpha_max = _mm_max_epi8(alpha_ptr[0],alpha_ptr[1]); alpha_max = _mm_max_epi8(alpha_ptr[0],alpha_ptr[1]);
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[2]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[2]);
...@@ -1073,7 +1033,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -1073,7 +1033,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[5]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[5]);
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[6]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[6]);
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[7]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[7]);
alpha_ptr[0] = _mm_subs_epi8(alpha_ptr[0],alpha_max); alpha_ptr[0] = _mm_subs_epi8(alpha_ptr[0],alpha_max);
alpha_ptr[1] = _mm_subs_epi8(alpha_ptr[1],alpha_max); alpha_ptr[1] = _mm_subs_epi8(alpha_ptr[1],alpha_max);
alpha_ptr[2] = _mm_subs_epi8(alpha_ptr[2],alpha_max); alpha_ptr[2] = _mm_subs_epi8(alpha_ptr[2],alpha_max);
...@@ -1109,14 +1068,11 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho ...@@ -1109,14 +1068,11 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho
} }
void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned short frame_length,unsigned char F,int offset8_flag) void compute_beta(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F,int offset8_flag) {
{
int k,rerun_flag=0; int k,rerun_flag=0;
__m128i m11_128,m10_128; __m128i m11_128,m10_128;
__m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
__m128i new0,new1,new2,new3,new4,new5,new6,new7; __m128i new0,new1,new2,new3,new4,new5,new6,new7;
__m128i *beta128,*alpha128,*beta_ptr; __m128i *beta128,*alpha128,*beta_ptr;
__m128i beta_max; __m128i beta_max;
int16_t m11,m10,beta0_16,beta1_16,beta2_16,beta3_16,beta4_16,beta5_16,beta6_16,beta7_16,beta0_2,beta1_2,beta2_2,beta3_2,beta_m; int16_t m11,m10,beta0_16,beta1_16,beta2_16,beta3_16,beta4_16,beta5_16,beta6_16,beta7_16,beta0_2,beta1_2,beta2_2,beta3_2,beta_m;
...@@ -1124,30 +1080,21 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1124,30 +1080,21 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
#ifdef LLR8 #ifdef LLR8
llr_t beta2,beta3,beta4,beta5,beta6,beta7; llr_t beta2,beta3,beta4,beta5,beta6,beta7;
__m128i beta_16; __m128i beta_16;
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
msg("compute_beta, %p,%p,%p,%p,framelength %d,F %d\n", msg("compute_beta, %p,%p,%p,%p,framelength %d,F %d\n",
beta,m_11,m_10,alpha,frame_length,F); beta,m_11,m_10,alpha,frame_length,F);
#endif #endif
// termination for beta initialization // termination for beta initialization
// printf("beta init: offset8 %d\n",offset8_flag); // printf("beta init: offset8 %d\n",offset8_flag);
m11=(int16_t)m_11[2+frame_length]; m11=(int16_t)m_11[2+frame_length];
m10=(int16_t)m_10[2+frame_length]; m10=(int16_t)m_10[2+frame_length];
// printf("m11,m10 %d,%d\n",m11,m10); // printf("m11,m10 %d,%d\n",m11,m10);
beta0 = -m11;//M0T_TERM; beta0 = -m11;//M0T_TERM;
beta1 = m11;//M1T_TERM; beta1 = m11;//M1T_TERM;
m11=(int16_t)m_11[1+frame_length]; m11=(int16_t)m_11[1+frame_length];
m10=(int16_t)m_10[1+frame_length]; m10=(int16_t)m_10[1+frame_length];
// printf("m11,m10 %d,%d\n",m11,m10); // printf("m11,m10 %d,%d\n",m11,m10);
beta0_2 = beta0-m11;//+M0T_TERM; beta0_2 = beta0-m11;//+M0T_TERM;
beta1_2 = beta0+m11;//+M1T_TERM; beta1_2 = beta0+m11;//+M1T_TERM;
beta2_2 = beta1+m10;//M2T_TERM; beta2_2 = beta1+m10;//M2T_TERM;
...@@ -1155,7 +1102,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1155,7 +1102,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
m11=(int16_t)m_11[frame_length]; m11=(int16_t)m_11[frame_length];
m10=(int16_t)m_10[frame_length]; m10=(int16_t)m_10[frame_length];
// printf("m11,m10 %d,%d (%p)\n",m11,m10,m_11+frame_length); // printf("m11,m10 %d,%d (%p)\n",m11,m10,m_11+frame_length);
beta0_16 = beta0_2-m11;//+M0T_TERM; beta0_16 = beta0_2-m11;//+M0T_TERM;
beta1_16 = beta0_2+m11;//+M1T_TERM; beta1_16 = beta0_2+m11;//+M1T_TERM;
beta2_16 = beta1_2+m10;//+M2T_TERM; beta2_16 = beta1_2+m10;//+M2T_TERM;
...@@ -1164,8 +1110,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1164,8 +1110,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta5_16 = beta2_2+m10;//+M5T_TERM; beta5_16 = beta2_2+m10;//+M5T_TERM;
beta6_16 = beta3_2+m11;//+M6T_TERM; beta6_16 = beta3_2+m11;//+M6T_TERM;
beta7_16 = beta3_2-m11;//+M7T_TERM; beta7_16 = beta3_2-m11;//+M7T_TERM;
beta_m = (beta0_16>beta1_16) ? beta0_16 : beta1_16; beta_m = (beta0_16>beta1_16) ? beta0_16 : beta1_16;
beta_m = (beta_m>beta2_16) ? beta_m : beta2_16; beta_m = (beta_m>beta2_16) ? beta_m : beta2_16;
beta_m = (beta_m>beta3_16) ? beta_m : beta3_16; beta_m = (beta_m>beta3_16) ? beta_m : beta3_16;
...@@ -1173,8 +1117,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1173,8 +1117,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_m = (beta_m>beta5_16) ? beta_m : beta5_16; beta_m = (beta_m>beta5_16) ? beta_m : beta5_16;
beta_m = (beta_m>beta6_16) ? beta_m : beta6_16; beta_m = (beta_m>beta6_16) ? beta_m : beta6_16;
beta_m = (beta_m>beta7_16) ? beta_m : beta7_16; beta_m = (beta_m>beta7_16) ? beta_m : beta7_16;
beta0_16=beta0_16-beta_m; beta0_16=beta0_16-beta_m;
beta1_16=beta1_16-beta_m; beta1_16=beta1_16-beta_m;
beta2_16=beta2_16-beta_m; beta2_16=beta2_16-beta_m;
...@@ -1183,7 +1125,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1183,7 +1125,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta5_16=beta5_16-beta_m; beta5_16=beta5_16-beta_m;
beta6_16=beta6_16-beta_m; beta6_16=beta6_16-beta_m;
beta7_16=beta7_16-beta_m; beta7_16=beta7_16-beta_m;
#ifdef LLR8 #ifdef LLR8
beta_16 = _mm_set_epi16(beta7_16,beta6_16,beta5_16,beta4_16,beta3_16,beta2_16,beta1_16,beta0_16); beta_16 = _mm_set_epi16(beta7_16,beta6_16,beta5_16,beta4_16,beta3_16,beta2_16,beta1_16,beta0_16);
beta_16 = _mm_packs_epi16(beta_16,beta_16); beta_16 = _mm_packs_epi16(beta_16,beta_16);
...@@ -1199,8 +1140,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1199,8 +1140,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
#endif #endif
for (rerun_flag=0;; rerun_flag=1) { for (rerun_flag=0;; rerun_flag=1) {
beta_ptr = (__m128i*)&beta[frame_length<<3]; beta_ptr = (__m128i *)&beta[frame_length<<3];
alpha128 = (__m128i*)&alpha[0]; alpha128 = (__m128i *)&alpha[0];
if (rerun_flag == 0) { if (rerun_flag == 0) {
#ifndef LLR8 #ifndef LLR8
...@@ -1223,9 +1164,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1223,9 +1164,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[7] = alpha128[7+(frame_length>>1)]; beta_ptr[7] = alpha128[7+(frame_length>>1)];
#endif #endif
} else { } else {
beta128 = (__m128i*)&beta[0]; beta128 = (__m128i *)&beta[0];
#ifndef LLR8 #ifndef LLR8
beta_ptr[0] = _mm_srli_si128(beta128[0],2); beta_ptr[0] = _mm_srli_si128(beta128[0],2);
beta_ptr[1] = _mm_srli_si128(beta128[1],2); beta_ptr[1] = _mm_srli_si128(beta128[1],2);
beta_ptr[2] = _mm_srli_si128(beta128[2],2); beta_ptr[2] = _mm_srli_si128(beta128[2],2);
...@@ -1255,7 +1195,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1255,7 +1195,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[5] = _mm_insert_epi16(beta_ptr[5],beta5_16,7); beta_ptr[5] = _mm_insert_epi16(beta_ptr[5],beta5_16,7);
beta_ptr[6] = _mm_insert_epi16(beta_ptr[6],beta6_16,7); beta_ptr[6] = _mm_insert_epi16(beta_ptr[6],beta6_16,7);
beta_ptr[7] = _mm_insert_epi16(beta_ptr[7],beta7_16,7); beta_ptr[7] = _mm_insert_epi16(beta_ptr[7],beta7_16,7);
/* /*
beta[7+(frame_length<<3)] = beta0_16; beta[7+(frame_length<<3)] = beta0_16;
beta[15+(frame_length<<3)] = beta1_16; beta[15+(frame_length<<3)] = beta1_16;
...@@ -1277,18 +1216,15 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1277,18 +1216,15 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[6] = _mm_insert_epi8(beta_ptr[6],beta6,15); beta_ptr[6] = _mm_insert_epi8(beta_ptr[6],beta6,15);
beta_ptr[7] = _mm_insert_epi8(beta_ptr[7],beta7,15); beta_ptr[7] = _mm_insert_epi8(beta_ptr[7],beta7,15);
} else { } else {
} }
#endif #endif
#ifndef LLR8 #ifndef LLR8
int loopval=((rerun_flag==0)?0:((frame_length-L)>>3)); int loopval=((rerun_flag==0)?0:((frame_length-L)>>3));
for (k=(frame_length>>3)-1; k>=loopval; k--) { for (k=(frame_length>>3)-1; k>=loopval; k--) {
m11_128=((__m128i*)m_11)[k]; m11_128=((__m128i *)m_11)[k];
m10_128=((__m128i*)m_10)[k]; m10_128=((__m128i *)m_10)[k];
m_b0 = _mm_adds_epi16(beta_ptr[4],m11_128); //m11 m_b0 = _mm_adds_epi16(beta_ptr[4],m11_128); //m11
m_b1 = _mm_subs_epi16(beta_ptr[4],m11_128); //m00 m_b1 = _mm_subs_epi16(beta_ptr[4],m11_128); //m00
m_b2 = _mm_subs_epi16(beta_ptr[5],m10_128); //m01 m_b2 = _mm_subs_epi16(beta_ptr[5],m10_128); //m01
...@@ -1297,7 +1233,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1297,7 +1233,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
m_b5 = _mm_subs_epi16(beta_ptr[6],m10_128); //m01 m_b5 = _mm_subs_epi16(beta_ptr[6],m10_128); //m01
m_b6 = _mm_subs_epi16(beta_ptr[7],m11_128); //m00 m_b6 = _mm_subs_epi16(beta_ptr[7],m11_128); //m00
m_b7 = _mm_adds_epi16(beta_ptr[7],m11_128); //m11 m_b7 = _mm_adds_epi16(beta_ptr[7],m11_128); //m11
new0 = _mm_subs_epi16(beta_ptr[0],m11_128); //m00 new0 = _mm_subs_epi16(beta_ptr[0],m11_128); //m00
new1 = _mm_adds_epi16(beta_ptr[0],m11_128); //m11 new1 = _mm_adds_epi16(beta_ptr[0],m11_128); //m11
new2 = _mm_adds_epi16(beta_ptr[1],m10_128); //m10 new2 = _mm_adds_epi16(beta_ptr[1],m10_128); //m10
...@@ -1306,9 +1241,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1306,9 +1241,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
new5 = _mm_adds_epi16(beta_ptr[2],m10_128); //m10 new5 = _mm_adds_epi16(beta_ptr[2],m10_128); //m10
new6 = _mm_adds_epi16(beta_ptr[3],m11_128); //m11 new6 = _mm_adds_epi16(beta_ptr[3],m11_128); //m11
new7 = _mm_subs_epi16(beta_ptr[3],m11_128); //m00 new7 = _mm_subs_epi16(beta_ptr[3],m11_128); //m00
beta_ptr-=8; beta_ptr-=8;
beta_ptr[0] = _mm_max_epi16(m_b0,new0); beta_ptr[0] = _mm_max_epi16(m_b0,new0);
beta_ptr[1] = _mm_max_epi16(m_b1,new1); beta_ptr[1] = _mm_max_epi16(m_b1,new1);
beta_ptr[2] = _mm_max_epi16(m_b2,new2); beta_ptr[2] = _mm_max_epi16(m_b2,new2);
...@@ -1317,7 +1250,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1317,7 +1250,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[5] = _mm_max_epi16(m_b5,new5); beta_ptr[5] = _mm_max_epi16(m_b5,new5);
beta_ptr[6] = _mm_max_epi16(m_b6,new6); beta_ptr[6] = _mm_max_epi16(m_b6,new6);
beta_ptr[7] = _mm_max_epi16(m_b7,new7); beta_ptr[7] = _mm_max_epi16(m_b7,new7);
beta_max = _mm_max_epi16(beta_ptr[0],beta_ptr[1]); beta_max = _mm_max_epi16(beta_ptr[0],beta_ptr[1]);
beta_max = _mm_max_epi16(beta_max ,beta_ptr[2]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[2]);
beta_max = _mm_max_epi16(beta_max ,beta_ptr[3]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[3]);
...@@ -1325,7 +1257,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1325,7 +1257,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_max = _mm_max_epi16(beta_max ,beta_ptr[5]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[5]);
beta_max = _mm_max_epi16(beta_max ,beta_ptr[6]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[6]);
beta_max = _mm_max_epi16(beta_max ,beta_ptr[7]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[7]);
beta_ptr[0] = _mm_subs_epi16(beta_ptr[0],beta_max); beta_ptr[0] = _mm_subs_epi16(beta_ptr[0],beta_max);
beta_ptr[1] = _mm_subs_epi16(beta_ptr[1],beta_max); beta_ptr[1] = _mm_subs_epi16(beta_ptr[1],beta_max);
beta_ptr[2] = _mm_subs_epi16(beta_ptr[2],beta_max); beta_ptr[2] = _mm_subs_epi16(beta_ptr[2],beta_max);
...@@ -1334,14 +1265,11 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1334,14 +1265,11 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[5] = _mm_subs_epi16(beta_ptr[5],beta_max); beta_ptr[5] = _mm_subs_epi16(beta_ptr[5],beta_max);
beta_ptr[6] = _mm_subs_epi16(beta_ptr[6],beta_max); beta_ptr[6] = _mm_subs_epi16(beta_ptr[6],beta_max);
beta_ptr[7] = _mm_subs_epi16(beta_ptr[7],beta_max); beta_ptr[7] = _mm_subs_epi16(beta_ptr[7],beta_max);
} }
#else #else
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("beta0 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta0 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
(frame_length>>4), (frame_length>>4),
_mm_extract_epi8(beta_ptr[0],0), _mm_extract_epi8(beta_ptr[0],0),
_mm_extract_epi8(beta_ptr[0],1), _mm_extract_epi8(beta_ptr[0],1),
...@@ -1359,7 +1287,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1359,7 +1287,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[0],13), _mm_extract_epi8(beta_ptr[0],13),
_mm_extract_epi8(beta_ptr[0],14), _mm_extract_epi8(beta_ptr[0],14),
_mm_extract_epi8(beta_ptr[0],15)); _mm_extract_epi8(beta_ptr[0],15));
printf("beta1 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta1 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
(frame_length>>4), (frame_length>>4),
_mm_extract_epi8(beta_ptr[1],0), _mm_extract_epi8(beta_ptr[1],0),
_mm_extract_epi8(beta_ptr[1],1), _mm_extract_epi8(beta_ptr[1],1),
...@@ -1377,7 +1305,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1377,7 +1305,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[1],13), _mm_extract_epi8(beta_ptr[1],13),
_mm_extract_epi8(beta_ptr[1],14), _mm_extract_epi8(beta_ptr[1],14),
_mm_extract_epi8(beta_ptr[1],15)); _mm_extract_epi8(beta_ptr[1],15));
printf("beta2 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta2 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
(frame_length>>4), (frame_length>>4),
_mm_extract_epi8(beta_ptr[2],0), _mm_extract_epi8(beta_ptr[2],0),
_mm_extract_epi8(beta_ptr[2],1), _mm_extract_epi8(beta_ptr[2],1),
...@@ -1395,7 +1323,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1395,7 +1323,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[2],13), _mm_extract_epi8(beta_ptr[2],13),
_mm_extract_epi8(beta_ptr[2],14), _mm_extract_epi8(beta_ptr[2],14),
_mm_extract_epi8(beta_ptr[2],15)); _mm_extract_epi8(beta_ptr[2],15));
printf("beta3 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta3 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
(frame_length>>4), (frame_length>>4),
_mm_extract_epi8(beta_ptr[3],0), _mm_extract_epi8(beta_ptr[3],0),
_mm_extract_epi8(beta_ptr[3],1), _mm_extract_epi8(beta_ptr[3],1),
...@@ -1413,7 +1341,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1413,7 +1341,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[3],13), _mm_extract_epi8(beta_ptr[3],13),
_mm_extract_epi8(beta_ptr[3],14), _mm_extract_epi8(beta_ptr[3],14),
_mm_extract_epi8(beta_ptr[3],15)); _mm_extract_epi8(beta_ptr[3],15));
printf("beta4 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta4 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
(frame_length>>4), (frame_length>>4),
_mm_extract_epi8(beta_ptr[4],0), _mm_extract_epi8(beta_ptr[4],0),
_mm_extract_epi8(beta_ptr[4],1), _mm_extract_epi8(beta_ptr[4],1),
...@@ -1431,7 +1359,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1431,7 +1359,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[4],13), _mm_extract_epi8(beta_ptr[4],13),
_mm_extract_epi8(beta_ptr[4],14), _mm_extract_epi8(beta_ptr[4],14),
_mm_extract_epi8(beta_ptr[4],15)); _mm_extract_epi8(beta_ptr[4],15));
printf("beta5 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta5 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
(frame_length>>4), (frame_length>>4),
_mm_extract_epi8(beta_ptr[5],0), _mm_extract_epi8(beta_ptr[5],0),
_mm_extract_epi8(beta_ptr[5],1), _mm_extract_epi8(beta_ptr[5],1),
...@@ -1449,7 +1377,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1449,7 +1377,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[5],13), _mm_extract_epi8(beta_ptr[5],13),
_mm_extract_epi8(beta_ptr[5],14), _mm_extract_epi8(beta_ptr[5],14),
_mm_extract_epi8(beta_ptr[5],15)); _mm_extract_epi8(beta_ptr[5],15));
printf("beta6 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta6 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
(frame_length>>4), (frame_length>>4),
_mm_extract_epi8(beta_ptr[6],0), _mm_extract_epi8(beta_ptr[6],0),
_mm_extract_epi8(beta_ptr[6],1), _mm_extract_epi8(beta_ptr[6],1),
...@@ -1467,7 +1395,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1467,7 +1395,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[6],13), _mm_extract_epi8(beta_ptr[6],13),
_mm_extract_epi8(beta_ptr[6],14), _mm_extract_epi8(beta_ptr[6],14),
_mm_extract_epi8(beta_ptr[6],15)); _mm_extract_epi8(beta_ptr[6],15));
printf("beta7 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta7 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
(frame_length>>4), (frame_length>>4),
_mm_extract_epi8(beta_ptr[7],0), _mm_extract_epi8(beta_ptr[7],0),
_mm_extract_epi8(beta_ptr[7],1), _mm_extract_epi8(beta_ptr[7],1),
...@@ -1491,9 +1419,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1491,9 +1419,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
__m128i zeros=_mm_set1_epi8(0); __m128i zeros=_mm_set1_epi8(0);
for (k=(frame_length>>4)-1; k>=loopval; k--) { for (k=(frame_length>>4)-1; k>=loopval; k--) {
m11_128=((__m128i *)m_11)[k];
m11_128=((__m128i*)m_11)[k]; m10_128=((__m128i *)m_10)[k];
m10_128=((__m128i*)m_10)[k];
/* /*
if ((offset8_flag==1) && (k==((frame_length>>4)-9))) { if ((offset8_flag==1) && (k==((frame_length>>4)-9))) {
beta_ptr[0] = _mm_insert_epi8(beta_ptr[0],beta0,15); beta_ptr[0] = _mm_insert_epi8(beta_ptr[0],beta0,15);
...@@ -1506,9 +1433,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1506,9 +1433,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[7] = _mm_insert_epi8(beta_ptr[7],beta7,15); beta_ptr[7] = _mm_insert_epi8(beta_ptr[7],beta7,15);
}*/ }*/
// print_bytes("m11:",&m11_128); // print_bytes("m11:",&m11_128);
m_b0 = _mm_adds_epi8(beta_ptr[4],m11_128); //m11 m_b0 = _mm_adds_epi8(beta_ptr[4],m11_128); //m11
m_b1 = _mm_subs_epi8(beta_ptr[4],m11_128); //m00 m_b1 = _mm_subs_epi8(beta_ptr[4],m11_128); //m00
m_b2 = _mm_subs_epi8(beta_ptr[5],m10_128); //m01 m_b2 = _mm_subs_epi8(beta_ptr[5],m10_128); //m01
...@@ -1517,7 +1441,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1517,7 +1441,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
m_b5 = _mm_subs_epi8(beta_ptr[6],m10_128); //m01 m_b5 = _mm_subs_epi8(beta_ptr[6],m10_128); //m01
m_b6 = _mm_subs_epi8(beta_ptr[7],m11_128); //m00 m_b6 = _mm_subs_epi8(beta_ptr[7],m11_128); //m00
m_b7 = _mm_adds_epi8(beta_ptr[7],m11_128); //m11 m_b7 = _mm_adds_epi8(beta_ptr[7],m11_128); //m11
new0 = _mm_subs_epi8(beta_ptr[0],m11_128); //m00 new0 = _mm_subs_epi8(beta_ptr[0],m11_128); //m00
new1 = _mm_adds_epi8(beta_ptr[0],m11_128); //m11 new1 = _mm_adds_epi8(beta_ptr[0],m11_128); //m11
new2 = _mm_adds_epi8(beta_ptr[1],m10_128); //m10 new2 = _mm_adds_epi8(beta_ptr[1],m10_128); //m10
...@@ -1526,9 +1449,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1526,9 +1449,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
new5 = _mm_adds_epi8(beta_ptr[2],m10_128); //m10 new5 = _mm_adds_epi8(beta_ptr[2],m10_128); //m10
new6 = _mm_adds_epi8(beta_ptr[3],m11_128); //m11 new6 = _mm_adds_epi8(beta_ptr[3],m11_128); //m11
new7 = _mm_subs_epi8(beta_ptr[3],m11_128); //m00 new7 = _mm_subs_epi8(beta_ptr[3],m11_128); //m00
beta_ptr-=8; beta_ptr-=8;
beta_ptr[0] = _mm_max_epi8(m_b0,new0); beta_ptr[0] = _mm_max_epi8(m_b0,new0);
beta_ptr[1] = _mm_max_epi8(m_b1,new1); beta_ptr[1] = _mm_max_epi8(m_b1,new1);
beta_ptr[2] = _mm_max_epi8(m_b2,new2); beta_ptr[2] = _mm_max_epi8(m_b2,new2);
...@@ -1537,7 +1458,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1537,7 +1458,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[5] = _mm_max_epi8(m_b5,new5); beta_ptr[5] = _mm_max_epi8(m_b5,new5);
beta_ptr[6] = _mm_max_epi8(m_b6,new6); beta_ptr[6] = _mm_max_epi8(m_b6,new6);
beta_ptr[7] = _mm_max_epi8(m_b7,new7); beta_ptr[7] = _mm_max_epi8(m_b7,new7);
beta_max = _mm_max_epi8(beta_ptr[0],beta_ptr[1]); beta_max = _mm_max_epi8(beta_ptr[0],beta_ptr[1]);
beta_max = _mm_max_epi8(beta_max ,beta_ptr[2]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[2]);
beta_max = _mm_max_epi8(beta_max ,beta_ptr[3]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[3]);
...@@ -1553,7 +1473,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1553,7 +1473,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
beta_ptr[5] = _mm_subs_epi8(beta_ptr[5],beta_max); beta_ptr[5] = _mm_subs_epi8(beta_ptr[5],beta_max);
beta_ptr[6] = _mm_subs_epi8(beta_ptr[6],beta_max); beta_ptr[6] = _mm_subs_epi8(beta_ptr[6],beta_max);
beta_ptr[7] = _mm_subs_epi8(beta_ptr[7],beta_max); beta_ptr[7] = _mm_subs_epi8(beta_ptr[7],beta_max);
/* /*
printf("beta0 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", printf("beta0 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n",
k, k,
...@@ -1700,7 +1619,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1700,7 +1619,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_mm_extract_epi8(beta_ptr[7],14), _mm_extract_epi8(beta_ptr[7],14),
_mm_extract_epi8(beta_ptr[7],15)); _mm_extract_epi8(beta_ptr[7],15));
*/ */
} }
#endif #endif
...@@ -1713,8 +1631,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor ...@@ -1713,8 +1631,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor
_m_empty(); _m_empty();
} }
void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, llr_t* systematic,unsigned short frame_length) void compute_ext(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, llr_t *systematic,unsigned short frame_length) {
{
__m128i *alpha128=(__m128i *)alpha; __m128i *alpha128=(__m128i *)alpha;
__m128i *beta128=(__m128i *)beta; __m128i *beta128=(__m128i *)beta;
__m128i *m11_128,*m10_128,*ext_128; __m128i *m11_128,*m10_128,*ext_128;
...@@ -1724,26 +1641,20 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll ...@@ -1724,26 +1641,20 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
__m128i m10_1,m10_2,m10_3,m10_4; __m128i m10_1,m10_2,m10_3,m10_4;
__m128i m11_1,m11_2,m11_3,m11_4; __m128i m11_1,m11_2,m11_3,m11_4;
int k; int k;
// //
// LLR computation, 8 consecutive bits per loop // LLR computation, 8 consecutive bits per loop
// //
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
msg("compute_ext, %p, %p, %p, %p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length); msg("compute_ext, %p, %p, %p, %p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length);
#endif #endif
alpha_ptr = alpha128; alpha_ptr = alpha128;
beta_ptr = &beta128[8]; beta_ptr = &beta128[8];
#ifndef LLR8 #ifndef LLR8
for (k=0; k<(frame_length>>3); k++) { for (k=0; k<(frame_length>>3); k++) {
m11_128 = (__m128i *)&m_11[k<<3];
m11_128 = (__m128i*)&m_11[k<<3]; m10_128 = (__m128i *)&m_10[k<<3];
m10_128 = (__m128i*)&m_10[k<<3]; ext_128 = (__m128i *)&ext[k<<3];
ext_128 = (__m128i*)&ext[k<<3];
/* /*
printf("EXT %03d\n",k); printf("EXT %03d\n",k);
print_shorts("a0:",&alpha_ptr[0]); print_shorts("a0:",&alpha_ptr[0]);
...@@ -1809,23 +1720,18 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll ...@@ -1809,23 +1720,18 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
m11_1 = _mm_max_epi16(m11_1,m11_2); m11_1 = _mm_max_epi16(m11_1,m11_2);
m11_1 = _mm_max_epi16(m11_1,m11_3); m11_1 = _mm_max_epi16(m11_1,m11_3);
m11_1 = _mm_max_epi16(m11_1,m11_4); m11_1 = _mm_max_epi16(m11_1,m11_4);
// print_shorts("m11_1:",&m11_1); // print_shorts("m11_1:",&m11_1);
m01_1 = _mm_subs_epi16(m01_1,*m10_128); m01_1 = _mm_subs_epi16(m01_1,*m10_128);
m00_1 = _mm_subs_epi16(m00_1,*m11_128); m00_1 = _mm_subs_epi16(m00_1,*m11_128);
m10_1 = _mm_adds_epi16(m10_1,*m10_128); m10_1 = _mm_adds_epi16(m10_1,*m10_128);
m11_1 = _mm_adds_epi16(m11_1,*m11_128); m11_1 = _mm_adds_epi16(m11_1,*m11_128);
// print_shorts("m10_1:",&m10_1); // print_shorts("m10_1:",&m10_1);
// print_shorts("m11_1:",&m11_1); // print_shorts("m11_1:",&m11_1);
m01_1 = _mm_max_epi16(m01_1,m00_1); m01_1 = _mm_max_epi16(m01_1,m00_1);
m10_1 = _mm_max_epi16(m10_1,m11_1); m10_1 = _mm_max_epi16(m10_1,m11_1);
// print_shorts("m01_1:",&m01_1); // print_shorts("m01_1:",&m01_1);
// print_shorts("m10_1:",&m10_1); // print_shorts("m10_1:",&m10_1);
*ext_128 = _mm_subs_epi16(m10_1,m01_1); *ext_128 = _mm_subs_epi16(m10_1,m01_1);
/* /*
print_shorts("ext:",ext_128); print_shorts("ext:",ext_128);
print_shorts("m11:",m11_128); print_shorts("m11:",m11_128);
...@@ -1834,7 +1740,6 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll ...@@ -1834,7 +1740,6 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
print_shorts("m01_1:",&m01_1); print_shorts("m01_1:",&m01_1);
print_shorts("syst:",systematic_128); print_shorts("syst:",systematic_128);
*/ */
alpha_ptr+=8; alpha_ptr+=8;
beta_ptr+=8; beta_ptr+=8;
} }
...@@ -1842,11 +1747,9 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll ...@@ -1842,11 +1747,9 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
#else #else
for (k=0; k<(frame_length>>4); k++) { for (k=0; k<(frame_length>>4); k++) {
m11_128 = (__m128i *)&m_11[k<<4];
m11_128 = (__m128i*)&m_11[k<<4]; m10_128 = (__m128i *)&m_10[k<<4];
m10_128 = (__m128i*)&m_10[k<<4]; ext_128 = (__m128i *)&ext[k<<4];
ext_128 = (__m128i*)&ext[k<<4];
m00_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m00_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
m11_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m11_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11;
m00_3 = _mm_adds_epi8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; m00_3 = _mm_adds_epi8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00;
...@@ -1863,7 +1766,6 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll ...@@ -1863,7 +1766,6 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
m10_2 = _mm_adds_epi8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_2 = _mm_adds_epi8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10;
m10_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m10_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10;
m01_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; m01_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01;
m01_1 = _mm_max_epi8(m01_1,m01_2); m01_1 = _mm_max_epi8(m01_1,m01_2);
m01_1 = _mm_max_epi8(m01_1,m01_3); m01_1 = _mm_max_epi8(m01_1,m01_3);
m01_1 = _mm_max_epi8(m01_1,m01_4); m01_1 = _mm_max_epi8(m01_1,m01_4);
...@@ -1876,29 +1778,20 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll ...@@ -1876,29 +1778,20 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
m11_1 = _mm_max_epi8(m11_1,m11_2); m11_1 = _mm_max_epi8(m11_1,m11_2);
m11_1 = _mm_max_epi8(m11_1,m11_3); m11_1 = _mm_max_epi8(m11_1,m11_3);
m11_1 = _mm_max_epi8(m11_1,m11_4); m11_1 = _mm_max_epi8(m11_1,m11_4);
m01_1 = _mm_subs_epi8(m01_1,*m10_128); m01_1 = _mm_subs_epi8(m01_1,*m10_128);
m00_1 = _mm_subs_epi8(m00_1,*m11_128); m00_1 = _mm_subs_epi8(m00_1,*m11_128);
m10_1 = _mm_adds_epi8(m10_1,*m10_128); m10_1 = _mm_adds_epi8(m10_1,*m10_128);
m11_1 = _mm_adds_epi8(m11_1,*m11_128); m11_1 = _mm_adds_epi8(m11_1,*m11_128);
m01_1 = _mm_max_epi8(m01_1,m00_1); m01_1 = _mm_max_epi8(m01_1,m00_1);
m10_1 = _mm_max_epi8(m10_1,m11_1); m10_1 = _mm_max_epi8(m10_1,m11_1);
*ext_128 = _mm_subs_epi8(m10_1,m01_1); *ext_128 = _mm_subs_epi8(m10_1,m01_1);
alpha_ptr+=8; alpha_ptr+=8;
beta_ptr+=8; beta_ptr+=8;
} }
#endif #endif
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
} }
...@@ -1906,8 +1799,7 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll ...@@ -1906,8 +1799,7 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll
//int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8], //int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8],
int *pi2tab[188],*pi5tab[188],*pi4tab[188],*pi6tab[188]; int *pi2tab[188],*pi5tab[188],*pi4tab[188],*pi6tab[188];
void free_td() void free_td() {
{
int ind; int ind;
for (ind = 0; ind < 188; ind++) { for (ind = 0; ind < 188; ind++) {
...@@ -1918,21 +1810,17 @@ void free_td() ...@@ -1918,21 +1810,17 @@ void free_td()
} }
} }
void init_td() void init_td() {
{
int ind,i,i2,i3,j,n,n2,pi,pi3; int ind,i,i2,i3,j,n,n2,pi,pi3;
short * base_interleaver; short *base_interleaver;
for (ind=0; ind<188; ind++) { for (ind=0; ind<188; ind++) {
n = f1f2mat[ind].nb_bits; n = f1f2mat[ind].nb_bits;
base_interleaver=il_tb+f1f2mat[ind].beg_index; base_interleaver=il_tb+f1f2mat[ind].beg_index;
pi2tab[ind] = malloc((n+8)*sizeof(int)); pi2tab[ind] = malloc((n+8)*sizeof(int));
pi5tab[ind] = malloc((n+8)*sizeof(int)); pi5tab[ind] = malloc((n+8)*sizeof(int));
pi4tab[ind] = malloc((n+8)*sizeof(int)); pi4tab[ind] = malloc((n+8)*sizeof(int));
pi6tab[ind] = malloc((n+8)*sizeof(int)); pi6tab[ind] = malloc((n+8)*sizeof(int));
#ifdef LLR8 #ifdef LLR8
if ((n&15)>0) { if ((n&15)>0) {
...@@ -1941,7 +1829,6 @@ void init_td() ...@@ -1941,7 +1829,6 @@ void init_td()
n2 = n; n2 = n;
for (j=0,i=0; i<n2; i++,j+=16) { for (j=0,i=0; i<n2; i++,j+=16) {
if (j>=n2) if (j>=n2)
j-=(n2-1); j-=(n2-1);
...@@ -1956,10 +1843,8 @@ void init_td() ...@@ -1956,10 +1843,8 @@ void init_td()
j=i2; j=i2;
for (i3=0; i3<(n>>3); i3++,i++,j+=8) { for (i3=0; i3<(n>>3); i3++,i++,j+=8) {
// if (j>=n) // if (j>=n)
// j-=(n-1); // j-=(n-1);
pi2tab[ind][i] = j; pi2tab[ind][i] = j;
// printf("pi2[%d] = %d\n",i,j); // printf("pi2[%d] = %d\n",i,j);
} }
...@@ -1967,7 +1852,6 @@ void init_td() ...@@ -1967,7 +1852,6 @@ void init_td()
#endif #endif
for (i=0; i<n2; i++) { for (i=0; i<n2; i++) {
pi = base_interleaver[i];//(unsigned int)threegpplte_interleaver(f1,f2,n); pi = base_interleaver[i];//(unsigned int)threegpplte_interleaver(f1,f2,n);
pi3 = pi2tab[ind][pi]; pi3 = pi2tab[ind][pi];
...@@ -1975,7 +1859,6 @@ void init_td() ...@@ -1975,7 +1859,6 @@ void init_td()
pi5tab[ind][pi3] = pi2tab[ind][i]; pi5tab[ind][pi3] = pi2tab[ind][i];
pi6tab[ind][pi] = pi2tab[ind][i]; pi6tab[ind][pi] = pi2tab[ind][i];
} }
} }
} }
...@@ -1991,33 +1874,25 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -1991,33 +1874,25 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
time_stats_t *gamma_stats, time_stats_t *gamma_stats,
time_stats_t *ext_stats, time_stats_t *ext_stats,
time_stats_t *intl1_stats, time_stats_t *intl1_stats,
time_stats_t *intl2_stats) time_stats_t *intl2_stats) {
{
/* y is a pointer to the input /* y is a pointer to the input
decoded_bytes is a pointer to the decoded output decoded_bytes is a pointer to the decoded output
n is the size in bits of the coded block, with the tail */ n is the size in bits of the coded block, with the tail */
int n2; int n2;
#ifdef LLR8 #ifdef LLR8
llr_t y8[3*(n+16)] __attribute__((aligned(16))); llr_t y8[3*(n+16)] __attribute__((aligned(16)));
#endif #endif
llr_t systematic0[n+16] __attribute__ ((aligned(16))); llr_t systematic0[n+16] __attribute__ ((aligned(16)));
llr_t systematic1[n+16] __attribute__ ((aligned(16))); llr_t systematic1[n+16] __attribute__ ((aligned(16)));
llr_t systematic2[n+16] __attribute__ ((aligned(16))); llr_t systematic2[n+16] __attribute__ ((aligned(16)));
llr_t yparity1[n+16] __attribute__ ((aligned(16))); llr_t yparity1[n+16] __attribute__ ((aligned(16)));
llr_t yparity2[n+16] __attribute__ ((aligned(16))); llr_t yparity2[n+16] __attribute__ ((aligned(16)));
llr_t ext[n+128] __attribute__((aligned(16))); llr_t ext[n+128] __attribute__((aligned(16)));
llr_t ext2[n+128] __attribute__((aligned(16))); llr_t ext2[n+128] __attribute__((aligned(16)));
llr_t alpha[(n+16)*8] __attribute__ ((aligned(16))); llr_t alpha[(n+16)*8] __attribute__ ((aligned(16)));
llr_t beta[(n+16)*8] __attribute__ ((aligned(16))); llr_t beta[(n+16)*8] __attribute__ ((aligned(16)));
llr_t m11[n+16] __attribute__ ((aligned(16))); llr_t m11[n+16] __attribute__ ((aligned(16)));
llr_t m10[n+16] __attribute__ ((aligned(16))); llr_t m10[n+16] __attribute__ ((aligned(16)));
int *pi2_p,*pi4_p,*pi5_p,*pi6_p; int *pi2_p,*pi4_p,*pi5_p,*pi6_p;
llr_t *s,*s1,*s2,*yp1,*yp2,*yp; llr_t *s,*s1,*s2,*yp1,*yp2,*yp;
__m128i *yp128; __m128i *yp128;
...@@ -2026,12 +1901,10 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2026,12 +1901,10 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
unsigned int crc,oldcrc,crc_len; unsigned int crc,oldcrc,crc_len;
uint8_t temp; uint8_t temp;
__m128i tmp128[(n+8)>>3]; __m128i tmp128[(n+8)>>3];
__m128i tmp, zeros=_mm_setzero_si128(); __m128i tmp, zeros=_mm_setzero_si128();
#ifdef LLR8 #ifdef LLR8
__m128i MAX128=_mm_set1_epi16(MAX/2); __m128i MAX128=_mm_set1_epi16(MAX/2);
#endif #endif
register __m128i tmpe; register __m128i tmpe;
int offset8_flag=0; int offset8_flag=0;
...@@ -2040,9 +1913,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2040,9 +1913,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
return 255; return 255;
} }
start_meas(init_stats); start_meas(init_stats);
#ifdef LLR8 #ifdef LLR8
if ((n&15)>0) { if ((n&15)>0) {
...@@ -2063,21 +1934,21 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2063,21 +1934,21 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
} }
switch (crc_type) { switch (crc_type) {
case CRC24_A: case CRC24_A:
case CRC24_B: case CRC24_B:
crc_len=3; crc_len=3;
break; break;
case CRC16: case CRC16:
crc_len=2; crc_len=2;
break; break;
case CRC8: case CRC8:
crc_len=1; crc_len=1;
break; break;
default: default:
crc_len=3; crc_len=3;
} }
#ifdef LLR8 #ifdef LLR8
...@@ -2087,199 +1958,154 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2087,199 +1958,154 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
//((__m128i *)y8)[i] = _mm_packs_epi16(((__m128i *)y)[j],((__m128i *)y)[j+1]); //((__m128i *)y8)[i] = _mm_packs_epi16(((__m128i *)y)[j],((__m128i *)y)[j+1]);
} }
yp128 = (__m128i*)y8; yp128 = (__m128i *)y8;
#else #else
yp128 = (__m128i*)y; yp128 = (__m128i *)y;
#endif #endif
s = systematic0; s = systematic0;
s1 = systematic1; s1 = systematic1;
s2 = systematic2; s2 = systematic2;
yp1 = yparity1; yp1 = yparity1;
yp2 = yparity2; yp2 = yparity2;
#ifndef LLR8 #ifndef LLR8
for (i=0; i<n2; i+=8) { for (i=0; i<n2; i+=8) {
pi2_p = &pi2tab[iind][i]; pi2_p = &pi2tab[iind][i];
j=pi2_p[0]; j=pi2_p[0];
tmpe = _mm_load_si128(yp128); tmpe = _mm_load_si128(yp128);
s[j] = _mm_extract_epi16(tmpe,0); s[j] = _mm_extract_epi16(tmpe,0);
yp1[j] = _mm_extract_epi16(tmpe,1); yp1[j] = _mm_extract_epi16(tmpe,1);
yp2[j] = _mm_extract_epi16(tmpe,2); yp2[j] = _mm_extract_epi16(tmpe,2);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[1]; j=pi2_p[1];
s[j] = _mm_extract_epi16(tmpe,3); s[j] = _mm_extract_epi16(tmpe,3);
yp1[j] = _mm_extract_epi16(tmpe,4); yp1[j] = _mm_extract_epi16(tmpe,4);
yp2[j] = _mm_extract_epi16(tmpe,5); yp2[j] = _mm_extract_epi16(tmpe,5);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[2]; j=pi2_p[2];
s[j] = _mm_extract_epi16(tmpe,6); s[j] = _mm_extract_epi16(tmpe,6);
yp1[j] = _mm_extract_epi16(tmpe,7); yp1[j] = _mm_extract_epi16(tmpe,7);
tmpe = _mm_load_si128(&yp128[1]); tmpe = _mm_load_si128(&yp128[1]);
yp2[j] = _mm_extract_epi16(tmpe,0); yp2[j] = _mm_extract_epi16(tmpe,0);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[3]; j=pi2_p[3];
s[j] = _mm_extract_epi16(tmpe,1); s[j] = _mm_extract_epi16(tmpe,1);
yp1[j] = _mm_extract_epi16(tmpe,2); yp1[j] = _mm_extract_epi16(tmpe,2);
yp2[j] = _mm_extract_epi16(tmpe,3); yp2[j] = _mm_extract_epi16(tmpe,3);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[4]; j=pi2_p[4];
s[j] = _mm_extract_epi16(tmpe,4); s[j] = _mm_extract_epi16(tmpe,4);
yp1[j] = _mm_extract_epi16(tmpe,5); yp1[j] = _mm_extract_epi16(tmpe,5);
yp2[j] = _mm_extract_epi16(tmpe,6); yp2[j] = _mm_extract_epi16(tmpe,6);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[5]; j=pi2_p[5];
s[j] = _mm_extract_epi16(tmpe,7); s[j] = _mm_extract_epi16(tmpe,7);
tmpe = _mm_load_si128(&yp128[2]); tmpe = _mm_load_si128(&yp128[2]);
yp1[j] = _mm_extract_epi16(tmpe,0); yp1[j] = _mm_extract_epi16(tmpe,0);
yp2[j] = _mm_extract_epi16(tmpe,1); yp2[j] = _mm_extract_epi16(tmpe,1);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[6]; j=pi2_p[6];
s[j] = _mm_extract_epi16(tmpe,2); s[j] = _mm_extract_epi16(tmpe,2);
yp1[j] = _mm_extract_epi16(tmpe,3); yp1[j] = _mm_extract_epi16(tmpe,3);
yp2[j] = _mm_extract_epi16(tmpe,4); yp2[j] = _mm_extract_epi16(tmpe,4);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[7]; j=pi2_p[7];
s[j] = _mm_extract_epi16(tmpe,5); s[j] = _mm_extract_epi16(tmpe,5);
yp1[j] = _mm_extract_epi16(tmpe,6); yp1[j] = _mm_extract_epi16(tmpe,6);
yp2[j] = _mm_extract_epi16(tmpe,7); yp2[j] = _mm_extract_epi16(tmpe,7);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
yp128+=3; yp128+=3;
} }
#else #else
for (i=0; i<n2; i+=16) { for (i=0; i<n2; i+=16) {
pi2_p = &pi2tab[iind][i]; pi2_p = &pi2tab[iind][i];
j=pi2_p[0]; j=pi2_p[0];
s[j] = _mm_extract_epi8(yp128[0],0); s[j] = _mm_extract_epi8(yp128[0],0);
yp1[j] = _mm_extract_epi8(yp128[0],1); yp1[j] = _mm_extract_epi8(yp128[0],1);
yp2[j] = _mm_extract_epi8(yp128[0],2); yp2[j] = _mm_extract_epi8(yp128[0],2);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[1]; j=pi2_p[1];
s[j] = _mm_extract_epi8(yp128[0],3); s[j] = _mm_extract_epi8(yp128[0],3);
yp1[j] = _mm_extract_epi8(yp128[0],4); yp1[j] = _mm_extract_epi8(yp128[0],4);
yp2[j] = _mm_extract_epi8(yp128[0],5); yp2[j] = _mm_extract_epi8(yp128[0],5);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[2]; j=pi2_p[2];
s[j] = _mm_extract_epi8(yp128[0],6); s[j] = _mm_extract_epi8(yp128[0],6);
yp1[j] = _mm_extract_epi8(yp128[0],7); yp1[j] = _mm_extract_epi8(yp128[0],7);
yp2[j] = _mm_extract_epi8(yp128[0],8); yp2[j] = _mm_extract_epi8(yp128[0],8);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[3]; j=pi2_p[3];
s[j] = _mm_extract_epi8(yp128[0],9); s[j] = _mm_extract_epi8(yp128[0],9);
yp1[j] = _mm_extract_epi8(yp128[0],10); yp1[j] = _mm_extract_epi8(yp128[0],10);
yp2[j] = _mm_extract_epi8(yp128[0],11); yp2[j] = _mm_extract_epi8(yp128[0],11);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[4]; j=pi2_p[4];
s[j] = _mm_extract_epi8(yp128[0],12); s[j] = _mm_extract_epi8(yp128[0],12);
yp1[j] = _mm_extract_epi8(yp128[0],13); yp1[j] = _mm_extract_epi8(yp128[0],13);
yp2[j] = _mm_extract_epi8(yp128[0],14); yp2[j] = _mm_extract_epi8(yp128[0],14);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[5]; j=pi2_p[5];
s[j] = _mm_extract_epi8(yp128[0],15); s[j] = _mm_extract_epi8(yp128[0],15);
yp1[j] = _mm_extract_epi8(yp128[1],0); yp1[j] = _mm_extract_epi8(yp128[1],0);
yp2[j] = _mm_extract_epi8(yp128[1],1); yp2[j] = _mm_extract_epi8(yp128[1],1);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[6]; j=pi2_p[6];
s[j] = _mm_extract_epi8(yp128[1],2); s[j] = _mm_extract_epi8(yp128[1],2);
yp1[j] = _mm_extract_epi8(yp128[1],3); yp1[j] = _mm_extract_epi8(yp128[1],3);
yp2[j] = _mm_extract_epi8(yp128[1],4); yp2[j] = _mm_extract_epi8(yp128[1],4);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[7]; j=pi2_p[7];
s[j] = _mm_extract_epi8(yp128[1],5); s[j] = _mm_extract_epi8(yp128[1],5);
yp1[j] = _mm_extract_epi8(yp128[1],6); yp1[j] = _mm_extract_epi8(yp128[1],6);
yp2[j] = _mm_extract_epi8(yp128[1],7); yp2[j] = _mm_extract_epi8(yp128[1],7);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[8]; j=pi2_p[8];
s[j] = _mm_extract_epi8(yp128[1],8); s[j] = _mm_extract_epi8(yp128[1],8);
yp1[j] = _mm_extract_epi8(yp128[1],9); yp1[j] = _mm_extract_epi8(yp128[1],9);
yp2[j] = _mm_extract_epi8(yp128[1],10); yp2[j] = _mm_extract_epi8(yp128[1],10);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[9]; j=pi2_p[9];
s[j] = _mm_extract_epi8(yp128[1],11); s[j] = _mm_extract_epi8(yp128[1],11);
yp1[j] = _mm_extract_epi8(yp128[1],12); yp1[j] = _mm_extract_epi8(yp128[1],12);
yp2[j] = _mm_extract_epi8(yp128[1],13); yp2[j] = _mm_extract_epi8(yp128[1],13);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[10]; j=pi2_p[10];
s[j] = _mm_extract_epi8(yp128[1],14); s[j] = _mm_extract_epi8(yp128[1],14);
yp1[j] = _mm_extract_epi8(yp128[1],15); yp1[j] = _mm_extract_epi8(yp128[1],15);
yp2[j] = _mm_extract_epi8(yp128[2],0); yp2[j] = _mm_extract_epi8(yp128[2],0);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[11]; j=pi2_p[11];
s[j] = _mm_extract_epi8(yp128[2],1); s[j] = _mm_extract_epi8(yp128[2],1);
yp1[j] = _mm_extract_epi8(yp128[2],2); yp1[j] = _mm_extract_epi8(yp128[2],2);
yp2[j] = _mm_extract_epi8(yp128[2],3); yp2[j] = _mm_extract_epi8(yp128[2],3);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[12]; j=pi2_p[12];
s[j] = _mm_extract_epi8(yp128[2],4); s[j] = _mm_extract_epi8(yp128[2],4);
yp1[j] = _mm_extract_epi8(yp128[2],5); yp1[j] = _mm_extract_epi8(yp128[2],5);
yp2[j] = _mm_extract_epi8(yp128[2],6); yp2[j] = _mm_extract_epi8(yp128[2],6);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[13]; j=pi2_p[13];
s[j] = _mm_extract_epi8(yp128[2],7); s[j] = _mm_extract_epi8(yp128[2],7);
yp1[j] = _mm_extract_epi8(yp128[2],8); yp1[j] = _mm_extract_epi8(yp128[2],8);
yp2[j] = _mm_extract_epi8(yp128[2],9); yp2[j] = _mm_extract_epi8(yp128[2],9);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[14]; j=pi2_p[14];
s[j] = _mm_extract_epi8(yp128[2],10); s[j] = _mm_extract_epi8(yp128[2],10);
yp1[j] = _mm_extract_epi8(yp128[2],11); yp1[j] = _mm_extract_epi8(yp128[2],11);
yp2[j] = _mm_extract_epi8(yp128[2],12); yp2[j] = _mm_extract_epi8(yp128[2],12);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
j=pi2_p[15]; j=pi2_p[15];
s[j] = _mm_extract_epi8(yp128[2],13); s[j] = _mm_extract_epi8(yp128[2],13);
yp1[j] = _mm_extract_epi8(yp128[2],14); yp1[j] = _mm_extract_epi8(yp128[2],14);
yp2[j] = _mm_extract_epi8(yp128[2],15); yp2[j] = _mm_extract_epi8(yp128[2],15);
// printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
yp128+=3; yp128+=3;
} }
#endif #endif
yp=(llr_t *)yp128;
yp=(llr_t*)yp128;
#ifdef LLR8 #ifdef LLR8
if (n2>n) { if (n2>n) {
...@@ -2290,7 +2116,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2290,7 +2116,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
s1[n+4]=0;s1[n+5]=0;s1[n+6]=0;s1[n+7]=0; s1[n+4]=0;s1[n+5]=0;s1[n+6]=0;s1[n+7]=0;
s2[n]=0;s2[n+1]=0;s2[n+2]=0;s2[n+3]=0; s2[n]=0;s2[n+1]=0;s2[n+2]=0;s2[n+3]=0;
s2[n+4]=0;s2[n+5]=0;s2[n+6]=0;s2[n+7]=0;*/ s2[n+4]=0;s2[n+5]=0;s2[n+6]=0;s2[n+7]=0;*/
yp=(llr_t*)(y8+n); yp=(llr_t *)(y8+n);
} }
#endif #endif
...@@ -2341,68 +2167,55 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2341,68 +2167,55 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
msg("\n"); msg("\n");
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
stop_meas(init_stats); stop_meas(init_stats);
// do log_map from first parity bit // do log_map from first parity bit
log_map(systematic0,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map(systematic0,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
while (iteration_cnt++ < max_iterations) { while (iteration_cnt++ < max_iterations) {
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("\n*******************ITERATION %d (n %d), ext %p\n\n",iteration_cnt,n,ext); printf("\n*******************ITERATION %d (n %d), ext %p\n\n",iteration_cnt,n,ext);
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
start_meas(intl1_stats); start_meas(intl1_stats);
#ifndef LLR8 #ifndef LLR8
pi4_p=pi4tab[iind]; pi4_p=pi4tab[iind];
for (i=0; i<(n2>>3); i++) { // steady-state portion for (i=0; i<(n2>>3); i++) { // steady-state portion
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],0);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],0); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],1);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],1); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],2);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],2); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],3);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],3); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],4);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],4); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],5);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],5); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],6);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],6); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],7);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],7);
} }
#else #else
pi4_p=pi4tab[iind]; pi4_p=pi4tab[iind];
for (i=0; i<(n2>>4); i++) { // steady-state portion for (i=0; i<(n2>>4); i++) { // steady-state portion
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],0); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],0);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],1); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],1);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],2); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],2);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],3); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],3);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],4); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],4);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],5); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],5);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],6); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],6);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],7); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],7);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],8); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],8);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],9); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],9);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],10); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],10);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],11); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],11);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],12); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],12);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],13); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],13);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],14); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],14);
((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],15); ((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],15);
} }
#endif #endif
stop_meas(intl1_stats); stop_meas(intl1_stats);
// do log_map from second parity bit // do log_map from second parity bit
log_map(systematic2,yparity2,m11,m10,alpha,beta,ext2,n2,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map(systematic2,yparity2,m11,m10,alpha,beta,ext2,n2,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
#ifndef LLR8 #ifndef LLR8
pi5_p=pi5tab[iind]; pi5_p=pi5tab[iind];
...@@ -2415,7 +2228,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2415,7 +2228,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],5); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],5);
tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],6); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],6);
tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],7); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],7);
((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]); ((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]);
} }
if (iteration_cnt>1) { if (iteration_cnt>1) {
...@@ -2423,17 +2236,16 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2423,17 +2236,16 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
pi6_p=pi6tab[iind]; pi6_p=pi6tab[iind];
for (i=0; i<(n2>>3); i++) { for (i=0; i<(n2>>3); i++) {
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],7); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],7);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],6); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],6);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],5); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],5);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],4); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],4);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],3); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],3);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],2); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],2);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],1); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],1);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],0); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],0);
tmp=_mm_cmpgt_epi8(_mm_packs_epi16(tmp,zeros),zeros); tmp=_mm_cmpgt_epi8(_mm_packs_epi16(tmp,zeros),zeros);
decoded_bytes[i]=(unsigned char)_mm_movemask_epi8(tmp); decoded_bytes[i]=(unsigned char)_mm_movemask_epi8(tmp);
} }
} }
...@@ -2460,8 +2272,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2460,8 +2272,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15); tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15);
//decoded_bytes_interl[i]=(uint16_t) _mm_movemask_epi8(_mm_cmpgt_epi8(tmp,zeros)); //decoded_bytes_interl[i]=(uint16_t) _mm_movemask_epi8(_mm_cmpgt_epi8(tmp,zeros));
tmp128[i] = _mm_adds_epi8(((__m128i *)ext2)[i],((__m128i *)systematic2)[i]); tmp128[i] = _mm_adds_epi8(((__m128i *)ext2)[i],((__m128i *)systematic2)[i]);
((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]);
((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]);
} }
/* LT modification, something wrong here /* LT modification, something wrong here
...@@ -2526,41 +2337,40 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2526,41 +2337,40 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len])); oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len]));
switch (crc_type) { switch (crc_type) {
case CRC24_A:
case CRC24_A: oldcrc&=0x00ffffff;
oldcrc&=0x00ffffff; crc = crc24a(&decoded_bytes[F>>3],
crc = crc24a(&decoded_bytes[F>>3], n-24-F)>>8;
n-24-F)>>8; temp=((uint8_t *)&crc)[2];
temp=((uint8_t *)&crc)[2]; ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0];
((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; ((uint8_t *)&crc)[0] = temp;
((uint8_t *)&crc)[0] = temp; break;
break;
case CRC24_B:
case CRC24_B: oldcrc&=0x00ffffff;
oldcrc&=0x00ffffff; crc = crc24b(decoded_bytes,
crc = crc24b(decoded_bytes, n-24)>>8;
n-24)>>8; temp=((uint8_t *)&crc)[2];
temp=((uint8_t *)&crc)[2]; ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0];
((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; ((uint8_t *)&crc)[0] = temp;
((uint8_t *)&crc)[0] = temp; break;
break;
case CRC16:
case CRC16: oldcrc&=0x0000ffff;
oldcrc&=0x0000ffff; crc = crc16(decoded_bytes,
crc = crc16(decoded_bytes, n-16)>>16;
n-16)>>16; break;
break;
case CRC8:
case CRC8: oldcrc&=0x000000ff;
oldcrc&=0x000000ff; crc = crc8(decoded_bytes,
crc = crc8(decoded_bytes, n-8)>>24;
n-8)>>24; break;
break;
default:
default: printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n");
printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); return(255);
return(255); break;
break;
} }
stop_meas(intl2_stats); stop_meas(intl2_stats);
...@@ -2573,9 +2383,9 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2573,9 +2383,9 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
// do log_map from first parity bit // do log_map from first parity bit
if (iteration_cnt < max_iterations) { if (iteration_cnt < max_iterations) {
log_map(systematic1,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map(systematic1,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
__m128i* ext_128=(__m128i*) ext; __m128i *ext_128=(__m128i *) ext;
__m128i* s1_128=(__m128i*) systematic1; __m128i *s1_128=(__m128i *) systematic1;
__m128i* s0_128=(__m128i*) systematic0; __m128i *s0_128=(__m128i *) systematic0;
#ifndef LLR8 #ifndef LLR8
int myloop=n2>>3; int myloop=n2>>3;
...@@ -2601,27 +2411,21 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, ...@@ -2601,27 +2411,21 @@ unsigned char phy_threegpplte_turbo_decoder(short *y,
#ifdef TEST_DEBUG #ifdef TEST_DEBUG
int test_logmap8() int test_logmap8() {
{
unsigned char test[8]; unsigned char test[8];
//_declspec(align(16)) char channel_output[512]; //_declspec(align(16)) char channel_output[512];
//_declspec(align(16)) unsigned char output[512],decoded_output[16], *inPtr, *outPtr; //_declspec(align(16)) unsigned char output[512],decoded_output[16], *inPtr, *outPtr;
short channel_output[512]; short channel_output[512];
unsigned char output[512],decoded_output[16]; unsigned char output[512],decoded_output[16];
unsigned int i,crc,ret; unsigned int i,crc,ret;
test[0] = 7; test[0] = 7;
test[1] = 0xa5; test[1] = 0xa5;
test[2] = 0x11; test[2] = 0x11;
test[3] = 0x92; test[3] = 0x92;
test[4] = 0xfe; test[4] = 0xfe;
crc = crc24a(test, crc = crc24a(test,
40)>>8; 40)>>8;
*(unsigned int *)(&test[5]) = crc;
*(unsigned int*)(&test[5]) = crc;
printf("crc24 = %x\n",crc); printf("crc24 = %x\n",crc);
threegpplte_turbo_encoder(test, //input threegpplte_turbo_encoder(test, //input
8, //input length bytes 8, //input length bytes
...@@ -2646,20 +2450,15 @@ int test_logmap8() ...@@ -2646,20 +2450,15 @@ int test_logmap8()
0, // filler bits 0, // filler bits
0); // decoder instance 0); // decoder instance
for (i=0; i<8; i++) for (i=0; i<8; i++)
printf("output %d => %x (input %x)\n",i,decoded_output[i],test[i]); printf("output %u => %x (input %x)\n",i,decoded_output[i],test[i]);
} }
int main() int main() {
{
test_logmap8(); test_logmap8();
return(0); return(0);
} }
......
...@@ -41,53 +41,53 @@ ...@@ -41,53 +41,53 @@
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG #ifndef TEST_DEBUG
#include "PHY/impl_defs_top.h" #include "PHY/impl_defs_top.h"
#include "PHY/defs_common.h" #include "PHY/defs_common.h"
#include "PHY/CODING/coding_defs.h" #include "PHY/CODING/coding_defs.h"
#include "PHY/CODING/lte_interleaver_inline.h" #include "PHY/CODING/lte_interleaver_inline.h"
#else #else
#include "defs.h" #include "defs.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#endif #endif
#ifdef MEX #ifdef MEX
#include "mex.h" #include "mex.h"
#endif #endif
//#define DEBUG_LOGMAP //#define DEBUG_LOGMAP
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
#define print_shorts(s,x) fprintf(fdsse4,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7]) #define print_shorts(s,x) fprintf(fdsse4,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7])
#endif #endif
#undef __AVX2__ #undef __AVX2__
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
FILE *fdsse4; FILE *fdsse4;
#endif #endif
typedef int16_t llr_t; // internal decoder LLR data is 16-bit fixed typedef int16_t llr_t; // internal decoder LLR data is 16-bit fixed
typedef int16_t channel_t; typedef int16_t channel_t;
#define MAX 256 #define MAX 256
void log_map16(llr_t* systematic,channel_t* y_parity, llr_t* m11, llr_t* m10, llr_t *alpha, llr_t *beta, llr_t* ext,unsigned short frame_length,unsigned char term_flag,unsigned char F, void log_map16(llr_t *systematic,channel_t *y_parity, llr_t *m11, llr_t *m10, llr_t *alpha, llr_t *beta, llr_t *ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,
int offset8_flag,time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats); int offset8_flag,time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats);
void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic, channel_t* y_parity, unsigned short frame_length,unsigned char term_flag); void compute_gamma16(llr_t *m11,llr_t *m10,llr_t *systematic, channel_t *y_parity, unsigned short frame_length,unsigned char term_flag);
void compute_alpha16(llr_t*alpha,llr_t *beta, llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F); void compute_alpha16(llr_t *alpha,llr_t *beta, llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F);
void compute_beta16(llr_t*alpha, llr_t* beta,llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F,int offset8_flag); void compute_beta16(llr_t *alpha, llr_t *beta,llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F,int offset8_flag);
void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m11,llr_t* m10,llr_t* extrinsic, llr_t* ap, unsigned short frame_length); void compute_ext16(llr_t *alpha,llr_t *beta,llr_t *m11,llr_t *m10,llr_t *extrinsic, llr_t *ap, unsigned short frame_length);
void log_map16(llr_t* systematic, void log_map16(llr_t *systematic,
channel_t* y_parity, channel_t *y_parity,
llr_t* m11, llr_t *m11,
llr_t* m10, llr_t *m10,
llr_t *alpha, llr_t *alpha,
llr_t *beta, llr_t *beta,
llr_t* ext, llr_t *ext,
unsigned short frame_length, unsigned short frame_length,
unsigned char term_flag, unsigned char term_flag,
unsigned char F, unsigned char F,
...@@ -95,13 +95,10 @@ void log_map16(llr_t* systematic, ...@@ -95,13 +95,10 @@ void log_map16(llr_t* systematic,
time_stats_t *alpha_stats, time_stats_t *alpha_stats,
time_stats_t *beta_stats, time_stats_t *beta_stats,
time_stats_t *gamma_stats, time_stats_t *gamma_stats,
time_stats_t *ext_stats) time_stats_t *ext_stats) {
{
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"log_map, frame_length %d\n",frame_length); fprintf(fdsse4,"log_map, frame_length %d\n",frame_length);
#endif #endif
start_meas(gamma_stats) ; start_meas(gamma_stats) ;
compute_gamma16(m11,m10,systematic,y_parity,frame_length,term_flag) ; compute_gamma16(m11,m10,systematic,y_parity,frame_length,term_flag) ;
stop_meas(gamma_stats); stop_meas(gamma_stats);
...@@ -114,13 +111,10 @@ void log_map16(llr_t* systematic, ...@@ -114,13 +111,10 @@ void log_map16(llr_t* systematic,
start_meas(ext_stats) ; start_meas(ext_stats) ;
compute_ext16(alpha,beta,m11,m10,ext,systematic,frame_length) ; compute_ext16(alpha,beta,m11,m10,ext,systematic,frame_length) ;
stop_meas(ext_stats); stop_meas(ext_stats);
} }
void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, void compute_gamma16(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity,
unsigned short frame_length,unsigned char term_flag) unsigned short frame_length,unsigned char term_flag) {
{
int k,K1; int k,K1;
#if defined(__x86_64__)||defined(__i386__) #if defined(__x86_64__)||defined(__i386__)
__m128i *systematic128 = (__m128i *)systematic; __m128i *systematic128 = (__m128i *)systematic;
...@@ -133,18 +127,18 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity ...@@ -133,18 +127,18 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity
int16x8_t *m10_128 = (int16x8_t *)m10; int16x8_t *m10_128 = (int16x8_t *)m10;
int16x8_t *m11_128 = (int16x8_t *)m11; int16x8_t *m11_128 = (int16x8_t *)m11;
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"compute_gamma (sse_16bit), %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length); fprintf(fdsse4,"compute_gamma (sse_16bit), %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length);
#endif #endif
#ifndef __AVX2__ #ifndef __AVX2__
K1=frame_length>>3; K1=frame_length>>3;
#else #else
if ((frame_length&15) > 0) if ((frame_length&15) > 0)
K1=(frame_length+1)>>4; K1=(frame_length+1)>>4;
else else
K1=frame_length>>4; K1=frame_length>>4;
#endif #endif
for (k=0; k<K1; k++) { for (k=0; k<K1; k++) {
...@@ -153,21 +147,20 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity ...@@ -153,21 +147,20 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity
m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k],y_parity128[k]),1); m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k],y_parity128[k]),1);
m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k],y_parity128[k]),1); m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k],y_parity128[k]),1);
#else #else
((__m256i*)m11_128)[k] = _mm256_srai_epi16(_mm256_adds_epi16(((__m256i*)systematic128)[k],((__m256i*)y_parity128)[k]),1); ((__m256i *)m11_128)[k] = _mm256_srai_epi16(_mm256_adds_epi16(((__m256i *)systematic128)[k],((__m256i *)y_parity128)[k]),1);
// ((__m256i*)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i*)y_parity128)[k],((__m256i*)systematic128)[k]),1); // ((__m256i*)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i*)y_parity128)[k],((__m256i*)systematic128)[k]),1);
((__m256i*)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i*)systematic128)[k],((__m256i*)y_parity128)[k]),1); ((__m256i *)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i *)systematic128)[k],((__m256i *)y_parity128)[k]),1);
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
m11_128[k] = vhaddq_s16(systematic128[k],y_parity128[k]); m11_128[k] = vhaddq_s16(systematic128[k],y_parity128[k]);
m10_128[k] = vhsubq_s16(systematic128[k],y_parity128[k]); m10_128[k] = vhsubq_s16(systematic128[k],y_parity128[k]);
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"Loop index k %d\n", k); fprintf(fdsse4,"Loop index k %d\n", k);
print_shorts("sys",(int16_t*)&systematic128[k]); print_shorts("sys",(int16_t *)&systematic128[k]);
print_shorts("yp",(int16_t*)&y_parity128[k]); print_shorts("yp",(int16_t *)&y_parity128[k]);
print_shorts("m11",(int16_t*)&m11_128[k]); print_shorts("m11",(int16_t *)&m11_128[k]);
print_shorts("m10",(int16_t*)&m10_128[k]); print_shorts("m10",(int16_t *)&m10_128[k]);
#endif #endif
} }
...@@ -185,20 +178,18 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity ...@@ -185,20 +178,18 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity
m11_128[k] = vhaddq_s16(systematic128[k+term_flag],y_parity128[k]); m11_128[k] = vhaddq_s16(systematic128[k+term_flag],y_parity128[k]);
m10_128[k] = vhsubq_s16(systematic128[k+term_flag],y_parity128[k]); m10_128[k] = vhsubq_s16(systematic128[k+term_flag],y_parity128[k]);
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"Loop index k %d (term flag %d)\n", k,term_flag); fprintf(fdsse4,"Loop index k %d (term flag %d)\n", k,term_flag);
print_shorts("sys",(int16_t*)&systematic128[k]); print_shorts("sys",(int16_t *)&systematic128[k]);
print_shorts("yp",(int16_t*)&y_parity128[k]); print_shorts("yp",(int16_t *)&y_parity128[k]);
print_shorts("m11",(int16_t*)&m11_128[k]); print_shorts("m11",(int16_t *)&m11_128[k]);
print_shorts("m10",(int16_t*)&m10_128[k]); print_shorts("m10",(int16_t *)&m10_128[k]);
#endif #endif
} }
#define L 40 #define L 40
void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned short frame_length,unsigned char F) void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F) {
{
int k,l,l2,K1,rerun_flag=0; int k,l,l2,K1,rerun_flag=0;
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i *alpha128=(__m128i *)alpha,*alpha_ptr,*m11p,*m10p; __m128i *alpha128=(__m128i *)alpha,*alpha_ptr,*m11p,*m10p;
...@@ -215,7 +206,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -215,7 +206,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
__m256i m11m10_256; __m256i m11m10_256;
__m256i alpha_max; __m256i alpha_max;
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
int16x8_t *alpha128=(int16x8_t *)alpha,*alpha_ptr; int16x8_t *alpha128=(int16x8_t *)alpha,*alpha_ptr;
int16x8_t a0,a1,a2,a3,a4,a5,a6,a7,*m11p,*m10p; int16x8_t a0,a1,a2,a3,a4,a5,a6,a7,*m11p,*m10p;
...@@ -228,6 +218,7 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -228,6 +218,7 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"compute_alpha (sse_16bit)\n"); fprintf(fdsse4,"compute_alpha (sse_16bit)\n");
#endif #endif
for (l=K1;; l=l2,rerun_flag=1) { for (l=K1;; l=l2,rerun_flag=1) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
alpha128 = (__m128i *)alpha; alpha128 = (__m128i *)alpha;
...@@ -259,14 +250,14 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -259,14 +250,14 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"Initial alpha\n"); fprintf(fdsse4,"Initial alpha\n");
print_shorts("a0",(int16_t*)&alpha128[0]); print_shorts("a0",(int16_t *)&alpha128[0]);
print_shorts("a1",(int16_t*)&alpha128[1]); print_shorts("a1",(int16_t *)&alpha128[1]);
print_shorts("a2",(int16_t*)&alpha128[2]); print_shorts("a2",(int16_t *)&alpha128[2]);
print_shorts("a3",(int16_t*)&alpha128[3]); print_shorts("a3",(int16_t *)&alpha128[3]);
print_shorts("a4",(int16_t*)&alpha128[4]); print_shorts("a4",(int16_t *)&alpha128[4]);
print_shorts("a5",(int16_t*)&alpha128[5]); print_shorts("a5",(int16_t *)&alpha128[5]);
print_shorts("a6",(int16_t*)&alpha128[6]); print_shorts("a6",(int16_t *)&alpha128[6]);
print_shorts("a7",(int16_t*)&alpha128[7]); print_shorts("a7",(int16_t *)&alpha128[7]);
#endif #endif
} else { } else {
//set initial alpha in columns 1-7 from final alpha from last run in columns 0-6 //set initial alpha in columns 1-7 from final alpha from last run in columns 0-6
...@@ -280,14 +271,22 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -280,14 +271,22 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha128[6] = _mm_slli_si128(alpha128[6+frame_length],2); alpha128[6] = _mm_slli_si128(alpha128[6+frame_length],2);
alpha128[7] = _mm_slli_si128(alpha128[7+frame_length],2); alpha128[7] = _mm_slli_si128(alpha128[7+frame_length],2);
#elif defined(__arm__) #elif defined(__arm__)
alpha128[0] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[frame_length],16); alpha128[0] = vsetq_lane_s16(alpha[8],alpha128[0],3); alpha128[0] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[frame_length],16);
alpha128[1] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[1+frame_length],16); alpha128[1] = vsetq_lane_s16(alpha[24],alpha128[0],3); alpha128[0] = vsetq_lane_s16(alpha[8],alpha128[0],3);
alpha128[2] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[2+frame_length],16); alpha128[2] = vsetq_lane_s16(alpha[40],alpha128[0],3); alpha128[1] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[1+frame_length],16);
alpha128[3] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[3+frame_length],16); alpha128[3] = vsetq_lane_s16(alpha[56],alpha128[0],3); alpha128[1] = vsetq_lane_s16(alpha[24],alpha128[0],3);
alpha128[4] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[4+frame_length],16); alpha128[4] = vsetq_lane_s16(alpha[72],alpha128[0],3); alpha128[2] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[2+frame_length],16);
alpha128[5] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[5+frame_length],16); alpha128[5] = vsetq_lane_s16(alpha[88],alpha128[0],3); alpha128[2] = vsetq_lane_s16(alpha[40],alpha128[0],3);
alpha128[6] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[6+frame_length],16); alpha128[6] = vsetq_lane_s16(alpha[104],alpha128[0],3); alpha128[3] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[3+frame_length],16);
alpha128[7] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[7+frame_length],16); alpha128[7] = vsetq_lane_s16(alpha[120],alpha128[0],3); alpha128[3] = vsetq_lane_s16(alpha[56],alpha128[0],3);
alpha128[4] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[4+frame_length],16);
alpha128[4] = vsetq_lane_s16(alpha[72],alpha128[0],3);
alpha128[5] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[5+frame_length],16);
alpha128[5] = vsetq_lane_s16(alpha[88],alpha128[0],3);
alpha128[6] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[6+frame_length],16);
alpha128[6] = vsetq_lane_s16(alpha[104],alpha128[0],3);
alpha128[7] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[7+frame_length],16);
alpha128[7] = vsetq_lane_s16(alpha[120],alpha128[0],3);
#endif #endif
// set initial alpha in column 0 to (0,-MAX/2,...,-MAX/2) // set initial alpha in column 0 to (0,-MAX/2,...,-MAX/2)
alpha[8] = -MAX/2; alpha[8] = -MAX/2;
...@@ -299,31 +298,30 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -299,31 +298,30 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha[56] = -MAX/2; alpha[56] = -MAX/2;
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"Second run\n"); fprintf(fdsse4,"Second run\n");
print_shorts("a0",(int16_t*)&alpha128[0]); print_shorts("a0",(int16_t *)&alpha128[0]);
print_shorts("a1",(int16_t*)&alpha128[1]); print_shorts("a1",(int16_t *)&alpha128[1]);
print_shorts("a2",(int16_t*)&alpha128[2]); print_shorts("a2",(int16_t *)&alpha128[2]);
print_shorts("a3",(int16_t*)&alpha128[3]); print_shorts("a3",(int16_t *)&alpha128[3]);
print_shorts("a4",(int16_t*)&alpha128[4]); print_shorts("a4",(int16_t *)&alpha128[4]);
print_shorts("a5",(int16_t*)&alpha128[5]); print_shorts("a5",(int16_t *)&alpha128[5]);
print_shorts("a6",(int16_t*)&alpha128[6]); print_shorts("a6",(int16_t *)&alpha128[6]);
print_shorts("a7",(int16_t*)&alpha128[7]); print_shorts("a7",(int16_t *)&alpha128[7]);
#endif #endif
} }
alpha_ptr = &alpha128[0]; alpha_ptr = &alpha128[0];
//#ifdef __AVX2__ //#ifdef __AVX2__
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
m11p = (__m128i*)m_11; m11p = (__m128i *)m_11;
m10p = (__m128i*)m_10; m10p = (__m128i *)m_10;
#elif defined(__arm__) #elif defined(__arm__)
m11p = (int16x8_t*)m_11; m11p = (int16x8_t *)m_11;
m10p = (int16x8_t*)m_10; m10p = (int16x8_t *)m_10;
#endif #endif
for (k=0; for (k=0;
k<l; k<l;
k++) { k++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
//#ifndef __AVX2__ //#ifndef __AVX2__
#if 1 #if 1
...@@ -331,7 +329,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -331,7 +329,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
a3=_mm_load_si128(&alpha_ptr[3]); a3=_mm_load_si128(&alpha_ptr[3]);
a5=_mm_load_si128(&alpha_ptr[5]); a5=_mm_load_si128(&alpha_ptr[5]);
a7=_mm_load_si128(&alpha_ptr[7]); a7=_mm_load_si128(&alpha_ptr[7]);
m_b0 = _mm_adds_epi16(a1,*m11p); // m11 m_b0 = _mm_adds_epi16(a1,*m11p); // m11
m_b4 = _mm_subs_epi16(a1,*m11p); // m00=-m11 m_b4 = _mm_subs_epi16(a1,*m11p); // m00=-m11
m_b1 = _mm_subs_epi16(a3,*m10p); // m01=-m10 m_b1 = _mm_subs_epi16(a3,*m10p); // m01=-m10
...@@ -340,12 +337,10 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -340,12 +337,10 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
m_b6 = _mm_subs_epi16(a5,*m10p); // m01=-m10 m_b6 = _mm_subs_epi16(a5,*m10p); // m01=-m10
m_b3 = _mm_subs_epi16(a7,*m11p); // m00=-m11 m_b3 = _mm_subs_epi16(a7,*m11p); // m00=-m11
m_b7 = _mm_adds_epi16(a7,*m11p); // m11 m_b7 = _mm_adds_epi16(a7,*m11p); // m11
a0=_mm_load_si128(&alpha_ptr[0]); a0=_mm_load_si128(&alpha_ptr[0]);
a2=_mm_load_si128(&alpha_ptr[2]); a2=_mm_load_si128(&alpha_ptr[2]);
a4=_mm_load_si128(&alpha_ptr[4]); a4=_mm_load_si128(&alpha_ptr[4]);
a6=_mm_load_si128(&alpha_ptr[6]); a6=_mm_load_si128(&alpha_ptr[6]);
new0 = _mm_subs_epi16(a0,*m11p); // m00=-m11 new0 = _mm_subs_epi16(a0,*m11p); // m00=-m11
new4 = _mm_adds_epi16(a0,*m11p); // m11 new4 = _mm_adds_epi16(a0,*m11p); // m11
new1 = _mm_adds_epi16(a2,*m10p); // m10 new1 = _mm_adds_epi16(a2,*m10p); // m10
...@@ -354,7 +349,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -354,7 +349,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
new6 = _mm_adds_epi16(a4,*m10p); // m10 new6 = _mm_adds_epi16(a4,*m10p); // m10
new3 = _mm_adds_epi16(a6,*m11p); // m11 new3 = _mm_adds_epi16(a6,*m11p); // m11
new7 = _mm_subs_epi16(a6,*m11p); // m00=-m11 new7 = _mm_subs_epi16(a6,*m11p); // m00=-m11
a0 = _mm_max_epi16(m_b0,new0); a0 = _mm_max_epi16(m_b0,new0);
a1 = _mm_max_epi16(m_b1,new1); a1 = _mm_max_epi16(m_b1,new1);
a2 = _mm_max_epi16(m_b2,new2); a2 = _mm_max_epi16(m_b2,new2);
...@@ -363,7 +357,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -363,7 +357,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
a5 = _mm_max_epi16(m_b5,new5); a5 = _mm_max_epi16(m_b5,new5);
a6 = _mm_max_epi16(m_b6,new6); a6 = _mm_max_epi16(m_b6,new6);
a7 = _mm_max_epi16(m_b7,new7); a7 = _mm_max_epi16(m_b7,new7);
alpha_max = _mm_max_epi16(a0,a1); alpha_max = _mm_max_epi16(a0,a1);
alpha_max = _mm_max_epi16(alpha_max,a2); alpha_max = _mm_max_epi16(alpha_max,a2);
alpha_max = _mm_max_epi16(alpha_max,a3); alpha_max = _mm_max_epi16(alpha_max,a3);
...@@ -378,29 +371,22 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -378,29 +371,22 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
a75=_mm256_load_si256(&alpha_ptr256[3]); a75=_mm256_load_si256(&alpha_ptr256[3]);
m11m10_256 = _mm256_insertf128_si256(m11m10_256,*m11p,0); m11m10_256 = _mm256_insertf128_si256(m11m10_256,*m11p,0);
m11m10_256 = _mm256_insertf128_si256(m11m10_256,*m10p,1); m11m10_256 = _mm256_insertf128_si256(m11m10_256,*m10p,1);
m_b01 = _mm256_adds_epi16(a13,m11m10_256); //negative m10 m_b01 = _mm256_adds_epi16(a13,m11m10_256); //negative m10
m_b23 = _mm256_subs_epi16(a75,m11m10_256); //negative m10 m_b23 = _mm256_subs_epi16(a75,m11m10_256); //negative m10
m_b45 = _mm256_subs_epi16(a13,m11m10_256); //negative m10 m_b45 = _mm256_subs_epi16(a13,m11m10_256); //negative m10
m_b67 = _mm256_adds_epi16(a75,m11m10_256); //negative m10 m_b67 = _mm256_adds_epi16(a75,m11m10_256); //negative m10
new01 = _mm256_subs_epi16(a02,m11m10_256); //negative m10 new01 = _mm256_subs_epi16(a02,m11m10_256); //negative m10
new23 = _mm256_adds_epi16(a64,m11m10_256); //negative m10 new23 = _mm256_adds_epi16(a64,m11m10_256); //negative m10
new45 = _mm256_adds_epi16(a02,m11m10_256); //negative m10 new45 = _mm256_adds_epi16(a02,m11m10_256); //negative m10
new67 = _mm256_subs_epi16(a64,m11m10_256); //negative m10 new67 = _mm256_subs_epi16(a64,m11m10_256); //negative m10
a01 = _mm256_max_epi16(m_b01,new01); a01 = _mm256_max_epi16(m_b01,new01);
a23 = _mm256_max_epi16(m_b23,new23); a23 = _mm256_max_epi16(m_b23,new23);
a45 = _mm256_max_epi16(m_b45,new45); a45 = _mm256_max_epi16(m_b45,new45);
a67 = _mm256_max_epi16(m_b67,new67); a67 = _mm256_max_epi16(m_b67,new67);
alpha_max = _mm256_max_epi16(a01,a23); alpha_max = _mm256_max_epi16(a01,a23);
alpha_max = _mm256_max_epi16(alpha_max,a45); alpha_max = _mm256_max_epi16(alpha_max,a45);
alpha_max = _mm256_max_epi16(alpha_max,a67); alpha_max = _mm256_max_epi16(alpha_max,a67);
alpha_max = _mm256_max_epi16(alpha_max,_mm256_permutevar8x32_epi32(alpha_max,_mm256_set_epi32(3,2,1,0,7,6,5,4))); alpha_max = _mm256_max_epi16(alpha_max,_mm256_permutevar8x32_epi32(alpha_max,_mm256_set_epi32(3,2,1,0,7,6,5,4)));
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
m_b0 = vqaddq_s16(alpha_ptr[1],*m11p); // m11 m_b0 = vqaddq_s16(alpha_ptr[1],*m11p); // m11
...@@ -411,7 +397,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -411,7 +397,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
m_b6 = vqsubq_s16(alpha_ptr[5],*m10p); // m01=-m10 m_b6 = vqsubq_s16(alpha_ptr[5],*m10p); // m01=-m10
m_b3 = vqsubq_s16(alpha_ptr[7],*m11p); // m00=-m11 m_b3 = vqsubq_s16(alpha_ptr[7],*m11p); // m00=-m11
m_b7 = vqaddq_s16(alpha_ptr[7],*m11p); // m11 m_b7 = vqaddq_s16(alpha_ptr[7],*m11p); // m11
new0 = vqsubq_s16(alpha_ptr[0],*m11p); // m00=-m11 new0 = vqsubq_s16(alpha_ptr[0],*m11p); // m00=-m11
new4 = vqaddq_s16(alpha_ptr[0],*m11p); // m11 new4 = vqaddq_s16(alpha_ptr[0],*m11p); // m11
new1 = vqaddq_s16(alpha_ptr[2],*m10p); // m10 new1 = vqaddq_s16(alpha_ptr[2],*m10p); // m10
...@@ -428,7 +413,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -428,7 +413,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
a5 = vmaxq_s16(m_b5,new5); a5 = vmaxq_s16(m_b5,new5);
a6 = vmaxq_s16(m_b6,new6); a6 = vmaxq_s16(m_b6,new6);
a7 = vmaxq_s16(m_b7,new7); a7 = vmaxq_s16(m_b7,new7);
// compute and subtract maxima // compute and subtract maxima
alpha_max = vmaxq_s16(a0,a1); alpha_max = vmaxq_s16(a0,a1);
alpha_max = vmaxq_s16(alpha_max,a2); alpha_max = vmaxq_s16(alpha_max,a2);
...@@ -437,9 +421,7 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -437,9 +421,7 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha_max = vmaxq_s16(alpha_max,a5); alpha_max = vmaxq_s16(alpha_max,a5);
alpha_max = vmaxq_s16(alpha_max,a6); alpha_max = vmaxq_s16(alpha_max,a6);
alpha_max = vmaxq_s16(alpha_max,a7); alpha_max = vmaxq_s16(alpha_max,a7);
#endif #endif
alpha_ptr+=8; alpha_ptr+=8;
//#ifdef __AVX2__ //#ifdef __AVX2__
m11p++; m11p++;
...@@ -456,12 +438,10 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -456,12 +438,10 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha_ptr[6] = _mm_subs_epi16(a6,alpha_max); alpha_ptr[6] = _mm_subs_epi16(a6,alpha_max);
alpha_ptr[7] = _mm_subs_epi16(a7,alpha_max); alpha_ptr[7] = _mm_subs_epi16(a7,alpha_max);
#else #else
a01 = _mm256_subs_epi16(a01,alpha_max); a01 = _mm256_subs_epi16(a01,alpha_max);
a23 = _mm256_subs_epi16(a23,alpha_max); a23 = _mm256_subs_epi16(a23,alpha_max);
a45 = _mm256_subs_epi16(a45,alpha_max); a45 = _mm256_subs_epi16(a45,alpha_max);
a67 = _mm256_subs_epi16(a67,alpha_max); a67 = _mm256_subs_epi16(a67,alpha_max);
alpha_ptr256[0] = _mm256_permute2x128_si256(a01,a23,0x20); //a02 alpha_ptr256[0] = _mm256_permute2x128_si256(a01,a23,0x20); //a02
alpha_ptr256[1] = _mm256_permute2x128_si256(a01,a23,0x13); //a13 alpha_ptr256[1] = _mm256_permute2x128_si256(a01,a23,0x13); //a13
alpha_ptr256[2] = _mm256_permute2x128_si256(a45,a67,0x02); //a64 alpha_ptr256[2] = _mm256_permute2x128_si256(a45,a67,0x02); //a64
...@@ -477,49 +457,44 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -477,49 +457,44 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
alpha_ptr[6] = vqsubq_s16(a6,alpha_max); alpha_ptr[6] = vqsubq_s16(a6,alpha_max);
alpha_ptr[7] = vqsubq_s16(a7,alpha_max); alpha_ptr[7] = vqsubq_s16(a7,alpha_max);
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"Loop index %d\n",k); fprintf(fdsse4,"Loop index %d\n",k);
print_shorts("mb0",(int16_t*)&m_b0); print_shorts("mb0",(int16_t *)&m_b0);
print_shorts("mb1",(int16_t*)&m_b1); print_shorts("mb1",(int16_t *)&m_b1);
print_shorts("mb2",(int16_t*)&m_b2); print_shorts("mb2",(int16_t *)&m_b2);
print_shorts("mb3",(int16_t*)&m_b3); print_shorts("mb3",(int16_t *)&m_b3);
print_shorts("mb4",(int16_t*)&m_b4); print_shorts("mb4",(int16_t *)&m_b4);
print_shorts("mb5",(int16_t*)&m_b5); print_shorts("mb5",(int16_t *)&m_b5);
print_shorts("mb6",(int16_t*)&m_b6); print_shorts("mb6",(int16_t *)&m_b6);
print_shorts("mb7",(int16_t*)&m_b7); print_shorts("mb7",(int16_t *)&m_b7);
fprintf(fdsse4,"Loop index %d, new\n",k); fprintf(fdsse4,"Loop index %d, new\n",k);
print_shorts("new0",(int16_t*)&new0); print_shorts("new0",(int16_t *)&new0);
print_shorts("new1",(int16_t*)&new1); print_shorts("new1",(int16_t *)&new1);
print_shorts("new2",(int16_t*)&new2); print_shorts("new2",(int16_t *)&new2);
print_shorts("new3",(int16_t*)&new3); print_shorts("new3",(int16_t *)&new3);
print_shorts("new4",(int16_t*)&new4); print_shorts("new4",(int16_t *)&new4);
print_shorts("new5",(int16_t*)&new5); print_shorts("new5",(int16_t *)&new5);
print_shorts("new6",(int16_t*)&new6); print_shorts("new6",(int16_t *)&new6);
print_shorts("new7",(int16_t*)&new7); print_shorts("new7",(int16_t *)&new7);
fprintf(fdsse4,"Loop index %d, after max\n",k); fprintf(fdsse4,"Loop index %d, after max\n",k);
print_shorts("a0",(int16_t*)&a0); print_shorts("a0",(int16_t *)&a0);
print_shorts("a1",(int16_t*)&a1); print_shorts("a1",(int16_t *)&a1);
print_shorts("a2",(int16_t*)&a2); print_shorts("a2",(int16_t *)&a2);
print_shorts("a3",(int16_t*)&a3); print_shorts("a3",(int16_t *)&a3);
print_shorts("a4",(int16_t*)&a4); print_shorts("a4",(int16_t *)&a4);
print_shorts("a5",(int16_t*)&a5); print_shorts("a5",(int16_t *)&a5);
print_shorts("a6",(int16_t*)&a6); print_shorts("a6",(int16_t *)&a6);
print_shorts("a7",(int16_t*)&a7); print_shorts("a7",(int16_t *)&a7);
fprintf(fdsse4,"Loop index %d\n",k); fprintf(fdsse4,"Loop index %d\n",k);
print_shorts("a0",(int16_t*)&alpha_ptr[0]); print_shorts("a0",(int16_t *)&alpha_ptr[0]);
print_shorts("a1",(int16_t*)&alpha_ptr[1]); print_shorts("a1",(int16_t *)&alpha_ptr[1]);
print_shorts("a2",(int16_t*)&alpha_ptr[2]); print_shorts("a2",(int16_t *)&alpha_ptr[2]);
print_shorts("a3",(int16_t*)&alpha_ptr[3]); print_shorts("a3",(int16_t *)&alpha_ptr[3]);
print_shorts("a4",(int16_t*)&alpha_ptr[4]); print_shorts("a4",(int16_t *)&alpha_ptr[4]);
print_shorts("a5",(int16_t*)&alpha_ptr[5]); print_shorts("a5",(int16_t *)&alpha_ptr[5]);
print_shorts("a6",(int16_t*)&alpha_ptr[6]); print_shorts("a6",(int16_t *)&alpha_ptr[6]);
print_shorts("a7",(int16_t*)&alpha_ptr[7]); print_shorts("a7",(int16_t *)&alpha_ptr[7]);
#endif #endif
} }
if (rerun_flag==1) if (rerun_flag==1)
...@@ -528,37 +503,28 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s ...@@ -528,37 +503,28 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s
} }
void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned short frame_length,unsigned char F,int offset8_flag) void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F,int offset8_flag) {
{
int k,rerun_flag=0; int k,rerun_flag=0;
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i m11_128,m10_128; __m128i m11_128,m10_128;
__m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
__m128i new0,new1,new2,new3,new4,new5,new6,new7; __m128i new0,new1,new2,new3,new4,new5,new6,new7;
__m128i *beta128,*alpha128,*beta_ptr; __m128i *beta128,*alpha128,*beta_ptr;
__m128i beta_max; __m128i beta_max;
#elif defined(__arm__) #elif defined(__arm__)
int16x8_t m11_128,m10_128; int16x8_t m11_128,m10_128;
int16x8_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; int16x8_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
int16x8_t new0,new1,new2,new3,new4,new5,new6,new7; int16x8_t new0,new1,new2,new3,new4,new5,new6,new7;
int16x8_t *beta128,*alpha128,*beta_ptr; int16x8_t *beta128,*alpha128,*beta_ptr;
int16x8_t beta_max; int16x8_t beta_max;
#endif #endif
int16_t m11,m10,beta0_16,beta1_16,beta2_16,beta3_16,beta4_16,beta5_16,beta6_16,beta7_16,beta0_2,beta1_2,beta2_2,beta3_2,beta_m; int16_t m11,m10,beta0_16,beta1_16,beta2_16,beta3_16,beta4_16,beta5_16,beta6_16,beta7_16,beta0_2,beta1_2,beta2_2,beta3_2,beta_m;
llr_t beta0,beta1; llr_t beta0,beta1;
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"compute_beta, %p,%p,%p,%p,framelength %d,F %d\n", fprintf(fdsse4,"compute_beta, %p,%p,%p,%p,framelength %d,F %d\n",
beta,m_11,m_10,alpha,frame_length,F); beta,m_11,m_10,alpha,frame_length,F);
#endif #endif
// termination for beta initialization // termination for beta initialization
// fprintf(fdsse4,"beta init: offset8 %d\n",offset8_flag); // fprintf(fdsse4,"beta init: offset8 %d\n",offset8_flag);
m11=(int16_t)m_11[2+frame_length]; m11=(int16_t)m_11[2+frame_length];
//#ifndef __AVX2__ //#ifndef __AVX2__
...@@ -570,16 +536,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -570,16 +536,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"m11,m10 %d,%d\n",m11,m10); fprintf(fdsse4,"m11,m10 %d,%d\n",m11,m10);
#endif #endif
beta0 = -m11;//M0T_TERM; beta0 = -m11;//M0T_TERM;
beta1 = m11;//M1T_TERM; beta1 = m11;//M1T_TERM;
m11=(int16_t)m_11[1+frame_length]; m11=(int16_t)m_11[1+frame_length];
m10=(int16_t)m_10[1+frame_length]; m10=(int16_t)m_10[1+frame_length];
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"m11,m10 %d,%d\n",m11,m10); fprintf(fdsse4,"m11,m10 %d,%d\n",m11,m10);
#endif #endif
beta0_2 = beta0-m11;//+M0T_TERM; beta0_2 = beta0-m11;//+M0T_TERM;
beta1_2 = beta0+m11;//+M1T_TERM; beta1_2 = beta0+m11;//+M1T_TERM;
beta2_2 = beta1+m10;//M2T_TERM; beta2_2 = beta1+m10;//M2T_TERM;
...@@ -597,8 +560,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -597,8 +560,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta5_16 = beta2_2+m10;//+M5T_TERM; beta5_16 = beta2_2+m10;//+M5T_TERM;
beta6_16 = beta3_2+m11;//+M6T_TERM; beta6_16 = beta3_2+m11;//+M6T_TERM;
beta7_16 = beta3_2-m11;//+M7T_TERM; beta7_16 = beta3_2-m11;//+M7T_TERM;
beta_m = (beta0_16>beta1_16) ? beta0_16 : beta1_16; beta_m = (beta0_16>beta1_16) ? beta0_16 : beta1_16;
beta_m = (beta_m>beta2_16) ? beta_m : beta2_16; beta_m = (beta_m>beta2_16) ? beta_m : beta2_16;
beta_m = (beta_m>beta3_16) ? beta_m : beta3_16; beta_m = (beta_m>beta3_16) ? beta_m : beta3_16;
...@@ -606,8 +567,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -606,8 +567,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_m = (beta_m>beta5_16) ? beta_m : beta5_16; beta_m = (beta_m>beta5_16) ? beta_m : beta5_16;
beta_m = (beta_m>beta6_16) ? beta_m : beta6_16; beta_m = (beta_m>beta6_16) ? beta_m : beta6_16;
beta_m = (beta_m>beta7_16) ? beta_m : beta7_16; beta_m = (beta_m>beta7_16) ? beta_m : beta7_16;
beta0_16=beta0_16-beta_m; beta0_16=beta0_16-beta_m;
beta1_16=beta1_16-beta_m; beta1_16=beta1_16-beta_m;
beta2_16=beta2_16-beta_m; beta2_16=beta2_16-beta_m;
...@@ -619,12 +578,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -619,12 +578,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
for (rerun_flag=0;; rerun_flag=1) { for (rerun_flag=0;; rerun_flag=1) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
beta_ptr = (__m128i*)&beta[frame_length<<3]; beta_ptr = (__m128i *)&beta[frame_length<<3];
alpha128 = (__m128i*)&alpha[0]; alpha128 = (__m128i *)&alpha[0];
#elif defined(__arm__) #elif defined(__arm__)
beta_ptr = (int16x8_t*)&beta[frame_length<<3]; beta_ptr = (int16x8_t *)&beta[frame_length<<3];
alpha128 = (int16x8_t*)&alpha[0]; alpha128 = (int16x8_t *)&alpha[0];
#endif #endif
if (rerun_flag == 0) { if (rerun_flag == 0) {
beta_ptr[0] = alpha128[(frame_length)]; beta_ptr[0] = alpha128[(frame_length)];
beta_ptr[1] = alpha128[1+(frame_length)]; beta_ptr[1] = alpha128[1+(frame_length)];
...@@ -636,18 +596,18 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -636,18 +596,18 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr[7] = alpha128[7+(frame_length)]; beta_ptr[7] = alpha128[7+(frame_length)];
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"beta init \n"); fprintf(fdsse4,"beta init \n");
print_shorts("b0",(int16_t*)&beta_ptr[0]); print_shorts("b0",(int16_t *)&beta_ptr[0]);
print_shorts("b1",(int16_t*)&beta_ptr[1]); print_shorts("b1",(int16_t *)&beta_ptr[1]);
print_shorts("b2",(int16_t*)&beta_ptr[2]); print_shorts("b2",(int16_t *)&beta_ptr[2]);
print_shorts("b3",(int16_t*)&beta_ptr[3]); print_shorts("b3",(int16_t *)&beta_ptr[3]);
print_shorts("b4",(int16_t*)&beta_ptr[4]); print_shorts("b4",(int16_t *)&beta_ptr[4]);
print_shorts("b5",(int16_t*)&beta_ptr[5]); print_shorts("b5",(int16_t *)&beta_ptr[5]);
print_shorts("b6",(int16_t*)&beta_ptr[6]); print_shorts("b6",(int16_t *)&beta_ptr[6]);
print_shorts("b7",(int16_t*)&beta_ptr[7]); print_shorts("b7",(int16_t *)&beta_ptr[7]);
#endif #endif
} else { } else {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
beta128 = (__m128i*)&beta[0]; beta128 = (__m128i *)&beta[0];
beta_ptr[0] = _mm_srli_si128(beta128[0],2); beta_ptr[0] = _mm_srli_si128(beta128[0],2);
beta_ptr[1] = _mm_srli_si128(beta128[1],2); beta_ptr[1] = _mm_srli_si128(beta128[1],2);
beta_ptr[2] = _mm_srli_si128(beta128[2],2); beta_ptr[2] = _mm_srli_si128(beta128[2],2);
...@@ -657,31 +617,38 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -657,31 +617,38 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr[6] = _mm_srli_si128(beta128[6],2); beta_ptr[6] = _mm_srli_si128(beta128[6],2);
beta_ptr[7] = _mm_srli_si128(beta128[7],2); beta_ptr[7] = _mm_srli_si128(beta128[7],2);
#elif defined(__arm__) #elif defined(__arm__)
beta128 = (int16x8_t*)&beta[0]; beta128 = (int16x8_t *)&beta[0];
beta_ptr = (int16x8_t*)&beta[frame_length<<3]; beta_ptr = (int16x8_t *)&beta[frame_length<<3];
beta_ptr[0] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[0],16); beta_ptr[0] = vsetq_lane_s16(beta[3],beta_ptr[0],4); beta_ptr[0] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[0],16);
beta_ptr[1] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[1],16); beta_ptr[1] = vsetq_lane_s16(beta[11],beta_ptr[1],4); beta_ptr[0] = vsetq_lane_s16(beta[3],beta_ptr[0],4);
beta_ptr[2] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[2],16); beta_ptr[2] = vsetq_lane_s16(beta[19],beta_ptr[2],4); beta_ptr[1] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[1],16);
beta_ptr[3] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[3],16); beta_ptr[3] = vsetq_lane_s16(beta[27],beta_ptr[3],4); beta_ptr[1] = vsetq_lane_s16(beta[11],beta_ptr[1],4);
beta_ptr[4] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[4],16); beta_ptr[4] = vsetq_lane_s16(beta[35],beta_ptr[4],4); beta_ptr[2] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[2],16);
beta_ptr[5] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[5],16); beta_ptr[5] = vsetq_lane_s16(beta[43],beta_ptr[5],4); beta_ptr[2] = vsetq_lane_s16(beta[19],beta_ptr[2],4);
beta_ptr[6] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[6],16); beta_ptr[6] = vsetq_lane_s16(beta[51],beta_ptr[6],4); beta_ptr[3] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[3],16);
beta_ptr[7] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[7],16); beta_ptr[7] = vsetq_lane_s16(beta[59],beta_ptr[7],4); beta_ptr[3] = vsetq_lane_s16(beta[27],beta_ptr[3],4);
beta_ptr[4] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[4],16);
beta_ptr[4] = vsetq_lane_s16(beta[35],beta_ptr[4],4);
beta_ptr[5] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[5],16);
beta_ptr[5] = vsetq_lane_s16(beta[43],beta_ptr[5],4);
beta_ptr[6] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[6],16);
beta_ptr[6] = vsetq_lane_s16(beta[51],beta_ptr[6],4);
beta_ptr[7] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[7],16);
beta_ptr[7] = vsetq_lane_s16(beta[59],beta_ptr[7],4);
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"beta init (second run) \n"); fprintf(fdsse4,"beta init (second run) \n");
print_shorts("b0",(int16_t*)&beta_ptr[0]); print_shorts("b0",(int16_t *)&beta_ptr[0]);
print_shorts("b1",(int16_t*)&beta_ptr[1]); print_shorts("b1",(int16_t *)&beta_ptr[1]);
print_shorts("b2",(int16_t*)&beta_ptr[2]); print_shorts("b2",(int16_t *)&beta_ptr[2]);
print_shorts("b3",(int16_t*)&beta_ptr[3]); print_shorts("b3",(int16_t *)&beta_ptr[3]);
print_shorts("b4",(int16_t*)&beta_ptr[4]); print_shorts("b4",(int16_t *)&beta_ptr[4]);
print_shorts("b5",(int16_t*)&beta_ptr[5]); print_shorts("b5",(int16_t *)&beta_ptr[5]);
print_shorts("b6",(int16_t*)&beta_ptr[6]); print_shorts("b6",(int16_t *)&beta_ptr[6]);
print_shorts("b7",(int16_t*)&beta_ptr[7]); print_shorts("b7",(int16_t *)&beta_ptr[7]);
#endif #endif
} }
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
beta_ptr[0] = _mm_insert_epi16(beta_ptr[0],beta0_16,7); beta_ptr[0] = _mm_insert_epi16(beta_ptr[0],beta0_16,7);
beta_ptr[1] = _mm_insert_epi16(beta_ptr[1],beta1_16,7); beta_ptr[1] = _mm_insert_epi16(beta_ptr[1],beta1_16,7);
...@@ -701,26 +668,23 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -701,26 +668,23 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr[6] = vsetq_lane_s16(beta6_16,beta_ptr[6],7); beta_ptr[6] = vsetq_lane_s16(beta6_16,beta_ptr[6],7);
beta_ptr[7] = vsetq_lane_s16(beta7_16,beta_ptr[7],7); beta_ptr[7] = vsetq_lane_s16(beta7_16,beta_ptr[7],7);
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"beta init (after insert) \n"); fprintf(fdsse4,"beta init (after insert) \n");
print_shorts("b0",(int16_t*)&beta_ptr[0]); print_shorts("b0",(int16_t *)&beta_ptr[0]);
print_shorts("b1",(int16_t*)&beta_ptr[1]); print_shorts("b1",(int16_t *)&beta_ptr[1]);
print_shorts("b2",(int16_t*)&beta_ptr[2]); print_shorts("b2",(int16_t *)&beta_ptr[2]);
print_shorts("b3",(int16_t*)&beta_ptr[3]); print_shorts("b3",(int16_t *)&beta_ptr[3]);
print_shorts("b4",(int16_t*)&beta_ptr[4]); print_shorts("b4",(int16_t *)&beta_ptr[4]);
print_shorts("b5",(int16_t*)&beta_ptr[5]); print_shorts("b5",(int16_t *)&beta_ptr[5]);
print_shorts("b6",(int16_t*)&beta_ptr[6]); print_shorts("b6",(int16_t *)&beta_ptr[6]);
print_shorts("b7",(int16_t*)&beta_ptr[7]); print_shorts("b7",(int16_t *)&beta_ptr[7]);
#endif #endif
int loopval=((rerun_flag==0)?0:((frame_length-L)>>3)); int loopval=((rerun_flag==0)?0:((frame_length-L)>>3));
for (k=(frame_length>>3)-1; k>=loopval; k--) { for (k=(frame_length>>3)-1; k>=loopval; k--) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
m11_128=((__m128i*)m_11)[k]; m11_128=((__m128i *)m_11)[k];
m10_128=((__m128i*)m_10)[k]; m10_128=((__m128i *)m_10)[k];
//#ifndef __AVX2__ //#ifndef __AVX2__
#if 1 #if 1
m_b0 = _mm_adds_epi16(beta_ptr[4],m11_128); //m11 m_b0 = _mm_adds_epi16(beta_ptr[4],m11_128); //m11
...@@ -731,8 +695,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -731,8 +695,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
m_b5 = _mm_subs_epi16(beta_ptr[6],m10_128); //m01 m_b5 = _mm_subs_epi16(beta_ptr[6],m10_128); //m01
m_b6 = _mm_subs_epi16(beta_ptr[7],m11_128); //m00 m_b6 = _mm_subs_epi16(beta_ptr[7],m11_128); //m00
m_b7 = _mm_adds_epi16(beta_ptr[7],m11_128); //m11 m_b7 = _mm_adds_epi16(beta_ptr[7],m11_128); //m11
new0 = _mm_subs_epi16(beta_ptr[0],m11_128); //m00 new0 = _mm_subs_epi16(beta_ptr[0],m11_128); //m00
new1 = _mm_adds_epi16(beta_ptr[0],m11_128); //m11 new1 = _mm_adds_epi16(beta_ptr[0],m11_128); //m11
new2 = _mm_adds_epi16(beta_ptr[1],m10_128); //m10 new2 = _mm_adds_epi16(beta_ptr[1],m10_128); //m10
...@@ -741,16 +703,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -741,16 +703,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
new5 = _mm_adds_epi16(beta_ptr[2],m10_128); //m10 new5 = _mm_adds_epi16(beta_ptr[2],m10_128); //m10
new6 = _mm_adds_epi16(beta_ptr[3],m11_128); //m11 new6 = _mm_adds_epi16(beta_ptr[3],m11_128); //m11
new7 = _mm_subs_epi16(beta_ptr[3],m11_128); //m00 new7 = _mm_subs_epi16(beta_ptr[3],m11_128); //m00
#else #else
b01=_mm256_load_si256(&((_m256i*)beta_ptr)[0]); b01=_mm256_load_si256(&((_m256i *)beta_ptr)[0]);
b23=_mm256_load_si256(&((_m256i*)beta_ptr)[1]); b23=_mm256_load_si256(&((_m256i *)beta_ptr)[1]);
b45=_mm256_load_si256(&((_m256i*)beta_ptr)[2]); b45=_mm256_load_si256(&((_m256i *)beta_ptr)[2]);
b67=_mm256_load_si256(&((_m256i*)beta_ptr)[3]); b67=_mm256_load_si256(&((_m256i *)beta_ptr)[3]);
m11m10_256 = _mm256_insertf128_si256(m11m10_256,m11_128,0); m11m10_256 = _mm256_insertf128_si256(m11m10_256,m11_128,0);
m11m10_256 = _mm256_insertf128_si256(m11m10_256,m10_128,1); m11m10_256 = _mm256_insertf128_si256(m11m10_256,m10_128,1);
m_b02 = _mm256_adds_epi16(b45,m11m10_256); //negative m10 m_b02 = _mm256_adds_epi16(b45,m11m10_256); //negative m10
m_b13 = _mm256_subs_epi16(b45,m11m10_256); //negative m10 m_b13 = _mm256_subs_epi16(b45,m11m10_256); //negative m10
m_b64 = _mm256_subs_epi16(b67,m11m10_256); //negative m10 m_b64 = _mm256_subs_epi16(b67,m11m10_256); //negative m10
...@@ -760,9 +719,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -760,9 +719,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
new64 = _mm256_adds_epi16(b23,m11m10_256); //negative m10 new64 = _mm256_adds_epi16(b23,m11m10_256); //negative m10
new75 = _mm256_subs_epi16(b24,m11m10_256); //negative m10 new75 = _mm256_subs_epi16(b24,m11m10_256); //negative m10
#endif #endif
beta_ptr-=8; beta_ptr-=8;
//#ifndef __AVX2__ //#ifndef __AVX2__
#if 1 #if 1
beta_ptr[0] = _mm_max_epi16(m_b0,new0); beta_ptr[0] = _mm_max_epi16(m_b0,new0);
...@@ -773,7 +730,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -773,7 +730,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr[5] = _mm_max_epi16(m_b5,new5); beta_ptr[5] = _mm_max_epi16(m_b5,new5);
beta_ptr[6] = _mm_max_epi16(m_b6,new6); beta_ptr[6] = _mm_max_epi16(m_b6,new6);
beta_ptr[7] = _mm_max_epi16(m_b7,new7); beta_ptr[7] = _mm_max_epi16(m_b7,new7);
beta_max = _mm_max_epi16(beta_ptr[0],beta_ptr[1]); beta_max = _mm_max_epi16(beta_ptr[0],beta_ptr[1]);
beta_max = _mm_max_epi16(beta_max ,beta_ptr[2]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[2]);
beta_max = _mm_max_epi16(beta_max ,beta_ptr[3]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[3]);
...@@ -781,7 +737,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -781,7 +737,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_max = _mm_max_epi16(beta_max ,beta_ptr[5]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[5]);
beta_max = _mm_max_epi16(beta_max ,beta_ptr[6]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[6]);
beta_max = _mm_max_epi16(beta_max ,beta_ptr[7]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[7]);
beta_ptr[0] = _mm_subs_epi16(beta_ptr[0],beta_max); beta_ptr[0] = _mm_subs_epi16(beta_ptr[0],beta_max);
beta_ptr[1] = _mm_subs_epi16(beta_ptr[1],beta_max); beta_ptr[1] = _mm_subs_epi16(beta_ptr[1],beta_max);
beta_ptr[2] = _mm_subs_epi16(beta_ptr[2],beta_max); beta_ptr[2] = _mm_subs_epi16(beta_ptr[2],beta_max);
...@@ -795,26 +750,22 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -795,26 +750,22 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
b13 = _mm256_max_epi16(m_b13,new13); b13 = _mm256_max_epi16(m_b13,new13);
b64 = _mm256_max_epi16(m_b64,new64); b64 = _mm256_max_epi16(m_b64,new64);
b75 = _mm256_max_epi16(m_b75,new75); b75 = _mm256_max_epi16(m_b75,new75);
beta_max = _mm256_max_epi16(b02,b13); beta_max = _mm256_max_epi16(b02,b13);
beta_max = _mm256_max_epi16(beta_max,b64); beta_max = _mm256_max_epi16(beta_max,b64);
beta_max = _mm256_max_epi16(beta_max,b75); beta_max = _mm256_max_epi16(beta_max,b75);
beta_max = _mm256_max_epi16(beta_max,_mm256_permutevar8x32_epi32(betaa_max,_mm256_set_epi32(3,2,1,0,7,6,5,4))); beta_max = _mm256_max_epi16(beta_max,_mm256_permutevar8x32_epi32(betaa_max,_mm256_set_epi32(3,2,1,0,7,6,5,4)));
b02 = _mm256_subs_epi16(b02,beta_max); b02 = _mm256_subs_epi16(b02,beta_max);
b13 = _mm256_subs_epi16(b13,beta_max); b13 = _mm256_subs_epi16(b13,beta_max);
b64 = _mm256_subs_epi16(b64,beta_max); b64 = _mm256_subs_epi16(b64,beta_max);
b75 = _mm256_subs_epi16(b75,beta_max); b75 = _mm256_subs_epi16(b75,beta_max);
((_m256i *)beta_ptr)[0]) = _mm256_permute2x128_si256(b02,b13,0x02); //b01
((_m256i*)beta_ptr)[0]) = _mm256_permute2x128_si256(b02,b13,0x02); //b01 ((_m256i *)beta_ptr)[1]) = _mm256_permute2x128_si256(b02,b13,0x31); //b23
((_m256i*)beta_ptr)[1]) = _mm256_permute2x128_si256(b02,b13,0x31); //b23 ((_m256i *)beta_ptr)[2]) = _mm256_permute2x128_si256(b64,b75,0x13); //b45
((_m256i*)beta_ptr)[2]) = _mm256_permute2x128_si256(b64,b75,0x13); //b45 ((_m256i *)beta_ptr)[3]) = _mm256_permute2x128_si256(b64,b75,0x20); //b67
((_m256i*)beta_ptr)[3]) = _mm256_permute2x128_si256(b64,b75,0x20); //b67
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
m11_128=((int16x8_t*)m_11)[k]; m11_128=((int16x8_t *)m_11)[k];
m10_128=((int16x8_t*)m_10)[k]; m10_128=((int16x8_t *)m_10)[k];
m_b0 = vqaddq_s16(beta_ptr[4],m11_128); //m11 m_b0 = vqaddq_s16(beta_ptr[4],m11_128); //m11
m_b1 = vqsubq_s16(beta_ptr[4],m11_128); //m00 m_b1 = vqsubq_s16(beta_ptr[4],m11_128); //m00
m_b2 = vqsubq_s16(beta_ptr[5],m10_128); //m01 m_b2 = vqsubq_s16(beta_ptr[5],m10_128); //m01
...@@ -823,7 +774,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -823,7 +774,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
m_b5 = vqsubq_s16(beta_ptr[6],m10_128); //m01 m_b5 = vqsubq_s16(beta_ptr[6],m10_128); //m01
m_b6 = vqsubq_s16(beta_ptr[7],m11_128); //m00 m_b6 = vqsubq_s16(beta_ptr[7],m11_128); //m00
m_b7 = vqaddq_s16(beta_ptr[7],m11_128); //m11 m_b7 = vqaddq_s16(beta_ptr[7],m11_128); //m11
new0 = vqsubq_s16(beta_ptr[0],m11_128); //m00 new0 = vqsubq_s16(beta_ptr[0],m11_128); //m00
new1 = vqaddq_s16(beta_ptr[0],m11_128); //m11 new1 = vqaddq_s16(beta_ptr[0],m11_128); //m11
new2 = vqaddq_s16(beta_ptr[1],m10_128); //m10 new2 = vqaddq_s16(beta_ptr[1],m10_128); //m10
...@@ -832,9 +782,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -832,9 +782,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
new5 = vqaddq_s16(beta_ptr[2],m10_128); //m10 new5 = vqaddq_s16(beta_ptr[2],m10_128); //m10
new6 = vqaddq_s16(beta_ptr[3],m11_128); //m11 new6 = vqaddq_s16(beta_ptr[3],m11_128); //m11
new7 = vqsubq_s16(beta_ptr[3],m11_128); //m00 new7 = vqsubq_s16(beta_ptr[3],m11_128); //m00
beta_ptr-=8; beta_ptr-=8;
beta_ptr[0] = vmaxq_s16(m_b0,new0); beta_ptr[0] = vmaxq_s16(m_b0,new0);
beta_ptr[1] = vmaxq_s16(m_b1,new1); beta_ptr[1] = vmaxq_s16(m_b1,new1);
beta_ptr[2] = vmaxq_s16(m_b2,new2); beta_ptr[2] = vmaxq_s16(m_b2,new2);
...@@ -843,7 +791,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -843,7 +791,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr[5] = vmaxq_s16(m_b5,new5); beta_ptr[5] = vmaxq_s16(m_b5,new5);
beta_ptr[6] = vmaxq_s16(m_b6,new6); beta_ptr[6] = vmaxq_s16(m_b6,new6);
beta_ptr[7] = vmaxq_s16(m_b7,new7); beta_ptr[7] = vmaxq_s16(m_b7,new7);
beta_max = vmaxq_s16(beta_ptr[0],beta_ptr[1]); beta_max = vmaxq_s16(beta_ptr[0],beta_ptr[1]);
beta_max = vmaxq_s16(beta_max ,beta_ptr[2]); beta_max = vmaxq_s16(beta_max ,beta_ptr[2]);
beta_max = vmaxq_s16(beta_max ,beta_ptr[3]); beta_max = vmaxq_s16(beta_max ,beta_ptr[3]);
...@@ -851,7 +798,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -851,7 +798,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_max = vmaxq_s16(beta_max ,beta_ptr[5]); beta_max = vmaxq_s16(beta_max ,beta_ptr[5]);
beta_max = vmaxq_s16(beta_max ,beta_ptr[6]); beta_max = vmaxq_s16(beta_max ,beta_ptr[6]);
beta_max = vmaxq_s16(beta_max ,beta_ptr[7]); beta_max = vmaxq_s16(beta_max ,beta_ptr[7]);
beta_ptr[0] = vqsubq_s16(beta_ptr[0],beta_max); beta_ptr[0] = vqsubq_s16(beta_ptr[0],beta_max);
beta_ptr[1] = vqsubq_s16(beta_ptr[1],beta_max); beta_ptr[1] = vqsubq_s16(beta_ptr[1],beta_max);
beta_ptr[2] = vqsubq_s16(beta_ptr[2],beta_max); beta_ptr[2] = vqsubq_s16(beta_ptr[2],beta_max);
...@@ -861,20 +807,18 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -861,20 +807,18 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
beta_ptr[6] = vqsubq_s16(beta_ptr[6],beta_max); beta_ptr[6] = vqsubq_s16(beta_ptr[6],beta_max);
beta_ptr[7] = vqsubq_s16(beta_ptr[7],beta_max); beta_ptr[7] = vqsubq_s16(beta_ptr[7],beta_max);
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"Loop index %d, mb\n",k); fprintf(fdsse4,"Loop index %d, mb\n",k);
fprintf(fdsse4,"beta init (after max)\n"); fprintf(fdsse4,"beta init (after max)\n");
print_shorts("b0",(int16_t*)&beta_ptr[0]); print_shorts("b0",(int16_t *)&beta_ptr[0]);
print_shorts("b1",(int16_t*)&beta_ptr[1]); print_shorts("b1",(int16_t *)&beta_ptr[1]);
print_shorts("b2",(int16_t*)&beta_ptr[2]); print_shorts("b2",(int16_t *)&beta_ptr[2]);
print_shorts("b3",(int16_t*)&beta_ptr[3]); print_shorts("b3",(int16_t *)&beta_ptr[3]);
print_shorts("b4",(int16_t*)&beta_ptr[4]); print_shorts("b4",(int16_t *)&beta_ptr[4]);
print_shorts("b5",(int16_t*)&beta_ptr[5]); print_shorts("b5",(int16_t *)&beta_ptr[5]);
print_shorts("b6",(int16_t*)&beta_ptr[6]); print_shorts("b6",(int16_t *)&beta_ptr[6]);
print_shorts("b7",(int16_t*)&beta_ptr[7]); print_shorts("b7",(int16_t *)&beta_ptr[7]);
#endif #endif
} }
if (rerun_flag==1) if (rerun_flag==1)
...@@ -882,8 +826,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh ...@@ -882,8 +826,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh
} }
} }
void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, llr_t* systematic,unsigned short frame_length) void compute_ext16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, llr_t *systematic,unsigned short frame_length) {
{
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i *alpha128=(__m128i *)alpha; __m128i *alpha128=(__m128i *)alpha;
__m128i *beta128=(__m128i *)beta; __m128i *beta128=(__m128i *)beta;
...@@ -903,28 +846,21 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ...@@ -903,28 +846,21 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
int16x8_t m10_1,m10_2,m10_3,m10_4; int16x8_t m10_1,m10_2,m10_3,m10_4;
int16x8_t m11_1,m11_2,m11_3,m11_4; int16x8_t m11_1,m11_2,m11_3,m11_4;
#endif #endif
int k; int k;
// //
// LLR computation, 8 consecutive bits per loop // LLR computation, 8 consecutive bits per loop
// //
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"compute_ext (sse_16bit), %p, %p, %p, %p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length); fprintf(fdsse4,"compute_ext (sse_16bit), %p, %p, %p, %p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length);
#endif #endif
alpha_ptr = alpha128; alpha_ptr = alpha128;
beta_ptr = &beta128[8]; beta_ptr = &beta128[8];
for (k=0; k<(frame_length>>3); k++) { for (k=0; k<(frame_length>>3); k++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
m11_128 = (__m128i*)&m_11[k<<3]; m11_128 = (__m128i *)&m_11[k<<3];
m10_128 = (__m128i*)&m_10[k<<3]; m10_128 = (__m128i *)&m_10[k<<3];
ext_128 = (__m128i*)&ext[k<<3]; ext_128 = (__m128i *)&ext[k<<3];
/* /*
fprintf(fdsse4,"EXT %03d\n",k); fprintf(fdsse4,"EXT %03d\n",k);
print_shorts("a0:",&alpha_ptr[0]); print_shorts("a0:",&alpha_ptr[0]);
...@@ -944,7 +880,6 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ...@@ -944,7 +880,6 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
print_shorts("b6:",&beta_ptr[6]); print_shorts("b6:",&beta_ptr[6]);
print_shorts("b7:",&beta_ptr[7]); print_shorts("b7:",&beta_ptr[7]);
*/ */
//#ifndef __AVX2__ //#ifndef __AVX2__
#if 1 #if 1
m00_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m00_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
...@@ -964,31 +899,23 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ...@@ -964,31 +899,23 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
m10_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m10_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10;
m01_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; m01_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01;
#else #else
m00_1 = _mm_adds_epi16(alpha_ptr[0],beta_ptr[0]); //ALPHA_BETA_1m00; m00_1 = _mm_adds_epi16(alpha_ptr[0],beta_ptr[0]); //ALPHA_BETA_1m00;
m10_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m10_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10;
m11_1 = _mm_adds_epi16(alpha_ptr[0],beta_ptr[4]); //ALPHA_BETA_1m11; m11_1 = _mm_adds_epi16(alpha_ptr[0],beta_ptr[4]); //ALPHA_BETA_1m11;
m01_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; m01_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01;
m11_2 = _mm_adds_epi16(alpha_ptr[1],beta_ptr[0]); //ALPHA_BETA_2m11; m11_2 = _mm_adds_epi16(alpha_ptr[1],beta_ptr[0]); //ALPHA_BETA_2m11;
m01_2 = _mm_adds_epi16(alpha_ptr[3],beta_ptr[1]); //ALPHA_BETA_2m01; m01_2 = _mm_adds_epi16(alpha_ptr[3],beta_ptr[1]); //ALPHA_BETA_2m01;
m00_2 = _mm_adds_epi16(alpha_ptr[1],beta_ptr[4]); //ALPHA_BETA_2m00; m00_2 = _mm_adds_epi16(alpha_ptr[1],beta_ptr[4]); //ALPHA_BETA_2m00;
m10_2 = _mm_adds_epi16(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_2 = _mm_adds_epi16(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10;
m11_3 = _mm_adds_epi16(alpha_ptr[6],beta_ptr[3]); //ALPHA_BETA_3m11; m11_3 = _mm_adds_epi16(alpha_ptr[6],beta_ptr[3]); //ALPHA_BETA_3m11;
m01_3 = _mm_adds_epi16(alpha_ptr[4],beta_ptr[2]); //ALPHA_BETA_3m01; m01_3 = _mm_adds_epi16(alpha_ptr[4],beta_ptr[2]); //ALPHA_BETA_3m01;
m00_3 = _mm_adds_epi16(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; m00_3 = _mm_adds_epi16(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00;
m10_3 = _mm_adds_epi16(alpha_ptr[4],beta_ptr[6]); //ALPHA_BETA_3m10; m10_3 = _mm_adds_epi16(alpha_ptr[4],beta_ptr[6]); //ALPHA_BETA_3m10;
m00_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m00_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
m10_4 = _mm_adds_epi16(alpha_ptr[5],beta_ptr[2]); //ALPHA_BETA_4m10; m10_4 = _mm_adds_epi16(alpha_ptr[5],beta_ptr[2]); //ALPHA_BETA_4m10;
m11_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m11_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11;
m01_4 = _mm_adds_epi16(alpha_ptr[5],beta_ptr[6]); //ALPHA_BETA_4m01; m01_4 = _mm_adds_epi16(alpha_ptr[5],beta_ptr[6]); //ALPHA_BETA_4m01;
#endif #endif
/* /*
print_shorts("m11_1:",&m11_1); print_shorts("m11_1:",&m11_1);
print_shorts("m11_2:",&m11_2); print_shorts("m11_2:",&m11_2);
...@@ -1019,36 +946,30 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ...@@ -1019,36 +946,30 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
m11_1 = _mm_max_epi16(m11_1,m11_2); m11_1 = _mm_max_epi16(m11_1,m11_2);
m11_1 = _mm_max_epi16(m11_1,m11_3); m11_1 = _mm_max_epi16(m11_1,m11_3);
m11_1 = _mm_max_epi16(m11_1,m11_4); m11_1 = _mm_max_epi16(m11_1,m11_4);
// print_shorts("m11_1:",&m11_1); // print_shorts("m11_1:",&m11_1);
m01_1 = _mm_subs_epi16(m01_1,*m10_128); m01_1 = _mm_subs_epi16(m01_1,*m10_128);
m00_1 = _mm_subs_epi16(m00_1,*m11_128); m00_1 = _mm_subs_epi16(m00_1,*m11_128);
m10_1 = _mm_adds_epi16(m10_1,*m10_128); m10_1 = _mm_adds_epi16(m10_1,*m10_128);
m11_1 = _mm_adds_epi16(m11_1,*m11_128); m11_1 = _mm_adds_epi16(m11_1,*m11_128);
// print_shorts("m10_1:",&m10_1); // print_shorts("m10_1:",&m10_1);
// print_shorts("m11_1:",&m11_1); // print_shorts("m11_1:",&m11_1);
m01_1 = _mm_max_epi16(m01_1,m00_1); m01_1 = _mm_max_epi16(m01_1,m00_1);
m10_1 = _mm_max_epi16(m10_1,m11_1); m10_1 = _mm_max_epi16(m10_1,m11_1);
// print_shorts("m01_1:",&m01_1); // print_shorts("m01_1:",&m01_1);
// print_shorts("m10_1:",&m10_1); // print_shorts("m10_1:",&m10_1);
*ext_128 = _mm_subs_epi16(m10_1,m01_1); *ext_128 = _mm_subs_epi16(m10_1,m01_1);
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"ext %p\n",ext_128); fprintf(fdsse4,"ext %p\n",ext_128);
print_shorts("ext:",(int16_t*)ext_128); print_shorts("ext:",(int16_t *)ext_128);
print_shorts("m11:",(int16_t*)m11_128); print_shorts("m11:",(int16_t *)m11_128);
print_shorts("m10:",(int16_t*)m10_128); print_shorts("m10:",(int16_t *)m10_128);
print_shorts("m10_1:",(int16_t*)&m10_1); print_shorts("m10_1:",(int16_t *)&m10_1);
print_shorts("m01_1:",(int16_t*)&m01_1); print_shorts("m01_1:",(int16_t *)&m01_1);
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
m11_128 = (int16x8_t*)&m_11[k<<3]; m11_128 = (int16x8_t *)&m_11[k<<3];
m10_128 = (int16x8_t*)&m_10[k<<3]; m10_128 = (int16x8_t *)&m_10[k<<3];
ext_128 = (int16x8_t*)&ext[k<<3]; ext_128 = (int16x8_t *)&ext[k<<3];
m00_4 = vqaddq_s16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m00_4 = vqaddq_s16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
m11_4 = vqaddq_s16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m11_4 = vqaddq_s16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11;
m00_3 = vqaddq_s16(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; m00_3 = vqaddq_s16(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00;
...@@ -1065,7 +986,6 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ...@@ -1065,7 +986,6 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
m10_2 = vqaddq_s16(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_2 = vqaddq_s16(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10;
m10_1 = vqaddq_s16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m10_1 = vqaddq_s16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10;
m01_1 = vqaddq_s16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; m01_1 = vqaddq_s16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01;
m01_1 = vmaxq_s16(m01_1,m01_2); m01_1 = vmaxq_s16(m01_1,m01_2);
m01_1 = vmaxq_s16(m01_1,m01_3); m01_1 = vmaxq_s16(m01_1,m01_3);
m01_1 = vmaxq_s16(m01_1,m01_4); m01_1 = vmaxq_s16(m01_1,m01_4);
...@@ -1078,18 +998,12 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ...@@ -1078,18 +998,12 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
m11_1 = vmaxq_s16(m11_1,m11_2); m11_1 = vmaxq_s16(m11_1,m11_2);
m11_1 = vmaxq_s16(m11_1,m11_3); m11_1 = vmaxq_s16(m11_1,m11_3);
m11_1 = vmaxq_s16(m11_1,m11_4); m11_1 = vmaxq_s16(m11_1,m11_4);
m01_1 = vqsubq_s16(m01_1,*m10_128); m01_1 = vqsubq_s16(m01_1,*m10_128);
m00_1 = vqsubq_s16(m00_1,*m11_128); m00_1 = vqsubq_s16(m00_1,*m11_128);
m10_1 = vqaddq_s16(m10_1,*m10_128); m10_1 = vqaddq_s16(m10_1,*m10_128);
m11_1 = vqaddq_s16(m11_1,*m11_128); m11_1 = vqaddq_s16(m11_1,*m11_128);
m01_1 = vmaxq_s16(m01_1,m00_1); m01_1 = vmaxq_s16(m01_1,m00_1);
m10_1 = vmaxq_s16(m10_1,m11_1); m10_1 = vmaxq_s16(m10_1,m11_1);
*ext_128 = vqsubq_s16(m10_1,m01_1); *ext_128 = vqsubq_s16(m10_1,m01_1);
#endif #endif
alpha_ptr+=8; alpha_ptr+=8;
...@@ -1102,8 +1016,7 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ...@@ -1102,8 +1016,7 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext,
//int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8], //int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8],
int *pi2tab16[188],*pi5tab16[188],*pi4tab16[188],*pi6tab16[188]; int *pi2tab16[188],*pi5tab16[188],*pi4tab16[188],*pi6tab16[188];
void free_td16(void) void free_td16(void) {
{
int ind; int ind;
for (ind=0; ind<188; ind++) { for (ind=0; ind<188; ind++) {
...@@ -1114,14 +1027,11 @@ void free_td16(void) ...@@ -1114,14 +1027,11 @@ void free_td16(void)
} }
} }
void init_td16(void) void init_td16(void) {
{
int ind,i,i2,i3,j,n,pi,pi3; int ind,i,i2,i3,j,n,pi,pi3;
short * base_interleaver; short *base_interleaver;
for (ind=0; ind<188; ind++) { for (ind=0; ind<188; ind++) {
n = f1f2mat[ind].nb_bits; n = f1f2mat[ind].nb_bits;
base_interleaver=il_tb+f1f2mat[ind].beg_index; base_interleaver=il_tb+f1f2mat[ind].beg_index;
#ifdef MEX #ifdef MEX
...@@ -1141,10 +1051,8 @@ void init_td16(void) ...@@ -1141,10 +1051,8 @@ void init_td16(void)
j=i2; j=i2;
for (i3=0; i3<(n>>3); i3++,i++,j+=8) { for (i3=0; i3<(n>>3); i3++,i++,j+=8) {
// if (j>=n) // if (j>=n)
// j-=(n-1); // j-=(n-1);
pi2tab16[ind][i] = j; pi2tab16[ind][i] = j;
// fprintf(fdsse4,"pi2[%d] = %d\n",i,j); // fprintf(fdsse4,"pi2[%d] = %d\n",i,j);
} }
...@@ -1157,71 +1065,59 @@ void init_td16(void) ...@@ -1157,71 +1065,59 @@ void init_td16(void)
pi5tab16[ind][pi3] = pi2tab16[ind][i]; pi5tab16[ind][pi3] = pi2tab16[ind][i];
pi6tab16[ind][pi] = pi2tab16[ind][i]; pi6tab16[ind][pi] = pi2tab16[ind][i];
} }
} }
} }
uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
int16_t *y2, int16_t *y2,
uint8_t *decoded_bytes, uint8_t *decoded_bytes,
uint8_t *decoded_bytes2, uint8_t *decoded_bytes2,
uint16_t n, uint16_t n,
uint8_t max_iterations, uint8_t max_iterations,
uint8_t crc_type, uint8_t crc_type,
uint8_t F, uint8_t F,
time_stats_t *init_stats, time_stats_t *init_stats,
time_stats_t *alpha_stats, time_stats_t *alpha_stats,
time_stats_t *beta_stats, time_stats_t *beta_stats,
time_stats_t *gamma_stats, time_stats_t *gamma_stats,
time_stats_t *ext_stats, time_stats_t *ext_stats,
time_stats_t *intl1_stats, time_stats_t *intl1_stats,
time_stats_t *intl2_stats) { time_stats_t *intl2_stats) {
/* y is a pointer to the input /* y is a pointer to the input
decoded_bytes is a pointer to the decoded output decoded_bytes is a pointer to the decoded output
n is the size in bits of the coded block, with the tail */ n is the size in bits of the coded block, with the tail */
llr_t systematic0[n+16] __attribute__ ((aligned(32))); llr_t systematic0[n+16] __attribute__ ((aligned(32)));
llr_t systematic1[n+16] __attribute__ ((aligned(32))); llr_t systematic1[n+16] __attribute__ ((aligned(32)));
llr_t systematic2[n+16] __attribute__ ((aligned(32))); llr_t systematic2[n+16] __attribute__ ((aligned(32)));
llr_t yparity1[n+16] __attribute__ ((aligned(32))); llr_t yparity1[n+16] __attribute__ ((aligned(32)));
llr_t yparity2[n+16] __attribute__ ((aligned(32))); llr_t yparity2[n+16] __attribute__ ((aligned(32)));
llr_t ext[n+128] __attribute__((aligned(32))); llr_t ext[n+128] __attribute__((aligned(32)));
llr_t ext2[n+128] __attribute__((aligned(32))); llr_t ext2[n+128] __attribute__((aligned(32)));
llr_t alpha[(n+16)*8] __attribute__ ((aligned(32))); llr_t alpha[(n+16)*8] __attribute__ ((aligned(32)));
llr_t beta[(n+16)*8] __attribute__ ((aligned(32))); llr_t beta[(n+16)*8] __attribute__ ((aligned(32)));
llr_t m11[n+32] __attribute__ ((aligned(32))); llr_t m11[n+32] __attribute__ ((aligned(32)));
llr_t m10[n+32] __attribute__ ((aligned(32))); llr_t m10[n+32] __attribute__ ((aligned(32)));
int *pi2_p,*pi4_p,*pi5_p,*pi6_p; int *pi2_p,*pi4_p,*pi5_p,*pi6_p;
llr_t *s,*s1,*s2,*yp1,*yp2,*yp; llr_t *s,*s1,*s2,*yp1,*yp2,*yp;
unsigned int i,j,iind;//,pi; unsigned int i,j,iind;//,pi;
unsigned char iteration_cnt=0; unsigned char iteration_cnt=0;
unsigned int crc,oldcrc,crc_len; unsigned int crc,oldcrc,crc_len;
uint8_t temp; uint8_t temp;
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i *yp128; __m128i *yp128;
__m128i tmp, zeros=_mm_setzero_si128(); __m128i tmp, zeros=_mm_setzero_si128();
__m128i tmpe; __m128i tmpe;
#elif defined(__arm__) #elif defined(__arm__)
int16x8_t *yp128; int16x8_t *yp128;
// int16x8_t tmp128[(n+8)>>3]; // int16x8_t tmp128[(n+8)>>3];
int16x8_t tmp, zeros=vdupq_n_s16(0); int16x8_t tmp, zeros=vdupq_n_s16(0);
const uint16_t __attribute__ ((aligned (16))) _Powers[8]= const uint16_t __attribute__ ((aligned (16))) _Powers[8]=
{ 1, 2, 4, 8, 16, 32, 64, 128}; { 1, 2, 4, 8, 16, 32, 64, 128};
uint16x8_t Powers= vld1q_u16(_Powers); uint16x8_t Powers= vld1q_u16(_Powers);
#endif #endif
int offset8_flag=0; int offset8_flag=0;
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fdsse4 = fopen("dump_sse4.txt","w"); fdsse4 = fopen("dump_sse4.txt","w");
printf("tc sse4_16 (y) %p\n",y); printf("tc sse4_16 (y) %p\n",y);
#endif #endif
...@@ -1230,12 +1126,8 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1230,12 +1126,8 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
return 255; return 255;
} }
start_meas(init_stats); start_meas(init_stats);
for (iind=0; iind < 188 && f1f2mat[iind].nb_bits != n; iind++); for (iind=0; iind < 188 && f1f2mat[iind].nb_bits != n; iind++);
if ( iind == 188 ) { if ( iind == 188 ) {
...@@ -1244,50 +1136,41 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1244,50 +1136,41 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
} }
switch (crc_type) { switch (crc_type) {
case CRC24_A: case CRC24_A:
case CRC24_B: case CRC24_B:
crc_len=3; crc_len=3;
break; break;
case CRC16: case CRC16:
crc_len=2; crc_len=2;
break; break;
case CRC8: case CRC8:
crc_len=1; crc_len=1;
break; break;
default: default:
crc_len=3; crc_len=3;
} }
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
yp128 = (__m128i*)y; yp128 = (__m128i *)y;
#elif defined(__arm__) #elif defined(__arm__)
yp128 = (int16x8_t*)y; yp128 = (int16x8_t *)y;
#endif #endif
s = systematic0; s = systematic0;
s1 = systematic1; s1 = systematic1;
s2 = systematic2; s2 = systematic2;
yp1 = yparity1; yp1 = yparity1;
yp2 = yparity2; yp2 = yparity2;
for (i=0; i<n; i+=8) { for (i=0; i<n; i+=8) {
pi2_p = &pi2tab16[iind][i]; pi2_p = &pi2tab16[iind][i];
j=pi2_p[0]; j=pi2_p[0];
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
tmpe = _mm_load_si128(yp128); tmpe = _mm_load_si128(yp128);
// fprintf(fdsse4,"yp128 %p\n",yp128); // fprintf(fdsse4,"yp128 %p\n",yp128);
// print_shorts("tmpe",(int16_t *)&tmpe); // print_shorts("tmpe",(int16_t *)&tmpe);
s[j] = _mm_extract_epi16(tmpe,0); s[j] = _mm_extract_epi16(tmpe,0);
yp1[j] = _mm_extract_epi16(tmpe,1); yp1[j] = _mm_extract_epi16(tmpe,1);
yp2[j] = _mm_extract_epi16(tmpe,2); yp2[j] = _mm_extract_epi16(tmpe,2);
...@@ -1295,7 +1178,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1295,7 +1178,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf(fdsse4,"init0: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); fprintf(fdsse4,"init0: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
#endif #endif
j=pi2_p[1]; j=pi2_p[1];
s[j] = _mm_extract_epi16(tmpe,3); s[j] = _mm_extract_epi16(tmpe,3);
yp1[j] = _mm_extract_epi16(tmpe,4); yp1[j] = _mm_extract_epi16(tmpe,4);
yp2[j] = _mm_extract_epi16(tmpe,5); yp2[j] = _mm_extract_epi16(tmpe,5);
...@@ -1303,7 +1185,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1303,7 +1185,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf(fdsse4,"init1: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); fprintf(fdsse4,"init1: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
#endif #endif
j=pi2_p[2]; j=pi2_p[2];
s[j] = _mm_extract_epi16(tmpe,6); s[j] = _mm_extract_epi16(tmpe,6);
yp1[j] = _mm_extract_epi16(tmpe,7); yp1[j] = _mm_extract_epi16(tmpe,7);
tmpe = _mm_load_si128(&yp128[1]); tmpe = _mm_load_si128(&yp128[1]);
...@@ -1312,7 +1193,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1312,7 +1193,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf(fdsse4,"init2: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); fprintf(fdsse4,"init2: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
#endif #endif
j=pi2_p[3]; j=pi2_p[3];
s[j] = _mm_extract_epi16(tmpe,1); s[j] = _mm_extract_epi16(tmpe,1);
yp1[j] = _mm_extract_epi16(tmpe,2); yp1[j] = _mm_extract_epi16(tmpe,2);
yp2[j] = _mm_extract_epi16(tmpe,3); yp2[j] = _mm_extract_epi16(tmpe,3);
...@@ -1320,7 +1200,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1320,7 +1200,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf(fdsse4,"init3: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); fprintf(fdsse4,"init3: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
#endif #endif
j=pi2_p[4]; j=pi2_p[4];
s[j] = _mm_extract_epi16(tmpe,4); s[j] = _mm_extract_epi16(tmpe,4);
yp1[j] = _mm_extract_epi16(tmpe,5); yp1[j] = _mm_extract_epi16(tmpe,5);
yp2[j] = _mm_extract_epi16(tmpe,6); yp2[j] = _mm_extract_epi16(tmpe,6);
...@@ -1328,7 +1207,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1328,7 +1207,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf(fdsse4,"init4: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); fprintf(fdsse4,"init4: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
#endif #endif
j=pi2_p[5]; j=pi2_p[5];
s[j] = _mm_extract_epi16(tmpe,7); s[j] = _mm_extract_epi16(tmpe,7);
tmpe = _mm_load_si128(&yp128[2]); tmpe = _mm_load_si128(&yp128[2]);
yp1[j] = _mm_extract_epi16(tmpe,0); yp1[j] = _mm_extract_epi16(tmpe,0);
...@@ -1336,9 +1214,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1336,9 +1214,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"init5: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); fprintf(fdsse4,"init5: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
#endif #endif
j=pi2_p[6]; j=pi2_p[6];
s[j] = _mm_extract_epi16(tmpe,2); s[j] = _mm_extract_epi16(tmpe,2);
yp1[j] = _mm_extract_epi16(tmpe,3); yp1[j] = _mm_extract_epi16(tmpe,3);
yp2[j] = _mm_extract_epi16(tmpe,4); yp2[j] = _mm_extract_epi16(tmpe,4);
...@@ -1346,60 +1222,49 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1346,60 +1222,49 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
fprintf(fdsse4,"init6: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); fprintf(fdsse4,"init6: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
#endif #endif
j=pi2_p[7]; j=pi2_p[7];
s[j] = _mm_extract_epi16(tmpe,5); s[j] = _mm_extract_epi16(tmpe,5);
yp1[j] = _mm_extract_epi16(tmpe,6); yp1[j] = _mm_extract_epi16(tmpe,6);
yp2[j] = _mm_extract_epi16(tmpe,7); yp2[j] = _mm_extract_epi16(tmpe,7);
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"init7: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); fprintf(fdsse4,"init7: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
#endif #endif
#elif defined(__arm__) #elif defined(__arm__)
s[j] = vgetq_lane_s16(yp128[0],0); s[j] = vgetq_lane_s16(yp128[0],0);
yp1[j] = vgetq_lane_s16(yp128[0],1); yp1[j] = vgetq_lane_s16(yp128[0],1);
yp2[j] = vgetq_lane_s16(yp128[0],2); yp2[j] = vgetq_lane_s16(yp128[0],2);
j=pi2_p[1]; j=pi2_p[1];
s[j] = vgetq_lane_s16(yp128[0],3); s[j] = vgetq_lane_s16(yp128[0],3);
yp1[j] = vgetq_lane_s16(yp128[0],4); yp1[j] = vgetq_lane_s16(yp128[0],4);
yp2[j] = vgetq_lane_s16(yp128[0],5); yp2[j] = vgetq_lane_s16(yp128[0],5);
j=pi2_p[2]; j=pi2_p[2];
s[j] = vgetq_lane_s16(yp128[0],6); s[j] = vgetq_lane_s16(yp128[0],6);
yp1[j] = vgetq_lane_s16(yp128[0],7); yp1[j] = vgetq_lane_s16(yp128[0],7);
yp2[j] = vgetq_lane_s16(yp128[1],0); yp2[j] = vgetq_lane_s16(yp128[1],0);
j=pi2_p[3]; j=pi2_p[3];
s[j] = vgetq_lane_s16(yp128[1],1); s[j] = vgetq_lane_s16(yp128[1],1);
yp1[j] = vgetq_lane_s16(yp128[1],2); yp1[j] = vgetq_lane_s16(yp128[1],2);
yp2[j] = vgetq_lane_s16(yp128[1],3); yp2[j] = vgetq_lane_s16(yp128[1],3);
j=pi2_p[4]; j=pi2_p[4];
s[j] = vgetq_lane_s16(yp128[1],4); s[j] = vgetq_lane_s16(yp128[1],4);
yp1[j] = vgetq_lane_s16(yp128[1],5); yp1[j] = vgetq_lane_s16(yp128[1],5);
yp2[j] = vgetq_lane_s16(yp128[1],6); yp2[j] = vgetq_lane_s16(yp128[1],6);
j=pi2_p[5]; j=pi2_p[5];
s[j] = vgetq_lane_s16(yp128[1],7); s[j] = vgetq_lane_s16(yp128[1],7);
yp1[j] = vgetq_lane_s16(yp128[2],0); yp1[j] = vgetq_lane_s16(yp128[2],0);
yp2[j] = vgetq_lane_s16(yp128[2],1); yp2[j] = vgetq_lane_s16(yp128[2],1);
j=pi2_p[6]; j=pi2_p[6];
s[j] = vgetq_lane_s16(yp128[2],2); s[j] = vgetq_lane_s16(yp128[2],2);
yp1[j] = vgetq_lane_s16(yp128[2],3); yp1[j] = vgetq_lane_s16(yp128[2],3);
yp2[j] = vgetq_lane_s16(yp128[2],4); yp2[j] = vgetq_lane_s16(yp128[2],4);
j=pi2_p[7]; j=pi2_p[7];
s[j] = vgetq_lane_s16(yp128[2],5); s[j] = vgetq_lane_s16(yp128[2],5);
yp1[j] = vgetq_lane_s16(yp128[2],6); yp1[j] = vgetq_lane_s16(yp128[2],6);
yp2[j] = vgetq_lane_s16(yp128[2],7); yp2[j] = vgetq_lane_s16(yp128[2],7);
#endif #endif
yp128+=3; yp128+=3;
} }
yp=(llr_t*)yp128; yp=(llr_t *)yp128;
// Termination // Termination
for (i=n; i<n+3; i++) { for (i=n; i<n+3; i++) {
...@@ -1410,7 +1275,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1410,7 +1275,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
yp1[i] = *yp; yp1[i] = *yp;
yp++; yp++;
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"Term 1 (%d): %d %d\n",i,s[i],yp1[i]); fprintf(fdsse4,"Term 1 (%u): %d %d\n",i,s[i],yp1[i]);
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
} }
...@@ -1422,32 +1287,25 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1422,32 +1287,25 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
yp2[i-8] = *yp; yp2[i-8] = *yp;
yp++; yp++;
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"Term 2 (%d): %d %d\n",i-3,s[i],yp2[i-8]); fprintf(fdsse4,"Term 2 (%u): %d %d\n",i-3,s[i],yp2[i-8]);
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
} }
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"\n"); fprintf(fdsse4,"\n");
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
stop_meas(init_stats); stop_meas(init_stats);
// do log_map from first parity bit // do log_map from first parity bit
log_map16(systematic0,yparity1,m11,m10,alpha,beta,ext,n,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map16(systematic0,yparity1,m11,m10,alpha,beta,ext,n,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
while (iteration_cnt++ < max_iterations) { while (iteration_cnt++ < max_iterations) {
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fprintf(fdsse4,"\n*******************ITERATION %d (n %d), ext %p\n\n",iteration_cnt,n,ext); fprintf(fdsse4,"\n*******************ITERATION %d (n %d), ext %p\n\n",iteration_cnt,n,ext);
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
start_meas(intl1_stats); start_meas(intl1_stats);
pi4_p=pi4tab16[iind]; pi4_p=pi4tab16[iind];
for (i=0; i<(n>>3); i++) { // steady-state portion for (i=0; i<(n>>3); i++) { // steady-state portion
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],0); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],0);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],1); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],1);
...@@ -1457,30 +1315,24 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1457,30 +1315,24 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],5); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],5);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],6); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],6);
((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],7); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],7);
#elif defined(__arm__) #elif defined(__arm__)
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],0); ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],0);
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],1); ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],1);
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],2); ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],2);
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],3); ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],3);
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],4); ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],4);
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],5); ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],5);
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],6); ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],6);
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],7); ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],7);
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
print_shorts("syst2",(int16_t*)&((__m128i *)systematic2)[i]); print_shorts("syst2",(int16_t *)&((__m128i *)systematic2)[i]);
#endif #endif
} }
stop_meas(intl1_stats); stop_meas(intl1_stats);
// do log_map from second parity bit // do log_map from second parity bit
log_map16(systematic2,yparity2,m11,m10,alpha,beta,ext2,n,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map16(systematic2,yparity2,m11,m10,alpha,beta,ext2,n,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
pi5_p=pi5tab16[iind]; pi5_p=pi5tab16[iind];
for (i=0; i<(n>>3); i++) { for (i=0; i<(n>>3); i++) {
...@@ -1493,20 +1345,20 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1493,20 +1345,20 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],5); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],5);
tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],6); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],6);
tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],7); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],7);
((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]); ((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]);
#elif defined(__arm__) #elif defined(__arm__)
tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,0); tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,0);
tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,1); tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,1);
tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,2); tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,2);
tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,3); tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,3);
tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,4); tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,4);
tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,5); tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,5);
tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,6); tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,6);
tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,7); tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,7);
((int16x8_t *)systematic1)[i] = vqaddq_s16(vqsubq_s16(tmp,((int16x8_t*)ext)[i]),((int16x8_t *)systematic0)[i]); ((int16x8_t *)systematic1)[i] = vqaddq_s16(vqsubq_s16(tmp,((int16x8_t *)ext)[i]),((int16x8_t *)systematic0)[i]);
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
print_shorts("syst1",(int16_t*)&((__m128i *)systematic1)[i]); print_shorts("syst1",(int16_t *)&((__m128i *)systematic1)[i]);
#endif #endif
} }
...@@ -1516,16 +1368,16 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1516,16 +1368,16 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
for (i=0; i<(n>>3); i++) { for (i=0; i<(n>>3); i++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],7); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],7);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],6); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],6);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],5); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],5);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],4); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],4);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],3); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],3);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],2); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],2);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],1); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],1);
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],0); tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],0);
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
print_shorts("tmp",(int16_t*)&tmp); print_shorts("tmp",(int16_t *)&tmp);
#endif #endif
tmp=_mm_cmpgt_epi8(_mm_packs_epi16(tmp,zeros),zeros); tmp=_mm_cmpgt_epi8(_mm_packs_epi16(tmp,zeros),zeros);
decoded_bytes[i]=(unsigned char)_mm_movemask_epi8(tmp); decoded_bytes[i]=(unsigned char)_mm_movemask_epi8(tmp);
...@@ -1538,18 +1390,18 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1538,18 +1390,18 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,2); tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,2);
tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,1); tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,1);
tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,0); tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,0);
// This does: // This does:
// [1 2 4 8 16 32 64 128] .* I(ext_i > 0) = 2.^[b0 b1 b2 b3 b4 b5 b6 b7], where bi =I(ext_i > 0) // [1 2 4 8 16 32 64 128] .* I(ext_i > 0) = 2.^[b0 b1 b2 b3 b4 b5 b6 b7], where bi =I(ext_i > 0)
// [2^b0 + 2^b1 2^b2 + 2^b3 2^b4 + 2^b5 2^b6 + 2^b7] // [2^b0 + 2^b1 2^b2 + 2^b3 2^b4 + 2^b5 2^b6 + 2^b7]
// [2^b0 + 2^b1 + 2^b2 + 2^b3 2^b4 + 2^b5 + 2^b6 + 2^b7] // [2^b0 + 2^b1 + 2^b2 + 2^b3 2^b4 + 2^b5 + 2^b6 + 2^b7]
// Mask64 = 2^b0 + 2^b1 + 2^b2 + 2^b3 + 2^b4 + 2^b5 + 2^b6 + 2^b7 // Mask64 = 2^b0 + 2^b1 + 2^b2 + 2^b3 + 2^b4 + 2^b5 + 2^b6 + 2^b7
uint64x2_t Mask = vpaddlq_u32(vpaddlq_u16(vandq_u16(vcgtq_s16(tmp,zeros), Powers))); uint64x2_t Mask = vpaddlq_u32(vpaddlq_u16(vandq_u16(vcgtq_s16(tmp,zeros), Powers)));
uint64x1_t Mask64 = vget_high_u64(Mask)+vget_low_u64(Mask); uint64x1_t Mask64 = vget_high_u64(Mask)+vget_low_u64(Mask);
decoded_bytes[i] = (uint8_t)Mask64; decoded_bytes[i] = (uint8_t)Mask64;
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
print_shorts("tmp",(int16_t*)&tmp); print_shorts("tmp",(int16_t *)&tmp);
fprintf(fdsse4,"decoded_bytes[%d] %x\n",i,decoded_bytes[i]); fprintf(fdsse4,"decoded_bytes[%u] %x\n",i,decoded_bytes[i]);
#endif #endif
} }
} }
...@@ -1559,41 +1411,40 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1559,41 +1411,40 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len])); oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len]));
switch (crc_type) { switch (crc_type) {
case CRC24_A:
case CRC24_A: oldcrc&=0x00ffffff;
oldcrc&=0x00ffffff; crc = crc24a(&decoded_bytes[F>>3],
crc = crc24a(&decoded_bytes[F>>3], n-24-F)>>8;
n-24-F)>>8; temp=((uint8_t *)&crc)[2];
temp=((uint8_t *)&crc)[2]; ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0];
((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; ((uint8_t *)&crc)[0] = temp;
((uint8_t *)&crc)[0] = temp; break;
break;
case CRC24_B:
case CRC24_B: oldcrc&=0x00ffffff;
oldcrc&=0x00ffffff; crc = crc24b(decoded_bytes,
crc = crc24b(decoded_bytes, n-24)>>8;
n-24)>>8; temp=((uint8_t *)&crc)[2];
temp=((uint8_t *)&crc)[2]; ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0];
((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; ((uint8_t *)&crc)[0] = temp;
((uint8_t *)&crc)[0] = temp; break;
break;
case CRC16:
case CRC16: oldcrc&=0x0000ffff;
oldcrc&=0x0000ffff; crc = crc16(decoded_bytes,
crc = crc16(decoded_bytes, n-16)>>16;
n-16)>>16; break;
break;
case CRC8:
case CRC8: oldcrc&=0x000000ff;
oldcrc&=0x000000ff; crc = crc8(decoded_bytes,
crc = crc8(decoded_bytes, n-8)>>24;
n-8)>>24; break;
break;
default:
default: printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n");
printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); return(255);
return(255); break;
break;
} }
stop_meas(intl2_stats); stop_meas(intl2_stats);
...@@ -1610,13 +1461,13 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1610,13 +1461,13 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
if (iteration_cnt < max_iterations) { if (iteration_cnt < max_iterations) {
log_map16(systematic1,yparity1,m11,m10,alpha,beta,ext,n,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map16(systematic1,yparity1,m11,m10,alpha,beta,ext,n,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i* ext_128=(__m128i*) ext; __m128i *ext_128=(__m128i *) ext;
__m128i* s1_128=(__m128i*) systematic1; __m128i *s1_128=(__m128i *) systematic1;
__m128i* s0_128=(__m128i*) systematic0; __m128i *s0_128=(__m128i *) systematic0;
#elif defined(__arm__) #elif defined(__arm__)
int16x8_t* ext_128=(int16x8_t*) ext; int16x8_t *ext_128=(int16x8_t *) ext;
int16x8_t* s1_128=(int16x8_t*) systematic1; int16x8_t *s1_128=(int16x8_t *) systematic1;
int16x8_t* s0_128=(int16x8_t*) systematic0; int16x8_t *s0_128=(int16x8_t *) systematic0;
#endif #endif
int myloop=n>>3; int myloop=n>>3;
...@@ -1630,13 +1481,11 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -1630,13 +1481,11 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
} }
} }
} }
// fprintf(fdsse4,"crc %x, oldcrc %x\n",crc,oldcrc);
// fprintf(fdsse4,"crc %x, oldcrc %x\n",crc,oldcrc);
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
fclose(fdsse4); fclose(fdsse4);
#endif #endif
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
......
...@@ -39,39 +39,39 @@ ...@@ -39,39 +39,39 @@
#include "PHY/sse_intrin.h" #include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG #ifndef TEST_DEBUG
#include "PHY/defs_common.h" #include "PHY/defs_common.h"
#include "PHY/CODING/coding_defs.h" #include "PHY/CODING/coding_defs.h"
#include "PHY/CODING/lte_interleaver_inline.h" #include "PHY/CODING/lte_interleaver_inline.h"
#else #else
#include "defs.h" #include "defs.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#endif #endif
#ifdef MEX #ifdef MEX
#include "mex.h" #include "mex.h"
#endif #endif
#include "common/ran_context.h" #include "common/ran_context.h"
#define SHUFFLE16(a,b,c,d,e,f,g,h) _mm_set_epi8(h==-1?-1:h*2+1, \ #define SHUFFLE16(a,b,c,d,e,f,g,h) _mm_set_epi8(h==-1?-1:h*2+1, \
h==-1?-1:h*2, \ h==-1?-1:h*2, \
g==-1?-1:g*2+1, \ g==-1?-1:g*2+1, \
g==-1?-1:g*2, \ g==-1?-1:g*2, \
f==-1?-1:f*2+1, \ f==-1?-1:f*2+1, \
f==-1?-1:f*2, \ f==-1?-1:f*2, \
e==-1?-1:e*2+1, \ e==-1?-1:e*2+1, \
e==-1?-1:e*2, \ e==-1?-1:e*2, \
d==-1?-1:d*2+1, \ d==-1?-1:d*2+1, \
d==-1?-1:d*2, \ d==-1?-1:d*2, \
c==-1?-1:c*2+1, \ c==-1?-1:c*2+1, \
c==-1?-1:c*2, \ c==-1?-1:c*2, \
b==-1?-1:b*2+1, \ b==-1?-1:b*2+1, \
b==-1?-1:b*2, \ b==-1?-1:b*2, \
a==-1?-1:a*2+1, \ a==-1?-1:a*2+1, \
a==-1?-1:a*2); a==-1?-1:a*2);
...@@ -86,32 +86,28 @@ typedef int8_t channel_t; ...@@ -86,32 +86,28 @@ typedef int8_t channel_t;
#define MAX8 127 #define MAX8 127
void log_map8(llr_t* systematic,channel_t* y_parity, llr_t* m11, llr_t* m10, llr_t *alpha, llr_t *beta, llr_t* ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,int offset8_flag, void log_map8(llr_t *systematic,channel_t *y_parity, llr_t *m11, llr_t *m10, llr_t *alpha, llr_t *beta, llr_t *ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,int offset8_flag,
time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats); time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats);
void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic, channel_t* y_parity, unsigned short frame_length,unsigned char term_flag); void compute_gamma8(llr_t *m11,llr_t *m10,llr_t *systematic, channel_t *y_parity, unsigned short frame_length,unsigned char term_flag);
void compute_alpha8(llr_t*alpha,llr_t *beta, llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F); void compute_alpha8(llr_t *alpha,llr_t *beta, llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F);
void compute_beta8(llr_t*alpha, llr_t* beta,llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F,int offset8_flag); void compute_beta8(llr_t *alpha, llr_t *beta,llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F,int offset8_flag);
void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m11,llr_t* m10,llr_t* extrinsic, llr_t* ap, unsigned short frame_length); void compute_ext8(llr_t *alpha,llr_t *beta,llr_t *m11,llr_t *m10,llr_t *extrinsic, llr_t *ap, unsigned short frame_length);
void print_bytes(char *s, int8_t *x)
{
void print_bytes(char *s, int8_t *x) {
printf("%s : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s, printf("%s : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s,
x[0],x[1],x[2],x[3],x[4],x[5],x[6],x[7], x[0],x[1],x[2],x[3],x[4],x[5],x[6],x[7],
x[8],x[9],x[10],x[11],x[12],x[13],x[14],x[15]); x[8],x[9],x[10],x[11],x[12],x[13],x[14],x[15]);
} }
void log_map8(llr_t* systematic, void log_map8(llr_t *systematic,
channel_t* y_parity, channel_t *y_parity,
llr_t* m11, llr_t *m11,
llr_t* m10, llr_t *m10,
llr_t *alpha, llr_t *alpha,
llr_t *beta, llr_t *beta,
llr_t* ext, llr_t *ext,
unsigned short frame_length, unsigned short frame_length,
unsigned char term_flag, unsigned char term_flag,
unsigned char F, unsigned char F,
...@@ -119,32 +115,38 @@ void log_map8(llr_t* systematic, ...@@ -119,32 +115,38 @@ void log_map8(llr_t* systematic,
time_stats_t *alpha_stats, time_stats_t *alpha_stats,
time_stats_t *beta_stats, time_stats_t *beta_stats,
time_stats_t *gamma_stats, time_stats_t *gamma_stats,
time_stats_t *ext_stats) time_stats_t *ext_stats) {
{
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("log_map, frame_length %d\n",frame_length); printf("log_map, frame_length %d\n",frame_length);
#endif #endif
if (gamma_stats) start_meas(gamma_stats) ; if (gamma_stats) start_meas(gamma_stats) ;
compute_gamma8(m11,m10,systematic,y_parity,frame_length,term_flag) ; compute_gamma8(m11,m10,systematic,y_parity,frame_length,term_flag) ;
if (gamma_stats) stop_meas(gamma_stats); if (gamma_stats) stop_meas(gamma_stats);
if (alpha_stats) start_meas(alpha_stats) ; if (alpha_stats) start_meas(alpha_stats) ;
compute_alpha8(alpha,beta,m11,m10,frame_length,F) ; compute_alpha8(alpha,beta,m11,m10,frame_length,F) ;
if (alpha_stats) stop_meas(alpha_stats); if (alpha_stats) stop_meas(alpha_stats);
if (beta_stats) start_meas(beta_stats) ; if (beta_stats) start_meas(beta_stats) ;
compute_beta8(alpha,beta,m11,m10,frame_length,F,offset8_flag) ; compute_beta8(alpha,beta,m11,m10,frame_length,F,offset8_flag) ;
if (beta_stats) stop_meas(beta_stats); if (beta_stats) stop_meas(beta_stats);
if (ext_stats) start_meas(ext_stats) ; if (ext_stats) start_meas(ext_stats) ;
compute_ext8(alpha,beta,m11,m10,ext,systematic,frame_length) ;
if (ext_stats) stop_meas(ext_stats);
compute_ext8(alpha,beta,m11,m10,ext,systematic,frame_length) ;
if (ext_stats) stop_meas(ext_stats);
} }
void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, void compute_gamma8(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity,
unsigned short frame_length,unsigned char term_flag) unsigned short frame_length,unsigned char term_flag) {
{
int k,K1; int k,K1;
#if defined(__x86_64__)||defined(__i386__) #if defined(__x86_64__)||defined(__i386__)
__m128i *systematic128 = (__m128i *)systematic; __m128i *systematic128 = (__m128i *)systematic;
...@@ -157,11 +159,9 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -157,11 +159,9 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
int8x16_t *m10_128 = (int8x16_t *)m10; int8x16_t *m10_128 = (int8x16_t *)m10;
int8x16_t *m11_128 = (int8x16_t *)m11; int8x16_t *m11_128 = (int8x16_t *)m11;
#endif #endif
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("compute_gamma, %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length); printf("compute_gamma, %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length);
#endif #endif
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
register __m128i sl,sh,ypl,yph; //K128=_mm_set1_epi8(-128); register __m128i sl,sh,ypl,yph; //K128=_mm_set1_epi8(-128);
#endif #endif
...@@ -181,11 +181,9 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -181,11 +181,9 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
m11_128[k] = vhaddq_s8(systematic128[k],y_parity128[k]); m11_128[k] = vhaddq_s8(systematic128[k],y_parity128[k]);
m10_128[k] = vhsubq_s8(systematic128[k],y_parity128[k]); m10_128[k] = vhsubq_s8(systematic128[k],y_parity128[k]);
#endif #endif
} }
// Termination // Termination
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
sl = _mm_cvtepi8_epi16(systematic128[k+term_flag]); sl = _mm_cvtepi8_epi16(systematic128[k+term_flag]);
sh = _mm_cvtepi8_epi16(_mm_srli_si128(systematic128[k],8)); sh = _mm_cvtepi8_epi16(_mm_srli_si128(systematic128[k],8));
...@@ -199,15 +197,12 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, ...@@ -199,15 +197,12 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity,
m11_128[k] = vhaddq_s8(systematic128[k+term_flag],y_parity128[k]); m11_128[k] = vhaddq_s8(systematic128[k+term_flag],y_parity128[k]);
m10_128[k] = vhsubq_s8(systematic128[k+term_flag],y_parity128[k]); m10_128[k] = vhsubq_s8(systematic128[k+term_flag],y_parity128[k]);
#endif #endif
} }
#define L 16 #define L 16
void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned short frame_length,unsigned char F) void compute_alpha8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F) {
{
int k,loopval,rerun_flag; int k,loopval,rerun_flag;
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i *alpha128=(__m128i *)alpha,*alpha_ptr; __m128i *alpha128=(__m128i *)alpha,*alpha_ptr;
__m128i *m11p,*m10p; __m128i *m11p,*m10p;
...@@ -223,7 +218,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -223,7 +218,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
#endif #endif
// Set initial state: first column is known // Set initial state: first column is known
// the other columns are unknown, so all states are set to same value // the other columns are unknown, so all states are set to same value
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
alpha128[0] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,0); alpha128[0] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,0);
alpha128[1] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2); alpha128[1] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2);
...@@ -233,12 +227,11 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -233,12 +227,11 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha128[5] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2); alpha128[5] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2);
alpha128[6] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2); alpha128[6] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2);
alpha128[7] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2); alpha128[7] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2);
for (loopval=frame_length>>4, rerun_flag=0; rerun_flag<2; loopval=L, rerun_flag++) {
for (loopval=frame_length>>4, rerun_flag=0; rerun_flag<2; loopval=L, rerun_flag++) {
alpha_ptr = &alpha128[0]; alpha_ptr = &alpha128[0];
m11p = (__m128i *)m_11;
m11p = (__m128i*)m_11; m10p = (__m128i *)m_10;
m10p = (__m128i*)m_10;
for (k=0; k<loopval; k++) { for (k=0; k<loopval; k++) {
m_b0 = _mm_adds_epi8(alpha_ptr[1],*m11p); // m11 m_b0 = _mm_adds_epi8(alpha_ptr[1],*m11p); // m11
...@@ -249,7 +242,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -249,7 +242,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
m_b6 = _mm_subs_epi8(alpha_ptr[5],*m10p); // m01=-m10 m_b6 = _mm_subs_epi8(alpha_ptr[5],*m10p); // m01=-m10
m_b3 = _mm_subs_epi8(alpha_ptr[7],*m11p); // m00=-m11 m_b3 = _mm_subs_epi8(alpha_ptr[7],*m11p); // m00=-m11
m_b7 = _mm_adds_epi8(alpha_ptr[7],*m11p); // m11 m_b7 = _mm_adds_epi8(alpha_ptr[7],*m11p); // m11
new0 = _mm_subs_epi8(alpha_ptr[0],*m11p); // m00=-m11 new0 = _mm_subs_epi8(alpha_ptr[0],*m11p); // m00=-m11
new4 = _mm_adds_epi8(alpha_ptr[0],*m11p); // m11 new4 = _mm_adds_epi8(alpha_ptr[0],*m11p); // m11
new1 = _mm_adds_epi8(alpha_ptr[2],*m10p); // m10 new1 = _mm_adds_epi8(alpha_ptr[2],*m10p); // m10
...@@ -258,7 +250,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -258,7 +250,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
new6 = _mm_adds_epi8(alpha_ptr[4],*m10p); // m10 new6 = _mm_adds_epi8(alpha_ptr[4],*m10p); // m10
new3 = _mm_adds_epi8(alpha_ptr[6],*m11p); // m11 new3 = _mm_adds_epi8(alpha_ptr[6],*m11p); // m11
new7 = _mm_subs_epi8(alpha_ptr[6],*m11p); // m00=-m11 new7 = _mm_subs_epi8(alpha_ptr[6],*m11p); // m00=-m11
alpha_ptr += 8; alpha_ptr += 8;
m11p++; m11p++;
m10p++; m10p++;
...@@ -270,7 +261,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -270,7 +261,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha_ptr[5] = _mm_max_epi8(m_b5,new5); alpha_ptr[5] = _mm_max_epi8(m_b5,new5);
alpha_ptr[6] = _mm_max_epi8(m_b6,new6); alpha_ptr[6] = _mm_max_epi8(m_b6,new6);
alpha_ptr[7] = _mm_max_epi8(m_b7,new7); alpha_ptr[7] = _mm_max_epi8(m_b7,new7);
// compute and subtract maxima // compute and subtract maxima
alpha_max = _mm_max_epi8(alpha_ptr[0],alpha_ptr[1]); alpha_max = _mm_max_epi8(alpha_ptr[0],alpha_ptr[1]);
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[2]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[2]);
...@@ -279,7 +269,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -279,7 +269,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[5]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[5]);
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[6]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[6]);
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[7]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[7]);
alpha_ptr[0] = _mm_subs_epi8(alpha_ptr[0],alpha_max); alpha_ptr[0] = _mm_subs_epi8(alpha_ptr[0],alpha_max);
alpha_ptr[1] = _mm_subs_epi8(alpha_ptr[1],alpha_max); alpha_ptr[1] = _mm_subs_epi8(alpha_ptr[1],alpha_max);
alpha_ptr[2] = _mm_subs_epi8(alpha_ptr[2],alpha_max); alpha_ptr[2] = _mm_subs_epi8(alpha_ptr[2],alpha_max);
...@@ -308,8 +297,8 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -308,8 +297,8 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha[80] = -MAX8/2; alpha[80] = -MAX8/2;
alpha[96] = -MAX8/2; alpha[96] = -MAX8/2;
alpha[112] = -MAX8/2; alpha[112] = -MAX8/2;
} }
#elif defined(__arm__) #elif defined(__arm__)
alpha128[0] = vdupq_n_s8(-MAX8/2); alpha128[0] = vdupq_n_s8(-MAX8/2);
alpha128[0] = vsetq_lane_s8(0,alpha128[0],0); alpha128[0] = vsetq_lane_s8(0,alpha128[0],0);
...@@ -320,12 +309,11 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -320,12 +309,11 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha128[5] = vdupq_n_s8(-MAX8/2); alpha128[5] = vdupq_n_s8(-MAX8/2);
alpha128[6] = vdupq_n_s8(-MAX8/2); alpha128[6] = vdupq_n_s8(-MAX8/2);
alpha128[7] = vdupq_n_s8(-MAX8/2); alpha128[7] = vdupq_n_s8(-MAX8/2);
for (loopval=frame_length>>4, rerun_flag=0; rerun_flag<2; loopval=L, rerun_flag++) {
for (loopval=frame_length>>4, rerun_flag=0; rerun_flag<2; loopval=L, rerun_flag++) {
alpha_ptr = &alpha128[0]; alpha_ptr = &alpha128[0];
m11p = (int8x16_t *)m_11;
m11p = (int8x16_t*)m_11; m10p = (int8x16_t *)m_10;
m10p = (int8x16_t*)m_10;
for (k=0; k<loopval; k++) { for (k=0; k<loopval; k++) {
m_b0 = vqaddq_s8(alpha_ptr[1],*m11p); // m11 m_b0 = vqaddq_s8(alpha_ptr[1],*m11p); // m11
...@@ -336,7 +324,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -336,7 +324,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
m_b6 = vqsubq_s8(alpha_ptr[5],*m10p); // m01=-m10 m_b6 = vqsubq_s8(alpha_ptr[5],*m10p); // m01=-m10
m_b3 = vqsubq_s8(alpha_ptr[7],*m11p); // m00=-m11 m_b3 = vqsubq_s8(alpha_ptr[7],*m11p); // m00=-m11
m_b7 = vqaddq_s8(alpha_ptr[7],*m11p); // m11 m_b7 = vqaddq_s8(alpha_ptr[7],*m11p); // m11
new0 = vqsubq_s8(alpha_ptr[0],*m11p); // m00=-m11 new0 = vqsubq_s8(alpha_ptr[0],*m11p); // m00=-m11
new4 = vqaddq_s8(alpha_ptr[0],*m11p); // m11 new4 = vqaddq_s8(alpha_ptr[0],*m11p); // m11
new1 = vqaddq_s8(alpha_ptr[2],*m10p); // m10 new1 = vqaddq_s8(alpha_ptr[2],*m10p); // m10
...@@ -345,7 +332,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -345,7 +332,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
new6 = vqaddq_s8(alpha_ptr[4],*m10p); // m10 new6 = vqaddq_s8(alpha_ptr[4],*m10p); // m10
new3 = vqaddq_s8(alpha_ptr[6],*m11p); // m11 new3 = vqaddq_s8(alpha_ptr[6],*m11p); // m11
new7 = vqsubq_s8(alpha_ptr[6],*m11p); // m00=-m11 new7 = vqsubq_s8(alpha_ptr[6],*m11p); // m00=-m11
alpha_ptr += 8; alpha_ptr += 8;
m11p++; m11p++;
m10p++; m10p++;
...@@ -357,7 +343,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -357,7 +343,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha_ptr[5] = vmaxq_s8(m_b5,new5); alpha_ptr[5] = vmaxq_s8(m_b5,new5);
alpha_ptr[6] = vmaxq_s8(m_b6,new6); alpha_ptr[6] = vmaxq_s8(m_b6,new6);
alpha_ptr[7] = vmaxq_s8(m_b7,new7); alpha_ptr[7] = vmaxq_s8(m_b7,new7);
// compute and subtract maxima // compute and subtract maxima
alpha_max = vmaxq_s8(alpha_ptr[0],alpha_ptr[1]); alpha_max = vmaxq_s8(alpha_ptr[0],alpha_ptr[1]);
alpha_max = vmaxq_s8(alpha_max,alpha_ptr[2]); alpha_max = vmaxq_s8(alpha_max,alpha_ptr[2]);
...@@ -366,7 +351,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -366,7 +351,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha_max = vmaxq_s8(alpha_max,alpha_ptr[5]); alpha_max = vmaxq_s8(alpha_max,alpha_ptr[5]);
alpha_max = vmaxq_s8(alpha_max,alpha_ptr[6]); alpha_max = vmaxq_s8(alpha_max,alpha_ptr[6]);
alpha_max = vmaxq_s8(alpha_max,alpha_ptr[7]); alpha_max = vmaxq_s8(alpha_max,alpha_ptr[7]);
alpha_ptr[0] = vqsubq_s8(alpha_ptr[0],alpha_max); alpha_ptr[0] = vqsubq_s8(alpha_ptr[0],alpha_max);
alpha_ptr[1] = vqsubq_s8(alpha_ptr[1],alpha_max); alpha_ptr[1] = vqsubq_s8(alpha_ptr[1],alpha_max);
alpha_ptr[2] = vqsubq_s8(alpha_ptr[2],alpha_max); alpha_ptr[2] = vqsubq_s8(alpha_ptr[2],alpha_max);
...@@ -380,14 +364,22 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -380,14 +364,22 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
// Set initial state for next iteration from the last state // Set initial state for next iteration from the last state
// as a column end states are the first states of the next column // as a column end states are the first states of the next column
int K1= frame_length>>1; int K1= frame_length>>1;
alpha128[0] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[K1],8); alpha128[0] = vsetq_lane_s8(alpha[8],alpha128[0],7); alpha128[0] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[K1],8);
alpha128[1] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[1+K1],8); alpha128[1] = vsetq_lane_s8(alpha[24],alpha128[0],7); alpha128[0] = vsetq_lane_s8(alpha[8],alpha128[0],7);
alpha128[2] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[2+K1],8); alpha128[2] = vsetq_lane_s8(alpha[40],alpha128[0],7); alpha128[1] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[1+K1],8);
alpha128[3] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[3+K1],8); alpha128[3] = vsetq_lane_s8(alpha[56],alpha128[0],7); alpha128[1] = vsetq_lane_s8(alpha[24],alpha128[0],7);
alpha128[4] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[4+K1],8); alpha128[4] = vsetq_lane_s8(alpha[72],alpha128[0],7); alpha128[2] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[2+K1],8);
alpha128[5] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[5+K1],8); alpha128[5] = vsetq_lane_s8(alpha[88],alpha128[0],7); alpha128[2] = vsetq_lane_s8(alpha[40],alpha128[0],7);
alpha128[6] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[6+K1],8); alpha128[6] = vsetq_lane_s8(alpha[104],alpha128[0],7); alpha128[3] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[3+K1],8);
alpha128[7] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[7+K1],8); alpha128[7] = vsetq_lane_s8(alpha[120],alpha128[0],7); alpha128[3] = vsetq_lane_s8(alpha[56],alpha128[0],7);
alpha128[4] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[4+K1],8);
alpha128[4] = vsetq_lane_s8(alpha[72],alpha128[0],7);
alpha128[5] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[5+K1],8);
alpha128[5] = vsetq_lane_s8(alpha[88],alpha128[0],7);
alpha128[6] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[6+K1],8);
alpha128[6] = vsetq_lane_s8(alpha[104],alpha128[0],7);
alpha128[7] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[7+K1],8);
alpha128[7] = vsetq_lane_s8(alpha[120],alpha128[0],7);
alpha[16] = -MAX8/2; alpha[16] = -MAX8/2;
alpha[32] = -MAX8/2; alpha[32] = -MAX8/2;
alpha[48] = -MAX8/2; alpha[48] = -MAX8/2;
...@@ -395,35 +387,28 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh ...@@ -395,35 +387,28 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh
alpha[80] = -MAX8/2; alpha[80] = -MAX8/2;
alpha[96] = -MAX8/2; alpha[96] = -MAX8/2;
alpha[112] = -MAX8/2; alpha[112] = -MAX8/2;
} }
#endif
#endif
} }
void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned short frame_length,unsigned char F,int offset8_flag) void compute_beta8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F,int offset8_flag) {
{
int k,rerun_flag, loopval; int k,rerun_flag, loopval;
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i m11_128,m10_128; __m128i m11_128,m10_128;
__m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
__m128i new0,new1,new2,new3,new4,new5,new6,new7; __m128i new0,new1,new2,new3,new4,new5,new6,new7;
__m128i *beta128,*alpha128,*beta_ptr; __m128i *beta128,*alpha128,*beta_ptr;
__m128i beta_max; __m128i beta_max;
#elif defined(__arm__) #elif defined(__arm__)
int8x16_t m11_128,m10_128; int8x16_t m11_128,m10_128;
int8x16_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; int8x16_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
int8x16_t new0,new1,new2,new3,new4,new5,new6,new7; int8x16_t new0,new1,new2,new3,new4,new5,new6,new7;
int8x16_t *beta128,*alpha128,*beta_ptr; int8x16_t *beta128,*alpha128,*beta_ptr;
int8x16_t beta_max; int8x16_t beta_max;
#endif #endif
llr_t beta0,beta1; llr_t beta0,beta1;
llr_t beta2,beta3,beta4,beta5,beta6,beta7; llr_t beta2,beta3,beta4,beta5,beta6,beta7;
if (frame_length > 6144) { if (frame_length > 6144) {
...@@ -433,13 +418,12 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -433,13 +418,12 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
// we are supposed to run compute_alpha just before compute_beta // we are supposed to run compute_alpha just before compute_beta
// so the initial states of backward computation can be set from last value of alpha states (forward computation) // so the initial states of backward computation can be set from last value of alpha states (forward computation)
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
beta_ptr = (__m128i*)&beta[frame_length<<3]; beta_ptr = (__m128i *)&beta[frame_length<<3];
alpha128 = (__m128i*)&alpha[0]; alpha128 = (__m128i *)&alpha[0];
#elif defined(__arm__) #elif defined(__arm__)
beta_ptr = (int8x16_t*)&beta[frame_length<<3]; beta_ptr = (int8x16_t *)&beta[frame_length<<3];
alpha128 = (int8x16_t*)&alpha[0]; alpha128 = (int8x16_t *)&alpha[0];
#endif #endif
beta_ptr[0] = alpha128[(frame_length>>1)]; beta_ptr[0] = alpha128[(frame_length>>1)];
beta_ptr[1] = alpha128[1+(frame_length>>1)]; beta_ptr[1] = alpha128[1+(frame_length>>1)];
...@@ -449,18 +433,15 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -449,18 +433,15 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr[5] = alpha128[5+(frame_length>>1)]; beta_ptr[5] = alpha128[5+(frame_length>>1)];
beta_ptr[6] = alpha128[6+(frame_length>>1)]; beta_ptr[6] = alpha128[6+(frame_length>>1)];
beta_ptr[7] = alpha128[7+(frame_length>>1)]; beta_ptr[7] = alpha128[7+(frame_length>>1)];
int overlap = (frame_length>>4)> L ? (frame_length>>4)-L : 0 ; int overlap = (frame_length>>4)> L ? (frame_length>>4)-L : 0 ;
for (rerun_flag=0, loopval=0; for (rerun_flag=0, loopval=0;
rerun_flag<2 ; rerun_flag<2 ;
loopval=overlap,rerun_flag++) { loopval=overlap,rerun_flag++) {
if (offset8_flag==0) { if (offset8_flag==0) {
// FIXME! beta0-beta7 are used uninitialized. FIXME! // FIXME! beta0-beta7 are used uninitialized. FIXME!
// workaround: init with 0 // workaround: init with 0
beta0 = beta1 = beta2 = beta3 = beta4 = beta5 = beta6 = beta7 = 0; beta0 = beta1 = beta2 = beta3 = beta4 = beta5 = beta6 = beta7 = 0;
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
beta_ptr[0] = _mm_insert_epi8(beta_ptr[0],beta0,15); beta_ptr[0] = _mm_insert_epi8(beta_ptr[0],beta0,15);
beta_ptr[1] = _mm_insert_epi8(beta_ptr[1],beta1,15); beta_ptr[1] = _mm_insert_epi8(beta_ptr[1],beta1,15);
...@@ -483,16 +464,17 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -483,16 +464,17 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
} }
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
beta_ptr = (__m128i*)&beta[frame_length<<3]; beta_ptr = (__m128i *)&beta[frame_length<<3];
#elif defined(__arm__) #elif defined(__arm__)
beta_ptr = (int8x16_t*)&beta[frame_length<<3]; beta_ptr = (int8x16_t *)&beta[frame_length<<3];
#endif #endif
for (k=(frame_length>>4)-1; for (k=(frame_length>>4)-1;
k>=loopval; k>=loopval;
k--) { k--) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
m11_128=((__m128i*)m_11)[k]; m11_128=((__m128i *)m_11)[k];
m10_128=((__m128i*)m_10)[k]; m10_128=((__m128i *)m_10)[k];
m_b0 = _mm_adds_epi8(beta_ptr[4],m11_128); //m11 m_b0 = _mm_adds_epi8(beta_ptr[4],m11_128); //m11
m_b1 = _mm_subs_epi8(beta_ptr[4],m11_128); //m00 m_b1 = _mm_subs_epi8(beta_ptr[4],m11_128); //m00
m_b2 = _mm_subs_epi8(beta_ptr[5],m10_128); //m01 m_b2 = _mm_subs_epi8(beta_ptr[5],m10_128); //m01
...@@ -501,7 +483,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -501,7 +483,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
m_b5 = _mm_subs_epi8(beta_ptr[6],m10_128); //m01 m_b5 = _mm_subs_epi8(beta_ptr[6],m10_128); //m01
m_b6 = _mm_subs_epi8(beta_ptr[7],m11_128); //m00 m_b6 = _mm_subs_epi8(beta_ptr[7],m11_128); //m00
m_b7 = _mm_adds_epi8(beta_ptr[7],m11_128); //m11 m_b7 = _mm_adds_epi8(beta_ptr[7],m11_128); //m11
new0 = _mm_subs_epi8(beta_ptr[0],m11_128); //m00 new0 = _mm_subs_epi8(beta_ptr[0],m11_128); //m00
new1 = _mm_adds_epi8(beta_ptr[0],m11_128); //m11 new1 = _mm_adds_epi8(beta_ptr[0],m11_128); //m11
new2 = _mm_adds_epi8(beta_ptr[1],m10_128); //m10 new2 = _mm_adds_epi8(beta_ptr[1],m10_128); //m10
...@@ -510,9 +491,7 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -510,9 +491,7 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
new5 = _mm_adds_epi8(beta_ptr[2],m10_128); //m10 new5 = _mm_adds_epi8(beta_ptr[2],m10_128); //m10
new6 = _mm_adds_epi8(beta_ptr[3],m11_128); //m11 new6 = _mm_adds_epi8(beta_ptr[3],m11_128); //m11
new7 = _mm_subs_epi8(beta_ptr[3],m11_128); //m00 new7 = _mm_subs_epi8(beta_ptr[3],m11_128); //m00
beta_ptr-=8; beta_ptr-=8;
beta_ptr[0] = _mm_max_epi8(m_b0,new0); beta_ptr[0] = _mm_max_epi8(m_b0,new0);
beta_ptr[1] = _mm_max_epi8(m_b1,new1); beta_ptr[1] = _mm_max_epi8(m_b1,new1);
beta_ptr[2] = _mm_max_epi8(m_b2,new2); beta_ptr[2] = _mm_max_epi8(m_b2,new2);
...@@ -521,7 +500,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -521,7 +500,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr[5] = _mm_max_epi8(m_b5,new5); beta_ptr[5] = _mm_max_epi8(m_b5,new5);
beta_ptr[6] = _mm_max_epi8(m_b6,new6); beta_ptr[6] = _mm_max_epi8(m_b6,new6);
beta_ptr[7] = _mm_max_epi8(m_b7,new7); beta_ptr[7] = _mm_max_epi8(m_b7,new7);
beta_max = _mm_max_epi8(beta_ptr[0],beta_ptr[1]); beta_max = _mm_max_epi8(beta_ptr[0],beta_ptr[1]);
beta_max = _mm_max_epi8(beta_max ,beta_ptr[2]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[2]);
beta_max = _mm_max_epi8(beta_max ,beta_ptr[3]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[3]);
...@@ -529,7 +507,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -529,7 +507,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_max = _mm_max_epi8(beta_max ,beta_ptr[5]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[5]);
beta_max = _mm_max_epi8(beta_max ,beta_ptr[6]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[6]);
beta_max = _mm_max_epi8(beta_max ,beta_ptr[7]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[7]);
beta_ptr[0] = _mm_subs_epi8(beta_ptr[0],beta_max); beta_ptr[0] = _mm_subs_epi8(beta_ptr[0],beta_max);
beta_ptr[1] = _mm_subs_epi8(beta_ptr[1],beta_max); beta_ptr[1] = _mm_subs_epi8(beta_ptr[1],beta_max);
beta_ptr[2] = _mm_subs_epi8(beta_ptr[2],beta_max); beta_ptr[2] = _mm_subs_epi8(beta_ptr[2],beta_max);
...@@ -539,8 +516,8 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -539,8 +516,8 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr[6] = _mm_subs_epi8(beta_ptr[6],beta_max); beta_ptr[6] = _mm_subs_epi8(beta_ptr[6],beta_max);
beta_ptr[7] = _mm_subs_epi8(beta_ptr[7],beta_max); beta_ptr[7] = _mm_subs_epi8(beta_ptr[7],beta_max);
#elif defined(__arm__) #elif defined(__arm__)
m11_128=((int8x16_t*)m_11)[k]; m11_128=((int8x16_t *)m_11)[k];
m10_128=((int8x16_t*)m_10)[k]; m10_128=((int8x16_t *)m_10)[k];
m_b0 = vqaddq_s8(beta_ptr[4],m11_128); //m11 m_b0 = vqaddq_s8(beta_ptr[4],m11_128); //m11
m_b1 = vqsubq_s8(beta_ptr[4],m11_128); //m00 m_b1 = vqsubq_s8(beta_ptr[4],m11_128); //m00
m_b2 = vqsubq_s8(beta_ptr[5],m10_128); //m01 m_b2 = vqsubq_s8(beta_ptr[5],m10_128); //m01
...@@ -549,7 +526,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -549,7 +526,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
m_b5 = vqsubq_s8(beta_ptr[6],m10_128); //m01 m_b5 = vqsubq_s8(beta_ptr[6],m10_128); //m01
m_b6 = vqsubq_s8(beta_ptr[7],m11_128); //m00 m_b6 = vqsubq_s8(beta_ptr[7],m11_128); //m00
m_b7 = vqaddq_s8(beta_ptr[7],m11_128); //m11 m_b7 = vqaddq_s8(beta_ptr[7],m11_128); //m11
new0 = vqsubq_s8(beta_ptr[0],m11_128); //m00 new0 = vqsubq_s8(beta_ptr[0],m11_128); //m00
new1 = vqaddq_s8(beta_ptr[0],m11_128); //m11 new1 = vqaddq_s8(beta_ptr[0],m11_128); //m11
new2 = vqaddq_s8(beta_ptr[1],m10_128); //m10 new2 = vqaddq_s8(beta_ptr[1],m10_128); //m10
...@@ -558,9 +534,7 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -558,9 +534,7 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
new5 = vqaddq_s8(beta_ptr[2],m10_128); //m10 new5 = vqaddq_s8(beta_ptr[2],m10_128); //m10
new6 = vqaddq_s8(beta_ptr[3],m11_128); //m11 new6 = vqaddq_s8(beta_ptr[3],m11_128); //m11
new7 = vqsubq_s8(beta_ptr[3],m11_128); //m00 new7 = vqsubq_s8(beta_ptr[3],m11_128); //m00
beta_ptr-=8; beta_ptr-=8;
beta_ptr[0] = vmaxq_s8(m_b0,new0); beta_ptr[0] = vmaxq_s8(m_b0,new0);
beta_ptr[1] = vmaxq_s8(m_b1,new1); beta_ptr[1] = vmaxq_s8(m_b1,new1);
beta_ptr[2] = vmaxq_s8(m_b2,new2); beta_ptr[2] = vmaxq_s8(m_b2,new2);
...@@ -569,7 +543,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -569,7 +543,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr[5] = vmaxq_s8(m_b5,new5); beta_ptr[5] = vmaxq_s8(m_b5,new5);
beta_ptr[6] = vmaxq_s8(m_b6,new6); beta_ptr[6] = vmaxq_s8(m_b6,new6);
beta_ptr[7] = vmaxq_s8(m_b7,new7); beta_ptr[7] = vmaxq_s8(m_b7,new7);
beta_max = vmaxq_s8(beta_ptr[0],beta_ptr[1]); beta_max = vmaxq_s8(beta_ptr[0],beta_ptr[1]);
beta_max = vmaxq_s8(beta_max ,beta_ptr[2]); beta_max = vmaxq_s8(beta_max ,beta_ptr[2]);
beta_max = vmaxq_s8(beta_max ,beta_ptr[3]); beta_max = vmaxq_s8(beta_max ,beta_ptr[3]);
...@@ -577,7 +550,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -577,7 +550,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_max = vmaxq_s8(beta_max ,beta_ptr[5]); beta_max = vmaxq_s8(beta_max ,beta_ptr[5]);
beta_max = vmaxq_s8(beta_max ,beta_ptr[6]); beta_max = vmaxq_s8(beta_max ,beta_ptr[6]);
beta_max = vmaxq_s8(beta_max ,beta_ptr[7]); beta_max = vmaxq_s8(beta_max ,beta_ptr[7]);
beta_ptr[0] = vqsubq_s8(beta_ptr[0],beta_max); beta_ptr[0] = vqsubq_s8(beta_ptr[0],beta_max);
beta_ptr[1] = vqsubq_s8(beta_ptr[1],beta_max); beta_ptr[1] = vqsubq_s8(beta_ptr[1],beta_max);
beta_ptr[2] = vqsubq_s8(beta_ptr[2],beta_max); beta_ptr[2] = vqsubq_s8(beta_ptr[2],beta_max);
...@@ -592,10 +564,9 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -592,10 +564,9 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
// Set initial state for next iteration from the last state // Set initial state for next iteration from the last state
// as column last states are the first states of the next column // as column last states are the first states of the next column
// The initial state of column 0 is coming from tail bits (to be computed) // The initial state of column 0 is coming from tail bits (to be computed)
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
beta128 = (__m128i*)&beta[0]; beta128 = (__m128i *)&beta[0];
beta_ptr = (__m128i*)&beta[frame_length<<3]; beta_ptr = (__m128i *)&beta[frame_length<<3];
beta_ptr[0] = _mm_srli_si128(beta128[0],1); beta_ptr[0] = _mm_srli_si128(beta128[0],1);
beta_ptr[1] = _mm_srli_si128(beta128[1],1); beta_ptr[1] = _mm_srli_si128(beta128[1],1);
beta_ptr[2] = _mm_srli_si128(beta128[2],1); beta_ptr[2] = _mm_srli_si128(beta128[2],1);
...@@ -605,23 +576,29 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho ...@@ -605,23 +576,29 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho
beta_ptr[6] = _mm_srli_si128(beta128[6],1); beta_ptr[6] = _mm_srli_si128(beta128[6],1);
beta_ptr[7] = _mm_srli_si128(beta128[7],1); beta_ptr[7] = _mm_srli_si128(beta128[7],1);
#elif defined(__arm__) #elif defined(__arm__)
beta128 = (int8x16_t*)&beta[0]; beta128 = (int8x16_t *)&beta[0];
beta_ptr = (int8x16_t*)&beta[frame_length<<3]; beta_ptr = (int8x16_t *)&beta[frame_length<<3];
beta_ptr[0] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[0],8); beta_ptr[0] = vsetq_lane_s8(beta[7],beta_ptr[0],8); beta_ptr[0] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[0],8);
beta_ptr[1] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[1],8); beta_ptr[1] = vsetq_lane_s8(beta[23],beta_ptr[1],8); beta_ptr[0] = vsetq_lane_s8(beta[7],beta_ptr[0],8);
beta_ptr[2] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[2],8); beta_ptr[2] = vsetq_lane_s8(beta[39],beta_ptr[2],8); beta_ptr[1] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[1],8);
beta_ptr[3] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[3],8); beta_ptr[3] = vsetq_lane_s8(beta[55],beta_ptr[3],8); beta_ptr[1] = vsetq_lane_s8(beta[23],beta_ptr[1],8);
beta_ptr[4] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[4],8); beta_ptr[4] = vsetq_lane_s8(beta[71],beta_ptr[4],8); beta_ptr[2] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[2],8);
beta_ptr[5] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[5],8); beta_ptr[5] = vsetq_lane_s8(beta[87],beta_ptr[5],8); beta_ptr[2] = vsetq_lane_s8(beta[39],beta_ptr[2],8);
beta_ptr[6] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[6],8); beta_ptr[6] = vsetq_lane_s8(beta[103],beta_ptr[6],8); beta_ptr[3] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[3],8);
beta_ptr[7] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[7],8); beta_ptr[7] = vsetq_lane_s8(beta[119],beta_ptr[7],8); beta_ptr[3] = vsetq_lane_s8(beta[55],beta_ptr[3],8);
beta_ptr[4] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[4],8);
beta_ptr[4] = vsetq_lane_s8(beta[71],beta_ptr[4],8);
beta_ptr[5] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[5],8);
beta_ptr[5] = vsetq_lane_s8(beta[87],beta_ptr[5],8);
beta_ptr[6] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[6],8);
beta_ptr[6] = vsetq_lane_s8(beta[103],beta_ptr[6],8);
beta_ptr[7] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[7],8);
beta_ptr[7] = vsetq_lane_s8(beta[119],beta_ptr[7],8);
#endif #endif
} }
} }
void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, llr_t* systematic,unsigned short frame_length) void compute_ext8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, llr_t *systematic,unsigned short frame_length) {
{
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i *alpha128=(__m128i *)alpha; __m128i *alpha128=(__m128i *)alpha;
__m128i *beta128=(__m128i *)beta; __m128i *beta128=(__m128i *)beta;
...@@ -642,27 +619,20 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l ...@@ -642,27 +619,20 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
int8x16_t m11_1,m11_2,m11_3,m11_4; int8x16_t m11_1,m11_2,m11_3,m11_4;
#endif #endif
int k; int k;
// //
// LLR computation, 8 consecutive bits per loop // LLR computation, 8 consecutive bits per loop
// //
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("compute_ext, %p, %p, %p, %p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length); printf("compute_ext, %p, %p, %p, %p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length);
#endif #endif
alpha_ptr = alpha128; alpha_ptr = alpha128;
beta_ptr = &beta128[8]; beta_ptr = &beta128[8];
for (k=0; k<(frame_length>>4); k++) { for (k=0; k<(frame_length>>4); k++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
m11_128 = (__m128i *)&m_11[k<<4];
m11_128 = (__m128i*)&m_11[k<<4]; m10_128 = (__m128i *)&m_10[k<<4];
m10_128 = (__m128i*)&m_10[k<<4]; ext_128 = (__m128i *)&ext[k<<4];
ext_128 = (__m128i*)&ext[k<<4];
m00_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m00_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
m11_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m11_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11;
m00_3 = _mm_adds_epi8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; m00_3 = _mm_adds_epi8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00;
...@@ -679,7 +649,6 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l ...@@ -679,7 +649,6 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
m10_2 = _mm_adds_epi8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_2 = _mm_adds_epi8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10;
m10_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m10_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10;
m01_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; m01_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01;
m01_1 = _mm_max_epi8(m01_1,m01_2); m01_1 = _mm_max_epi8(m01_1,m01_2);
m01_1 = _mm_max_epi8(m01_1,m01_3); m01_1 = _mm_max_epi8(m01_1,m01_3);
m01_1 = _mm_max_epi8(m01_1,m01_4); m01_1 = _mm_max_epi8(m01_1,m01_4);
...@@ -692,28 +661,19 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l ...@@ -692,28 +661,19 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
m11_1 = _mm_max_epi8(m11_1,m11_2); m11_1 = _mm_max_epi8(m11_1,m11_2);
m11_1 = _mm_max_epi8(m11_1,m11_3); m11_1 = _mm_max_epi8(m11_1,m11_3);
m11_1 = _mm_max_epi8(m11_1,m11_4); m11_1 = _mm_max_epi8(m11_1,m11_4);
m01_1 = _mm_subs_epi8(m01_1,*m10_128); m01_1 = _mm_subs_epi8(m01_1,*m10_128);
m00_1 = _mm_subs_epi8(m00_1,*m11_128); m00_1 = _mm_subs_epi8(m00_1,*m11_128);
m10_1 = _mm_adds_epi8(m10_1,*m10_128); m10_1 = _mm_adds_epi8(m10_1,*m10_128);
m11_1 = _mm_adds_epi8(m11_1,*m11_128); m11_1 = _mm_adds_epi8(m11_1,*m11_128);
m01_1 = _mm_max_epi8(m01_1,m00_1); m01_1 = _mm_max_epi8(m01_1,m00_1);
m10_1 = _mm_max_epi8(m10_1,m11_1); m10_1 = _mm_max_epi8(m10_1,m11_1);
*ext_128 = _mm_subs_epi8(m10_1,m01_1); *ext_128 = _mm_subs_epi8(m10_1,m01_1);
alpha_ptr+=8; alpha_ptr+=8;
beta_ptr+=8; beta_ptr+=8;
#elif defined(__arm__) #elif defined(__arm__)
m11_128 = (int8x16_t *)&m_11[k<<4];
m11_128 = (int8x16_t*)&m_11[k<<4]; m10_128 = (int8x16_t *)&m_10[k<<4];
m10_128 = (int8x16_t*)&m_10[k<<4]; ext_128 = (int8x16_t *)&ext[k<<4];
ext_128 = (int8x16_t*)&ext[k<<4];
m00_4 = vqaddq_s8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m00_4 = vqaddq_s8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
m11_4 = vqaddq_s8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m11_4 = vqaddq_s8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11;
m00_3 = vqaddq_s8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; m00_3 = vqaddq_s8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00;
...@@ -730,7 +690,6 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l ...@@ -730,7 +690,6 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
m10_2 = vqaddq_s8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_2 = vqaddq_s8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10;
m10_1 = vqaddq_s8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m10_1 = vqaddq_s8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10;
m01_1 = vqaddq_s8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; m01_1 = vqaddq_s8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01;
m01_1 = vmaxq_s8(m01_1,m01_2); m01_1 = vmaxq_s8(m01_1,m01_2);
m01_1 = vmaxq_s8(m01_1,m01_3); m01_1 = vmaxq_s8(m01_1,m01_3);
m01_1 = vmaxq_s8(m01_1,m01_4); m01_1 = vmaxq_s8(m01_1,m01_4);
...@@ -743,27 +702,17 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l ...@@ -743,27 +702,17 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
m11_1 = vmaxq_s8(m11_1,m11_2); m11_1 = vmaxq_s8(m11_1,m11_2);
m11_1 = vmaxq_s8(m11_1,m11_3); m11_1 = vmaxq_s8(m11_1,m11_3);
m11_1 = vmaxq_s8(m11_1,m11_4); m11_1 = vmaxq_s8(m11_1,m11_4);
m01_1 = vqsubq_s8(m01_1,*m10_128); m01_1 = vqsubq_s8(m01_1,*m10_128);
m00_1 = vqsubq_s8(m00_1,*m11_128); m00_1 = vqsubq_s8(m00_1,*m11_128);
m10_1 = vqaddq_s8(m10_1,*m10_128); m10_1 = vqaddq_s8(m10_1,*m10_128);
m11_1 = vqaddq_s8(m11_1,*m11_128); m11_1 = vqaddq_s8(m11_1,*m11_128);
m01_1 = vmaxq_s8(m01_1,m00_1); m01_1 = vmaxq_s8(m01_1,m00_1);
m10_1 = vmaxq_s8(m10_1,m11_1); m10_1 = vmaxq_s8(m10_1,m11_1);
*ext_128 = vqsubq_s8(m10_1,m01_1); *ext_128 = vqsubq_s8(m10_1,m01_1);
alpha_ptr+=8; alpha_ptr+=8;
beta_ptr+=8; beta_ptr+=8;
#endif #endif
} }
} }
...@@ -771,8 +720,7 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l ...@@ -771,8 +720,7 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l
//int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8], //int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8],
int *pi2tab8[188],*pi5tab8[188],*pi4tab8[188],*pi6tab8[188]; int *pi2tab8[188],*pi5tab8[188],*pi4tab8[188],*pi6tab8[188];
void free_td8(void) void free_td8(void) {
{
int ind; int ind;
for (ind=0; ind<188; ind++) { for (ind=0; ind<188; ind++) {
...@@ -787,14 +735,11 @@ void free_td8(void) ...@@ -787,14 +735,11 @@ void free_td8(void)
extern RAN_CONTEXT_t RC; extern RAN_CONTEXT_t RC;
void init_td8(void) void init_td8(void) {
{
int ind,i,j,n,n2,pi,pi3; int ind,i,j,n,n2,pi,pi3;
short * base_interleaver; short *base_interleaver;
for (ind=0; ind<188; ind++) { for (ind=0; ind<188; ind++) {
n = f1f2mat[ind].nb_bits; n = f1f2mat[ind].nb_bits;
base_interleaver=il_tb+f1f2mat[ind].beg_index; base_interleaver=il_tb+f1f2mat[ind].beg_index;
#ifdef MEX #ifdef MEX
...@@ -816,68 +761,57 @@ void init_td8(void) ...@@ -816,68 +761,57 @@ void init_td8(void)
n2 = n; n2 = n;
for (j=0,i=0; i<n2; i++,j+=16) { for (j=0,i=0; i<n2; i++,j+=16) {
if (j>=n2) if (j>=n2)
j-=(n2-1); j-=(n2-1);
pi2tab8[ind][i] = j; pi2tab8[ind][i] = j;
// printf("pi2[%d] = %d\n",i,j); // printf("pi2[%d] = %d\n",i,j);
} }
for (i=0; i<n2; i++) { for (i=0; i<n2; i++) {
pi = base_interleaver[i];//(unsigned int)threegpplte_interleaver(f1,f2,n); pi = base_interleaver[i];//(unsigned int)threegpplte_interleaver(f1,f2,n);
pi3 = pi2tab8[ind][pi]; pi3 = pi2tab8[ind][pi];
pi4tab8[ind][pi2tab8[ind][i]] = pi3; pi4tab8[ind][pi2tab8[ind][i]] = pi3;
pi5tab8[ind][pi3] = pi2tab8[ind][i]; pi5tab8[ind][pi3] = pi2tab8[ind][i];
pi6tab8[ind][pi] = pi2tab8[ind][i]; pi6tab8[ind][pi] = pi2tab8[ind][i];
} }
} }
} }
uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
int16_t *y2, int16_t *y2,
uint8_t *decoded_bytes, uint8_t *decoded_bytes,
uint8_t *decoded_bytes2, uint8_t *decoded_bytes2,
uint16_t n, uint16_t n,
uint8_t max_iterations, uint8_t max_iterations,
uint8_t crc_type, uint8_t crc_type,
uint8_t F, uint8_t F,
time_stats_t *init_stats, time_stats_t *init_stats,
time_stats_t *alpha_stats, time_stats_t *alpha_stats,
time_stats_t *beta_stats, time_stats_t *beta_stats,
time_stats_t *gamma_stats, time_stats_t *gamma_stats,
time_stats_t *ext_stats, time_stats_t *ext_stats,
time_stats_t *intl1_stats, time_stats_t *intl1_stats,
time_stats_t *intl2_stats) { time_stats_t *intl2_stats) {
/* y is a pointer to the input /* y is a pointer to the input
decoded_bytes is a pointer to the decoded output decoded_bytes is a pointer to the decoded output
n is the size in bits of the coded block, with the tail */ n is the size in bits of the coded block, with the tail */
int n2; int n2;
llr_t y8[3*(n+16)] __attribute__((aligned(16))); llr_t y8[3*(n+16)] __attribute__((aligned(16)));
llr_t systematic0[n+16] __attribute__ ((aligned(16))); llr_t systematic0[n+16] __attribute__ ((aligned(16)));
llr_t systematic1[n+16] __attribute__ ((aligned(16))); llr_t systematic1[n+16] __attribute__ ((aligned(16)));
llr_t systematic2[n+16] __attribute__ ((aligned(16))); llr_t systematic2[n+16] __attribute__ ((aligned(16)));
llr_t yparity1[n+16] __attribute__ ((aligned(16))); llr_t yparity1[n+16] __attribute__ ((aligned(16)));
llr_t yparity2[n+16] __attribute__ ((aligned(16))); llr_t yparity2[n+16] __attribute__ ((aligned(16)));
llr_t ext[n+128] __attribute__((aligned(16))); llr_t ext[n+128] __attribute__((aligned(16)));
llr_t ext2[n+128] __attribute__((aligned(16))); llr_t ext2[n+128] __attribute__((aligned(16)));
llr_t alpha[(n+16)*8] __attribute__ ((aligned(16))); llr_t alpha[(n+16)*8] __attribute__ ((aligned(16)));
llr_t beta[(n+16)*8] __attribute__ ((aligned(16))); llr_t beta[(n+16)*8] __attribute__ ((aligned(16)));
llr_t m11[n+16] __attribute__ ((aligned(16))); llr_t m11[n+16] __attribute__ ((aligned(16)));
llr_t m10[n+16] __attribute__ ((aligned(16))); llr_t m10[n+16] __attribute__ ((aligned(16)));
// int *pi2_p,*pi4_p,*pi5_p,*pi6_p; // int *pi2_p,*pi4_p,*pi5_p,*pi6_p;
int *pi4_p,*pi5_p,*pi6_p; int *pi4_p,*pi5_p,*pi6_p;
llr_t *s,*s1,*s2,*yp1,*yp2,*yp; llr_t *s,*s1,*s2,*yp1,*yp2,*yp;
unsigned int i,j,iind;//,pi; unsigned int i,j,iind;//,pi;
unsigned char iteration_cnt=0; unsigned char iteration_cnt=0;
unsigned int crc,oldcrc,crc_len; unsigned int crc,oldcrc,crc_len;
...@@ -890,13 +824,11 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -890,13 +824,11 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
int8x16_t *yp128; int8x16_t *yp128;
int8x16_t tmp128[(n+8)>>3]; int8x16_t tmp128[(n+8)>>3];
int8x16_t tmp, zeros=vdupq_n_s8(0); int8x16_t tmp, zeros=vdupq_n_s8(0);
const uint8_t __attribute__ ((aligned (16))) _Powers[16]= const uint8_t __attribute__ ((aligned (16))) _Powers[16]=
{ 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 }; { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 };
// Set the powers of 2 (do it once for all, if applicable) // Set the powers of 2 (do it once for all, if applicable)
uint8x16_t Powers= vld1q_u8(_Powers); uint8x16_t Powers= vld1q_u8(_Powers);
#endif #endif
int offset8_flag=0; int offset8_flag=0;
if (crc_type > 3) { if (crc_type > 3) {
...@@ -904,17 +836,14 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -904,17 +836,14 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
return 255; return 255;
} }
if (init_stats) start_meas(init_stats); if (init_stats) start_meas(init_stats);
if ((n&15)>0) { if ((n&15)>0) {
n2 = n+8; n2 = n+8;
offset8_flag=1; offset8_flag=1;
} else } else
n2 = n; n2 = n;
for (iind=0; iind < 188 && f1f2mat[iind].nb_bits != n; iind++); for (iind=0; iind < 188 && f1f2mat[iind].nb_bits != n; iind++);
if ( iind == 188 ) { if ( iind == 188 ) {
...@@ -923,31 +852,30 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -923,31 +852,30 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
} }
switch (crc_type) { switch (crc_type) {
case CRC24_A: case CRC24_A:
case CRC24_B: case CRC24_B:
crc_len=3; crc_len=3;
break; break;
case CRC16: case CRC16:
crc_len=2; crc_len=2;
break; break;
case CRC8: case CRC8:
crc_len=1; crc_len=1;
break; break;
default: default:
crc_len=3; crc_len=3;
} }
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
// note: this makes valgrind freak // note: this makes valgrind freak
__m128i avg=_mm_set1_epi32(0); __m128i avg=_mm_set1_epi32(0);
for (i=0; i<(3*(n>>4))+1; i++) { for (i=0; i<(3*(n>>4))+1; i++) {
__m128i tmp=_mm_abs_epi16(_mm_unpackhi_epi16(((__m128i*)y)[i],((__m128i*)y)[i])); __m128i tmp=_mm_abs_epi16(_mm_unpackhi_epi16(((__m128i *)y)[i],((__m128i *)y)[i]));
avg=_mm_add_epi32(_mm_cvtepi16_epi32(_mm_abs_epi16(((__m128i*)y)[i])),avg); avg=_mm_add_epi32(_mm_cvtepi16_epi32(_mm_abs_epi16(((__m128i *)y)[i])),avg);
avg=_mm_add_epi32(_mm_cvtepi16_epi32(tmp),avg); avg=_mm_add_epi32(_mm_cvtepi16_epi32(tmp),avg);
} }
...@@ -971,15 +899,13 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -971,15 +899,13 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
for (i=0,j=0; i<(3*(n2>>4))+1; i++,j+=2) for (i=0,j=0; i<(3*(n2>>4))+1; i++,j+=2)
((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],3),_mm_srai_epi16(((__m128i *)y)[j+1],4)); ((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],3),_mm_srai_epi16(((__m128i *)y)[j+1],4));
yp128 = (__m128i*)y8; yp128 = (__m128i *)y8;
#elif defined(__arm__) #elif defined(__arm__)
int32x4_t avg=vdupq_n_s32(0); int32x4_t avg=vdupq_n_s32(0);
for (i=0; i<(3*(n>>4))+1; i++) { for (i=0; i<(3*(n>>4))+1; i++) {
int16x8_t tmp=vabsq_s16(((int16x8_t*)y)[i]); int16x8_t tmp=vabsq_s16(((int16x8_t *)y)[i]);
avg = vqaddq_s32(avg,vaddl_s16(((int16x4_t*)&tmp)[0],((int16x4_t*)&tmp)[1])); avg = vqaddq_s32(avg,vaddl_s16(((int16x4_t *)&tmp)[0],((int16x4_t *)&tmp)[1]));
} }
int32_t round_avg=(vgetq_lane_s32(avg,0)+vgetq_lane_s32(avg,1)+vgetq_lane_s32(avg,2)+vgetq_lane_s32(avg,3))/(n*3); int32_t round_avg=(vgetq_lane_s32(avg,0)+vgetq_lane_s32(avg,1)+vgetq_lane_s32(avg,2)+vgetq_lane_s32(avg,3))/(n*3);
...@@ -999,10 +925,8 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -999,10 +925,8 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
for (i=0,j=0; i<(3*(n2>>3))+1; i++,j+=2) for (i=0,j=0; i<(3*(n2>>3))+1; i++,j+=2)
((int8x8_t *)y8)[i] = vqmovn_s16(vshrq_n_s16(((int16x8_t *)y)[j],3)); ((int8x8_t *)y8)[i] = vqmovn_s16(vshrq_n_s16(((int16x8_t *)y)[j],3));
yp128 = (int8x16_t*)y8; yp128 = (int8x16_t *)y8;
#endif #endif
s = systematic0; s = systematic0;
s1 = systematic1; s1 = systematic1;
s2 = systematic2; s2 = systematic2;
...@@ -1020,8 +944,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1020,8 +944,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
} }
#endif #endif
yp=(llr_t *)yp128;
yp=(llr_t*)yp128;
if (n2>n) { if (n2>n) {
/* /*
...@@ -1031,7 +954,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1031,7 +954,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
s1[n+4]=0;s1[n+5]=0;s1[n+6]=0;s1[n+7]=0; s1[n+4]=0;s1[n+5]=0;s1[n+6]=0;s1[n+7]=0;
s2[n]=0;s2[n+1]=0;s2[n+2]=0;s2[n+3]=0; s2[n]=0;s2[n+1]=0;s2[n+2]=0;s2[n+3]=0;
s2[n+4]=0;s2[n+5]=0;s2[n+6]=0;s2[n+7]=0;*/ s2[n+4]=0;s2[n+5]=0;s2[n+6]=0;s2[n+7]=0;*/
yp=(llr_t*)(y8+n); yp=(llr_t *)(y8+n);
} }
// printf("n=%d,n2=%d\n",n,n2); // printf("n=%d,n2=%d\n",n,n2);
...@@ -1045,7 +968,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1045,7 +968,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
yp1[i] = *yp; yp1[i] = *yp;
yp++; yp++;
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("Term 1 (%d): %d %d\n",i,s[i],yp1[i]); printf("Term 1 (%u): %d %d\n",i,s[i],yp1[i]);
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
} }
...@@ -1057,7 +980,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1057,7 +980,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
yp2[i-16] = *yp; yp2[i-16] = *yp;
yp++; yp++;
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("Term 2 (%d): %d %d\n",i-16,s[i],yp2[i-16]); printf("Term 2 (%u): %d %d\n",i-16,s[i],yp2[i-16]);
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
} }
...@@ -1068,63 +991,59 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1068,63 +991,59 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
if (init_stats) stop_meas(init_stats); if (init_stats) stop_meas(init_stats);
// do log_map from first parity bit // do log_map from first parity bit
log_map8(systematic0,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map8(systematic0,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
while (iteration_cnt++ < max_iterations) {
while (iteration_cnt++ < max_iterations) {
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("\n*******************ITERATION %d (n %d, n2 %d), ext %p\n\n",iteration_cnt,n,n2,ext); printf("\n*******************ITERATION %d (n %d, n2 %d), ext %p\n\n",iteration_cnt,n,n2,ext);
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
if (intl1_stats) start_meas(intl1_stats); if (intl1_stats) start_meas(intl1_stats);
pi4_p=pi4tab8[iind]; pi4_p=pi4tab8[iind];
for (i=0; i<(n2>>4); i++) { // steady-state portion for (i=0; i<(n2>>4); i++) { // steady-state portion
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],0); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],0);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],1); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],1);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],2); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],2);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],3); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],3);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],4); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],4);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],5); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],5);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],6); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],6);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],7); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],7);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],8); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],8);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],9); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],9);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],10); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],10);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],11); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],11);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],12); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],12);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],13); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],13);
tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],14); tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],14);
((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],15); ((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],15);
#elif defined(__arm__) #elif defined(__arm__)
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,0); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,0);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,1); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,1);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,2); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,2);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,3); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,3);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,4); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,4);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,5); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,5);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,6); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,6);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,7); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,7);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,8); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,8);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,9); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,9);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,10); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,10);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,11); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,11);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,12); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,12);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,13); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,13);
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,14); tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,14);
((int8x16_t *)systematic2)[i]=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,15); ((int8x16_t *)systematic2)[i]=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,15);
#endif #endif
} }
if (intl1_stats) stop_meas(intl1_stats); if (intl1_stats) stop_meas(intl1_stats);
// do log_map from second parity bit // do log_map from second parity bit
log_map8(systematic2,yparity2,m11,m10,alpha,beta,ext2,n2,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map8(systematic2,yparity2,m11,m10,alpha,beta,ext2,n2,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
pi5_p=pi5tab8[iind]; pi5_p=pi5tab8[iind];
uint16_t decoded_bytes_interl[6144/16] __attribute__((aligned(16))); uint16_t decoded_bytes_interl[6144/16] __attribute__((aligned(16)));
...@@ -1148,7 +1067,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1148,7 +1067,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],14); tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],14);
tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15); tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15);
decoded_bytes_interl[i]=(uint16_t) _mm_movemask_epi8(_mm_cmpgt_epi8(tmp,zeros)); decoded_bytes_interl[i]=(uint16_t) _mm_movemask_epi8(_mm_cmpgt_epi8(tmp,zeros));
((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]); ((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]);
#elif defined(__arm__) #elif defined(__arm__)
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,0); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,0);
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,1); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,1);
...@@ -1166,13 +1085,12 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1166,13 +1085,12 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,13); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,13);
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,14); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,14);
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,15); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,15);
uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(vcgtq_s8(tmp,zeros), Powers)))); uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(vcgtq_s8(tmp,zeros), Powers))));
vst1q_lane_u8(&((uint8_t*)&decoded_bytes[i])[0], (uint8x16_t)Mask, 0); vst1q_lane_u8(&((uint8_t *)&decoded_bytes[i])[0], (uint8x16_t)Mask, 0);
vst1q_lane_u8(&((uint8_t*)&decoded_bytes[i])[1], (uint8x16_t)Mask, 8); vst1q_lane_u8(&((uint8_t *)&decoded_bytes[i])[1], (uint8x16_t)Mask, 8);
((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t*)ext)[i]),((int8x16_t *)systematic0)[i]); ((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t *)ext)[i]),((int8x16_t *)systematic0)[i]);
#endif #endif
} }
} else { } else {
for (i=0; i<(n2>>4); i++) { for (i=0; i<(n2>>4); i++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
...@@ -1193,8 +1111,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1193,8 +1111,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],14); tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],14);
tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15); tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15);
tmp128[i] = _mm_adds_epi8(((__m128i *)ext2)[i],((__m128i *)systematic2)[i]); tmp128[i] = _mm_adds_epi8(((__m128i *)ext2)[i],((__m128i *)systematic2)[i]);
((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]);
((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]);
#elif defined(__arm__) #elif defined(__arm__)
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,0); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,0);
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,1); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,1);
...@@ -1213,11 +1130,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1213,11 +1130,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,14); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,14);
tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,15); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,15);
tmp128[i] = vqaddq_s8(((int8x16_t *)ext2)[i],((int8x16_t *)systematic2)[i]); tmp128[i] = vqaddq_s8(((int8x16_t *)ext2)[i],((int8x16_t *)systematic2)[i]);
((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t *)ext)[i]),((int8x16_t *)systematic0)[i]);
((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t*)ext)[i]),((int8x16_t *)systematic0)[i]); #endif
}
#endif
}
} }
// Check if we decoded the block // Check if we decoded the block
...@@ -1225,11 +1140,10 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1225,11 +1140,10 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
if (intl2_stats) start_meas(intl2_stats); if (intl2_stats) start_meas(intl2_stats);
if ((n2&0x7f) == 0) { // n2 is a multiple of 128 bits if ((n2&0x7f) == 0) { // n2 is a multiple of 128 bits
// re-order the decoded bits in theregular order // re-order the decoded bits in theregular order
// as it is presently ordered as 16 sequential columns // as it is presently ordered as 16 sequential columns
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i* dbytes=(__m128i*)decoded_bytes_interl; __m128i *dbytes=(__m128i *)decoded_bytes_interl;
__m128i shuffle=SHUFFLE16(7,6,5,4,3,2,1,0); __m128i shuffle=SHUFFLE16(7,6,5,4,3,2,1,0);
__m128i mask __attribute__((aligned(16))); __m128i mask __attribute__((aligned(16)));
int n_128=n2>>7; int n_128=n2>>7;
...@@ -1239,10 +1153,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1239,10 +1153,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
__m128i tmp __attribute__((aligned(16))); __m128i tmp __attribute__((aligned(16)));
tmp=_mm_shuffle_epi8(dbytes[i],shuffle); tmp=_mm_shuffle_epi8(dbytes[i],shuffle);
__m128i tmp2 __attribute__((aligned(16))) ; __m128i tmp2 __attribute__((aligned(16))) ;
tmp2=_mm_and_si128(tmp,mask); tmp2=_mm_and_si128(tmp,mask);
tmp2=_mm_cmpeq_epi16(tmp2,mask); tmp2=_mm_cmpeq_epi16(tmp2,mask);
// printf("decoded_bytes %p\n",decoded_bytes); // printf("decoded_bytes %p\n",decoded_bytes);
decoded_bytes[n_128*0+i]=(uint8_t) _mm_movemask_epi8(_mm_packs_epi16(tmp2,zeros)); decoded_bytes[n_128*0+i]=(uint8_t) _mm_movemask_epi8(_mm_packs_epi16(tmp2,zeros));
int j; int j;
...@@ -1253,22 +1166,22 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1253,22 +1166,22 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
decoded_bytes[n_128*j +i]=(uint8_t) _mm_movemask_epi8(_mm_packs_epi16(tmp2,zeros)); decoded_bytes[n_128*j +i]=(uint8_t) _mm_movemask_epi8(_mm_packs_epi16(tmp2,zeros));
} }
} }
#elif defined(__arm__) #elif defined(__arm__)
uint8x16_t* dbytes=(uint8x16_t*)decoded_bytes_interl; uint8x16_t *dbytes=(uint8x16_t *)decoded_bytes_interl;
uint16x8_t mask __attribute__((aligned(16))); uint16x8_t mask __attribute__((aligned(16)));
int n_128=n2>>7; int n_128=n2>>7;
for (i=0; i<n_128; i++) { for (i=0; i<n_128; i++) {
mask=vdupq_n_u16(1); mask=vdupq_n_u16(1);
uint8x16_t tmp __attribute__((aligned(16))); uint8x16_t tmp __attribute__((aligned(16)));
tmp=vcombine_u8(vrev64_u8(((uint8x8_t*)&dbytes[i])[1]),vrev64_u8(((uint8x8_t*)&dbytes[i])[0])); tmp=vcombine_u8(vrev64_u8(((uint8x8_t *)&dbytes[i])[1]),vrev64_u8(((uint8x8_t *)&dbytes[i])[0]));
vst1q_lane_u8(&decoded_bytes[n_128*0+i],(uint8x16_t)vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))),0); vst1q_lane_u8(&decoded_bytes[n_128*0+i],(uint8x16_t)vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))),0);
int j; int j;
for (j=1; j<16; j++) { for (j=1; j<16; j++) {
mask=vshlq_n_u16(mask,1); mask=vshlq_n_u16(mask,1);
vst1q_lane_u8(&decoded_bytes[n_128*0+i],(uint8x16_t)vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))),0); vst1q_lane_u8(&decoded_bytes[n_128*0+i],(uint8x16_t)vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))),0);
} }
} }
...@@ -1313,9 +1226,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1313,9 +1226,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,10); tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,10);
tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,9); tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,9);
tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,8); tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,8);
uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(vcgtq_s8(tmp,zeros), Powers)))); uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(vcgtq_s8(tmp,zeros), Powers))));
vst1q_lane_u8(&((uint8_t*)&decoded_bytes[i])[0], (uint8x16_t)Mask, 0); vst1q_lane_u8(&((uint8_t *)&decoded_bytes[i])[0], (uint8x16_t)Mask, 0);
vst1q_lane_u8(&((uint8_t*)&decoded_bytes[i])[1], (uint8x16_t)Mask, 8); vst1q_lane_u8(&((uint8_t *)&decoded_bytes[i])[1], (uint8x16_t)Mask, 8);
#endif #endif
} }
} }
...@@ -1324,41 +1237,40 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1324,41 +1237,40 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len])); oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len]));
switch (crc_type) { switch (crc_type) {
case CRC24_A:
case CRC24_A: oldcrc&=0x00ffffff;
oldcrc&=0x00ffffff; crc = crc24a(&decoded_bytes[F>>3],
crc = crc24a(&decoded_bytes[F>>3], n-24-F)>>8;
n-24-F)>>8; temp=((uint8_t *)&crc)[2];
temp=((uint8_t *)&crc)[2]; ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0];
((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; ((uint8_t *)&crc)[0] = temp;
((uint8_t *)&crc)[0] = temp; break;
break;
case CRC24_B:
case CRC24_B: oldcrc&=0x00ffffff;
oldcrc&=0x00ffffff; crc = crc24b(decoded_bytes,
crc = crc24b(decoded_bytes, n-24)>>8;
n-24)>>8; temp=((uint8_t *)&crc)[2];
temp=((uint8_t *)&crc)[2]; ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0];
((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; ((uint8_t *)&crc)[0] = temp;
((uint8_t *)&crc)[0] = temp; break;
break;
case CRC16:
case CRC16: oldcrc&=0x0000ffff;
oldcrc&=0x0000ffff; crc = crc16(decoded_bytes,
crc = crc16(decoded_bytes, n-16)>>16;
n-16)>>16; break;
break;
case CRC8:
case CRC8: oldcrc&=0x000000ff;
oldcrc&=0x000000ff; crc = crc8(decoded_bytes,
crc = crc8(decoded_bytes, n-8)>>24;
n-8)>>24; break;
break;
default:
default: printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n");
printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); return(255);
return(255); break;
break;
} }
if (intl2_stats) stop_meas(intl2_stats); if (intl2_stats) stop_meas(intl2_stats);
...@@ -1372,13 +1284,13 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1372,13 +1284,13 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
if (iteration_cnt < max_iterations) { if (iteration_cnt < max_iterations) {
log_map8(systematic1,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); log_map8(systematic1,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats);
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i* ext_128=(__m128i*) ext; __m128i *ext_128=(__m128i *) ext;
__m128i* s1_128=(__m128i*) systematic1; __m128i *s1_128=(__m128i *) systematic1;
__m128i* s0_128=(__m128i*) systematic0; __m128i *s0_128=(__m128i *) systematic0;
#elif defined(__arm__) #elif defined(__arm__)
int8x16_t* ext_128=(int8x16_t*) ext; int8x16_t *ext_128=(int8x16_t *) ext;
int8x16_t* s1_128=(int8x16_t*) systematic1; int8x16_t *s1_128=(int8x16_t *) systematic1;
int8x16_t* s0_128=(int8x16_t*) systematic0; int8x16_t *s0_128=(int8x16_t *) systematic0;
#endif #endif
int myloop=n2>>4; int myloop=n2>>4;
...@@ -1394,5 +1306,4 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, ...@@ -1394,5 +1306,4 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
} }
return(iteration_cnt); return(iteration_cnt);
} }
...@@ -47,27 +47,20 @@ void ...@@ -47,27 +47,20 @@ void
ccodedot11_encode (unsigned int numbytes, ccodedot11_encode (unsigned int numbytes,
unsigned char *inPtr, unsigned char *inPtr,
unsigned char *outPtr, unsigned char *outPtr,
unsigned char puncturing) unsigned char puncturing) {
{
unsigned int state; unsigned int state;
unsigned char c, out, shiftbit =0; unsigned char c, out, shiftbit =0;
// printf("In ccodedot11_encode (%d,%p,%p,%d)\n",numbytes,inPtr,outPtr,puncturing); // printf("In ccodedot11_encode (%d,%p,%p,%d)\n",numbytes,inPtr,outPtr,puncturing);
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
unsigned int dummy; unsigned int dummy;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
int bit_index; int bit_index;
/* The input bit is shifted in position 8 of the state. /* The input bit is shifted in position 8 of the state.
Shiftbit will take values between 1 and 8 */ Shiftbit will take values between 1 and 8 */
state = 0; state = 0;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
dummy = 0; dummy = 0;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
/* Do not increment inPtr until we read the next octet */ /* Do not increment inPtr until we read the next octet */
bit_index=0; bit_index=0;
...@@ -78,85 +71,75 @@ ccodedot11_encode (unsigned int numbytes, ...@@ -78,85 +71,75 @@ ccodedot11_encode (unsigned int numbytes,
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
switch (puncturing) { switch (puncturing) {
case 0: //rate 1/2 case 0: //rate 1/2
for (shiftbit = 0; shiftbit<8; shiftbit++) { for (shiftbit = 0; shiftbit<8; shiftbit++) {
state >>= 1;
state >>= 1;
if ((c&(1<<shiftbit)) != 0) {
state |= 64;
}
out = ccodedot11_table[state]; if ((c&(1<<shiftbit)) != 0) {
state |= 64;
*outPtr++ = out & 1; }
*outPtr++ = (out>>1)&1;
out = ccodedot11_table[state];
*outPtr++ = out & 1;
*outPtr++ = (out>>1)&1;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("%d: %d -> %d (%d)\n",dummy,state,out,ccodedot11_table[state]); printf("%u: %u -> %d (%u)\n",dummy,state,out,ccodedot11_table[state]);
dummy+=2; dummy+=2;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
}
} break;
break;
case 1: // rate 3/4
for (shiftbit = 0; shiftbit<8; shiftbit++) {
state >>= 1; case 1: // rate 3/4
for (shiftbit = 0; shiftbit<8; shiftbit++) {
state >>= 1;
if ((c&(1<<shiftbit)) != 0) { if ((c&(1<<shiftbit)) != 0) {
state |= 64; state |= 64;
} }
out = ccodedot11_table[state]; out = ccodedot11_table[state];
if (bit_index<2) if (bit_index<2)
*outPtr++ = out & 1; *outPtr++ = out & 1;
if (bit_index!=1) if (bit_index!=1)
*outPtr++ = (out>>1)&1; *outPtr++ = (out>>1)&1;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("%d: %d -> %d (%d)\n",dummy,state,out,ccodedot11_table[state]); printf("%u: %u -> %d (%u)\n",dummy,state,out,ccodedot11_table[state]);
dummy+=2; dummy+=2;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
bit_index=(bit_index==2)?0:(bit_index+1);
}
bit_index=(bit_index==2)?0:(bit_index+1); break;
}
break;
case 2: // rate 2/3
for (shiftbit = 0; shiftbit<8; shiftbit++) {
state >>= 1;
if ((c&(1<<shiftbit)) != 0) { case 2: // rate 2/3
state |= 64; for (shiftbit = 0; shiftbit<8; shiftbit++) {
} state >>= 1;
out = ccodedot11_table[state]; if ((c&(1<<shiftbit)) != 0) {
state |= 64;
}
*outPtr++ = out & 1; out = ccodedot11_table[state];
*outPtr++ = out & 1;
if (bit_index==0) if (bit_index==0)
*outPtr++ = (out>>1)&1; *outPtr++ = (out>>1)&1;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("%d: %d -> %d (%d)\n",dummy,state,out,ccodedot11_table[state]); printf("%d: %u -> %d (%u)\n",dummy,state,out,ccodedot11_table[state]);
dummy+=2; dummy+=2;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
bit_index=(bit_index==0)?1:0;
}
bit_index=(bit_index==0)?1:0; break;
}
break;
default: default:
break; break;
} }
} }
...@@ -181,8 +164,6 @@ ccodedot11_encode (unsigned int numbytes, ...@@ -181,8 +164,6 @@ ccodedot11_encode (unsigned int numbytes,
} }
*/ */
} }
...@@ -197,8 +178,7 @@ ccodedot11_encode (unsigned int numbytes, ...@@ -197,8 +178,7 @@ ccodedot11_encode (unsigned int numbytes,
/* Basic code table initialization for constraint length 7 */ /* Basic code table initialization for constraint length 7 */
/* Input in MSB, followed by state in 6 LSBs */ /* Input in MSB, followed by state in 6 LSBs */
void ccodedot11_init(void) void ccodedot11_init(void) {
{
unsigned int i, j, k, sum; unsigned int i, j, k, sum;
for (i = 0; i < 128; i++) { for (i = 0; i < 128; i++) {
...@@ -219,8 +199,7 @@ void ccodedot11_init(void) ...@@ -219,8 +199,7 @@ void ccodedot11_init(void)
} }
/* Input in LSB, followed by state in 6 MSBs */ /* Input in LSB, followed by state in 6 MSBs */
void ccodedot11_init_inv(void) void ccodedot11_init_inv(void) {
{
unsigned int i, j, k, sum; unsigned int i, j, k, sum;
for (i = 0; i < 128; i++) { for (i = 0; i < 128; i++) {
...@@ -251,21 +230,15 @@ void ccodedot11_init_inv(void) ...@@ -251,21 +230,15 @@ void ccodedot11_init_inv(void)
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
#include <stdio.h> #include <stdio.h>
main() main() {
{
unsigned char test[] = "0Thebigredfox"; unsigned char test[] = "0Thebigredfox";
unsigned char output[512], *inPtr, *outPtr; unsigned char output[512], *inPtr, *outPtr;
unsigned int i; unsigned int i;
test[0] = 128; test[0] = 128;
test[1] = 0; test[1] = 0;
ccodedot11_init(); ccodedot11_init();
inPtr = test; inPtr = test;
outPtr = output; outPtr = output;
ccodedot11_encode(16, inPtr, outPtr,0); ccodedot11_encode(16, inPtr, outPtr,0);
for (i = 0; i < 32; i++) printf("%x ", output[i]); for (i = 0; i < 32; i++) printf("%x ", output[i]);
......
...@@ -52,20 +52,16 @@ ccodelte_encode (int32_t numbits, ...@@ -52,20 +52,16 @@ ccodelte_encode (int32_t numbits,
uint8_t add_crc, uint8_t add_crc,
uint8_t *inPtr, uint8_t *inPtr,
uint8_t *outPtr, uint8_t *outPtr,
uint16_t rnti) uint16_t rnti) {
{
uint32_t state; uint32_t state;
uint8_t c, out, first_bit; uint8_t c, out, first_bit;
int8_t shiftbit=0; int8_t shiftbit=0;
uint16_t c16; uint16_t c16;
uint16_t next_last_byte=0; uint16_t next_last_byte=0;
uint32_t crc=0; uint32_t crc=0;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
uint32_t dummy=0; uint32_t dummy=0;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
/* The input bit is shifted in position 8 of the state. /* The input bit is shifted in position 8 of the state.
Shiftbit will take values between 1 and 8 */ Shiftbit will take values between 1 and 8 */
state = 0; state = 0;
...@@ -137,17 +133,12 @@ ccodelte_encode (int32_t numbits, ...@@ -137,17 +133,12 @@ ccodelte_encode (int32_t numbits,
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
/* Do not increment inPtr until we read the next octet */ /* Do not increment inPtr until we read the next octet */
while (numbits > 0) { while (numbits > 0) {
c = *inPtr++; c = *inPtr++;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("** %x **\n",c); printf("** %x **\n",c);
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
// for (shiftbit = 0; (shiftbit<8) && (numbits>0);shiftbit++,numbits--) { // for (shiftbit = 0; (shiftbit<8) && (numbits>0);shiftbit++,numbits--) {
for (shiftbit = 7; (shiftbit>=0) && (numbits>0); shiftbit--,numbits--) { for (shiftbit = 7; (shiftbit>=0) && (numbits>0); shiftbit--,numbits--) {
state >>= 1; state >>= 1;
...@@ -157,23 +148,18 @@ ccodelte_encode (int32_t numbits, ...@@ -157,23 +148,18 @@ ccodelte_encode (int32_t numbits,
} }
out = ccodelte_table[state]; out = ccodelte_table[state];
*outPtr++ = out & 1; *outPtr++ = out & 1;
*outPtr++ = (out>>1)&1; *outPtr++ = (out>>1)&1;
*outPtr++ = (out>>2)&1; *outPtr++ = (out>>2)&1;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("numbits %d, input %d, outbit %d: %d -> %d (%d%d%d)\n",numbits,state>>6,dummy,state,out,out&1,(out>>1)&1,(out>>2)&1); printf("numbits %d, input %d, outbit %d: %d -> %d (%d%d%d)\n",numbits,state>>6,dummy,state,out,out&1,(out>>1)&1,(out>>2)&1);
dummy+=3; dummy+=3;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
} }
} }
// now code 8-bit CRC for UCI // now code 8-bit CRC for UCI
if (add_crc == 1) { if (add_crc == 1) {
c = (uint8_t)(crc>>24); c = (uint8_t)(crc>>24);
// for (shiftbit = 0; (shiftbit<8);shiftbit++) { // for (shiftbit = 0; (shiftbit<8);shiftbit++) {
...@@ -185,22 +171,18 @@ ccodelte_encode (int32_t numbits, ...@@ -185,22 +171,18 @@ ccodelte_encode (int32_t numbits,
} }
out = ccodelte_table[state]; out = ccodelte_table[state];
*outPtr++ = out & 1; *outPtr++ = out & 1;
*outPtr++ = (out>>1)&1; *outPtr++ = (out>>1)&1;
*outPtr++ = (out>>2)&1; *outPtr++ = (out>>2)&1;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("crc bit %d input %d, outbit %d: %d -> %d (%d)\n",shiftbit,state>>6,dummy,state,out,ccodelte_table[state]); printf("crc bit %d input %d, outbit %d: %d -> %d (%u)\n",shiftbit,state>>6,dummy,state,out,ccodelte_table[state]);
dummy+=3; dummy+=3;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
} }
} }
// now code 16-bit CRC for DCI // now code 16-bit CRC for DCI
if (add_crc == 2) { if (add_crc == 2) {
c16 = (uint16_t)(crc>>16); c16 = (uint16_t)(crc>>16);
// for (shiftbit = 0; (shiftbit<16);shiftbit++) { // for (shiftbit = 0; (shiftbit<16);shiftbit++) {
...@@ -212,16 +194,13 @@ ccodelte_encode (int32_t numbits, ...@@ -212,16 +194,13 @@ ccodelte_encode (int32_t numbits,
} }
out = ccodelte_table[state]; out = ccodelte_table[state];
*outPtr++ = out & 1; *outPtr++ = out & 1;
*outPtr++ = (out>>1)&1; *outPtr++ = (out>>1)&1;
*outPtr++ = (out>>2)&1; *outPtr++ = (out>>2)&1;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("crc bit %d input %d, outbit %d: %d -> %d (%d)\n",shiftbit,state>>6,dummy,state,out,ccodelte_table[state]); printf("crc bit %d input %d, outbit %d: %d -> %d (%u)\n",shiftbit,state>>6,dummy,state,out,ccodelte_table[state]);
dummy+=3; dummy+=3;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
} }
} }
} }
...@@ -238,8 +217,7 @@ ccodelte_encode (int32_t numbits, ...@@ -238,8 +217,7 @@ ccodelte_encode (int32_t numbits,
/* Basic code table initialization for constraint length 7 */ /* Basic code table initialization for constraint length 7 */
/* Input in MSB, followed by state in 6 LSBs */ /* Input in MSB, followed by state in 6 LSBs */
void ccodelte_init(void) void ccodelte_init(void) {
{
unsigned int i, j, k, sum; unsigned int i, j, k, sum;
for (i = 0; i < 128; i++) { for (i = 0; i < 128; i++) {
...@@ -260,8 +238,7 @@ void ccodelte_init(void) ...@@ -260,8 +238,7 @@ void ccodelte_init(void)
} }
/* Input in LSB, followed by state in 6 MSBs */ /* Input in LSB, followed by state in 6 MSBs */
void ccodelte_init_inv(void) void ccodelte_init_inv(void) {
{
unsigned int i, j, k, sum; unsigned int i, j, k, sum;
for (i = 0; i < 128; i++) { for (i = 0; i < 128; i++) {
...@@ -281,8 +258,7 @@ void ccodelte_init_inv(void) ...@@ -281,8 +258,7 @@ void ccodelte_init_inv(void)
} }
} }
void ccodedab_init(void) void ccodedab_init(void) {
{
unsigned int i, j, k, sum; unsigned int i, j, k, sum;
for (i = 0; i < 128; i++) { for (i = 0; i < 128; i++) {
...@@ -303,8 +279,7 @@ void ccodedab_init(void) ...@@ -303,8 +279,7 @@ void ccodedab_init(void)
} }
/* Input in LSB, followed by state in 6 MSBs */ /* Input in LSB, followed by state in 6 MSBs */
void ccodedab_init_inv(void) void ccodedab_init_inv(void) {
{
unsigned int i, j, k, sum; unsigned int i, j, k, sum;
for (i = 0; i < 128; i++) { for (i = 0; i < 128; i++) {
...@@ -334,21 +309,15 @@ void ccodedab_init_inv(void) ...@@ -334,21 +309,15 @@ void ccodedab_init_inv(void)
#ifdef CCODE_MAIN #ifdef CCODE_MAIN
#include <stdio.h> #include <stdio.h>
main() main() {
{
unsigned char test[] = "Thebigredfox"; unsigned char test[] = "Thebigredfox";
unsigned char output[512], *inPtr, *outPtr; unsigned char output[512], *inPtr, *outPtr;
unsigned int i; unsigned int i;
test[0] = 128; test[0] = 128;
test[1] = 0; test[1] = 0;
ccodelte_init(); ccodelte_init();
inPtr = test; inPtr = test;
outPtr = output; outPtr = output;
ccodelte_encode(21, inPtr, outPtr); ccodelte_encode(21, inPtr, outPtr);
for (i = 0; i < 21*3; i++) printf("%x ", output[i]); for (i = 0; i < 21*3; i++) printf("%x ", output[i]);
......
...@@ -25,8 +25,8 @@ ...@@ -25,8 +25,8 @@
date: 21.10.2009 date: 21.10.2009
*/ */
#ifdef MAIN #ifdef MAIN
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#endif #endif
#include "PHY/defs_eNB.h" #include "PHY/defs_eNB.h"
#include "PHY/LTE_TRANSPORT/transport_common.h" #include "PHY/LTE_TRANSPORT/transport_common.h"
...@@ -42,9 +42,7 @@ static uint32_t bitrev_cc[32] = {1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31,0,16 ...@@ -42,9 +42,7 @@ static uint32_t bitrev_cc[32] = {1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31,0,16
//#define RM_DEBUG2 1 //#define RM_DEBUG2 1
//#define RM_DEBUG_CC 1 //#define RM_DEBUG_CC 1
uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) {
{
uint32_t RTC = (D>>5), ND, ND3; uint32_t RTC = (D>>5), ND, ND3;
uint32_t row,col,Kpi; uint32_t row,col,Kpi;
uint32_t index3,k,k2; uint32_t index3,k,k2;
...@@ -64,7 +62,6 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -64,7 +62,6 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
printf("RTC = %d, Kpi=%d, ND=%d\n",RTC,Kpi,ND); printf("RTC = %d, Kpi=%d, ND=%d\n",RTC,Kpi,ND);
#endif #endif
ND3 = ND*3; ND3 = ND*3;
// copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212 // copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212
d[(3*D)+2] = d[2]; d[(3*D)+2] = d[2];
k=0; k=0;
...@@ -80,12 +77,9 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -80,12 +77,9 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
index3 = bitrev_x3[col];//3*index; index3 = bitrev_x3[col];//3*index;
for (row=0; row<RTC; row++) { for (row=0; row<RTC; row++) {
w[k] = d1[index3];//d[index3-ND3]; w[k] = d1[index3];//d[index3-ND3];
w[Kpi+k2] = d2[index3];//d[index3-ND3+1]; w[Kpi+k2] = d2[index3];//d[index3-ND3+1];
w[Kpi+1+k2] = d3[index3];//d[index3-ND3+5]; w[Kpi+1+k2] = d3[index3];//d[index3-ND3+5];
#ifdef RM_DEBUG #ifdef RM_DEBUG
printf("row %d, index %d, index-Nd %d index-Nd+1 %d (k,Kpi+2k,Kpi+2k+1) (%d,%d,%d) w(%d,%d,%d)\n",row,index,index-ND,((index+1)%Kpi)-ND,k,Kpi+(k<<1),Kpi+(k<<1)+1,w[k],w[Kpi+(k<<1)],w[Kpi+1+(k<<1)]); printf("row %d, index %d, index-Nd %d index-Nd+1 %d (k,Kpi+2k,Kpi+2k+1) (%d,%d,%d) w(%d,%d,%d)\n",row,index,index-ND,((index+1)%Kpi)-ND,k,Kpi+(k<<1),Kpi+(k<<1)+1,w[k],w[Kpi+(k<<1)],w[Kpi+1+(k<<1)]);
...@@ -100,7 +94,8 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -100,7 +94,8 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
#endif #endif
index3+=96; index3+=96;
k++;k2+=2; k++;
k2+=2;
} }
} }
...@@ -120,9 +115,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -120,9 +115,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
} }
uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w) uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w) {
{
uint32_t RCC = (D>>5), ND, ND3; uint32_t RCC = (D>>5), ND, ND3;
uint32_t row,col,Kpi,index; uint32_t row,col,Kpi,index;
uint32_t index3,k; uint32_t index3,k;
...@@ -141,7 +134,6 @@ uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -141,7 +134,6 @@ uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w)
printf("RCC = %d, Kpi=%d, ND=%d\n",RCC,Kpi,ND); printf("RCC = %d, Kpi=%d, ND=%d\n",RCC,Kpi,ND);
#endif #endif
ND3 = ND*3; ND3 = ND*3;
k=0; k=0;
for (col=0; col<32; col++) { for (col=0; col<32; col++) {
...@@ -180,9 +172,7 @@ uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -180,9 +172,7 @@ uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w)
return(RCC); return(RCC);
} }
void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) {
{
uint32_t RTC = (D>>5), ND, ND3; uint32_t RTC = (D>>5), ND, ND3;
uint32_t row,col,Kpi,index; uint32_t row,col,Kpi,index;
uint32_t index3,k,k2; uint32_t index3,k,k2;
...@@ -199,7 +189,6 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) ...@@ -199,7 +189,6 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w)
printf("RTC = %d, Kpi=%d, ND=%d\n",RTC,Kpi,ND); printf("RTC = %d, Kpi=%d, ND=%d\n",RTC,Kpi,ND);
#endif #endif
ND3 = ND*3; ND3 = ND*3;
// copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212 // copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212
k=0; k=0;
k2=0; k2=0;
...@@ -215,7 +204,6 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) ...@@ -215,7 +204,6 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w)
index3 = bitrev_x3[col];//3*index; index3 = bitrev_x3[col];//3*index;
for (row=0; row<RTC; row++) { for (row=0; row<RTC; row++) {
d1[index3] = w[k]; d1[index3] = w[k];
d2[index3] = w[Kpi+k2]; d2[index3] = w[Kpi+k2];
d3[index3] = w[Kpi+1+k2]; d3[index3] = w[Kpi+1+k2];
...@@ -229,12 +217,9 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) ...@@ -229,12 +217,9 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w)
// if (ND>0) // if (ND>0)
// d[2] = LTE_NULL;//d[(3*D)+2]; // d[2] = LTE_NULL;//d[(3*D)+2];
} }
void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w) void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w) {
{
//WANG_Hao uint32_t RCC = (D>>5), ND, ND3; //WANG_Hao uint32_t RCC = (D>>5), ND, ND3;
uint32_t RCC = (D>>5); uint32_t RCC = (D>>5);
ptrdiff_t ND, ND3; ptrdiff_t ND, ND3;
...@@ -251,10 +236,9 @@ void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w) ...@@ -251,10 +236,9 @@ void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w)
ND = Kpi - D; ND = Kpi - D;
#ifdef RM_DEBUG2 #ifdef RM_DEBUG2
printf("sub_block_interleaving_cc : D = %d (%d), d %p, w %p\n",D,D*3,d,w); printf("sub_block_interleaving_cc : D = %d (%d), d %p, w %p\n",D,D*3,d,w);
printf("RCC = %d, Kpi=%d, ND=%d\n",RCC,Kpi,ND); printf("RCC = %d, Kpi=%d, ND=%ld\n",RCC,Kpi,ND);
#endif #endif
ND3 = ND*3; ND3 = ND*3;
k=0; k=0;
for (col=0; col<32; col++) { for (col=0; col<32; col++) {
...@@ -265,24 +249,20 @@ void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w) ...@@ -265,24 +249,20 @@ void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w)
index3 = 3*index; index3 = 3*index;
for (row=0; row<RCC; row++) { for (row=0; row<RCC; row++) {
d[index3-ND3] = w[k]; d[index3-ND3] = w[k];
d[index3-ND3+1] = w[Kpi+k]; d[index3-ND3+1] = w[Kpi+k];
d[index3-ND3+2] = w[(Kpi<<1)+k]; d[index3-ND3+2] = w[(Kpi<<1)+k];
#ifdef RM_DEBUG2 #ifdef RM_DEBUG2
printf("row %d, index %d k %d index3-ND3 %d w(%d,%d,%d)\n",row,index,k,index3-ND3,w[k],w[Kpi+k],w[(Kpi<<1)+k]); printf("row %d, index %d k %d index3-ND3 %ld w(%d,%d,%d)\n",row,index,k,index3-ND3,w[k],w[Kpi+k],w[(Kpi<<1)+k]);
#endif #endif
index3+=96; index3+=96;
index+=32; index+=32;
k++; k++;
} }
} }
} }
uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F) uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F) {
{
uint32_t RTC = (D>>5), ND; uint32_t RTC = (D>>5), ND;
uint32_t col,Kpi,index; uint32_t col,Kpi,index;
int32_t k,k2; int32_t k,k2;
...@@ -301,8 +281,6 @@ uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F) ...@@ -301,8 +281,6 @@ uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F)
printf("dummy sub_block_interleaving_turbo : D = %d (%d)\n",D,D*3); printf("dummy sub_block_interleaving_turbo : D = %d (%d)\n",D,D*3);
printf("RTC = %d, Kpi=%d, ND=%d, F=%d (Nulled %d)\n",RTC,Kpi,ND,F,(2*F + 3*ND)); printf("RTC = %d, Kpi=%d, ND=%d, F=%d (Nulled %d)\n",RTC,Kpi,ND,F,(2*F + 3*ND));
#endif #endif
k=0; k=0;
k2=0; k2=0;
wKpi = &w[Kpi]; wKpi = &w[Kpi];
...@@ -371,9 +349,7 @@ uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F) ...@@ -371,9 +349,7 @@ uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F)
return(RTC); return(RTC);
} }
uint32_t generate_dummy_w_cc(uint32_t D, uint8_t *w) uint32_t generate_dummy_w_cc(uint32_t D, uint8_t *w) {
{
uint32_t RCC = (D>>5), ND; uint32_t RCC = (D>>5), ND;
uint32_t col,Kpi,index; uint32_t col,Kpi,index;
int32_t k; int32_t k;
...@@ -392,7 +368,6 @@ uint32_t generate_dummy_w_cc(uint32_t D, uint8_t *w) ...@@ -392,7 +368,6 @@ uint32_t generate_dummy_w_cc(uint32_t D, uint8_t *w)
printf("RCC = %d, Kpi=%d, ND=%d, (Nulled %d)\n",RCC,Kpi,ND,3*ND); printf("RCC = %d, Kpi=%d, ND=%d, (Nulled %d)\n",RCC,Kpi,ND,3*ND);
#endif #endif
// ND3 = ND*3; // ND3 = ND*3;
// copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212 // copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212
k=0; k=0;
...@@ -466,8 +441,6 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, ...@@ -466,8 +441,6 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
uint8_t nb_rb) uint8_t nb_rb)
// uint8_t m) // uint8_t m)
{ {
uint32_t Nir,Ncb,Gp,GpmodC,E,Ncbmod,ind,k; uint32_t Nir,Ncb,Gp,GpmodC,E,Ncbmod,ind,k;
// int cnt=0; // int cnt=0;
uint8_t *e2; uint8_t *e2;
...@@ -487,11 +460,11 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, ...@@ -487,11 +460,11 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
if (Mdlharq>0) { // Downlink if (Mdlharq>0) { // Downlink
Nir = Nsoft/Kmimo/cmin(8,Mdlharq); Nir = Nsoft/Kmimo/cmin(8,Mdlharq);
Ncb = cmin(Nir/C,3*(RTC<<5)); Ncb = cmin(Nir/C,3*(RTC<<5));
} } else { // Uplink
else { // Uplink
Nir=0; Nir=0;
Ncb = 3*(RTC<<5); // Kw Ncb = 3*(RTC<<5); // Kw
} }
#ifdef RM_DEBUG_TX #ifdef RM_DEBUG_TX
if (rvidx==0 && r==0) { if (rvidx==0 && r==0) {
...@@ -518,7 +491,6 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, ...@@ -518,7 +491,6 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
AssertFatal(Qm>0,"Qm is 0\n"); AssertFatal(Qm>0,"Qm is 0\n");
Gp = G/Nl/Qm; Gp = G/Nl/Qm;
GpmodC = Gp%C; GpmodC = Gp%C;
#ifdef RM_DEBUG #ifdef RM_DEBUG
printf("lte_rate_matching_turbo: Ncb %d, Kw %d, Nir/C %d, rvidx %d, G %d, Qm %d, Nl%d, r %d\n",Ncb,3*(RTC<<5),Nir/C,rvidx, G, Qm,Nl,r); printf("lte_rate_matching_turbo: Ncb %d, Kw %d, Nir/C %d, rvidx %d, G %d, Qm %d, Nl%d, r %d\n",Ncb,3*(RTC<<5),Nir/C,rvidx, G, Qm,Nl,r);
#endif #endif
...@@ -529,16 +501,12 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, ...@@ -529,16 +501,12 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
E = Nl*Qm * ((GpmodC==0?0:1) + (Gp/C)); E = Nl*Qm * ((GpmodC==0?0:1) + (Gp/C));
Ncbmod = Ncb%(RTC<<3); Ncbmod = Ncb%(RTC<<3);
ind = RTC * (2+(rvidx*(((Ncbmod==0)?0:1) + (Ncb/(RTC<<3)))*2)); ind = RTC * (2+(rvidx*(((Ncbmod==0)?0:1) + (Ncb/(RTC<<3)))*2));
#ifdef RM_DEBUG_TX #ifdef RM_DEBUG_TX
printf("lte_rate_matching_turbo: E %d, k0 %d, Ncbmod %d, Ncb/(RTC<<3) %d\n",E,ind,Ncbmod,Ncb/(RTC<<3)); printf("lte_rate_matching_turbo: E %d, k0 %d, Ncbmod %d, Ncb/(RTC<<3) %d\n",E,ind,Ncbmod,Ncb/(RTC<<3));
#endif #endif
//e2=e+(r*E); //e2=e+(r*E);
e2 = e; e2 = e;
k=0; k=0;
for (; (ind<Ncb)&&(k<E); ind++) { for (; (ind<Ncb)&&(k<E); ind++) {
...@@ -633,25 +601,16 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, ...@@ -633,25 +601,16 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC,
uint32_t lte_rate_matching_cc(uint32_t RCC, uint32_t lte_rate_matching_cc(uint32_t RCC,
uint16_t E, uint16_t E,
uint8_t *w, uint8_t *w,
uint8_t *e) uint8_t *e) {
{
uint32_t ind=0,k; uint32_t ind=0,k;
uint16_t Kw = 3*(RCC<<5); uint16_t Kw = 3*(RCC<<5);
#ifdef RM_DEBUG_CC #ifdef RM_DEBUG_CC
uint32_t nulled=0; uint32_t nulled=0;
printf("lte_rate_matching_cc: Kw %d, E %d\n",Kw, E); printf("lte_rate_matching_cc: Kw %d, E %d\n",Kw, E);
#endif #endif
for (k=0; k<E; k++) { for (k=0; k<E; k++) {
while(w[ind] == LTE_NULL) { while(w[ind] == LTE_NULL) {
#ifdef RM_DEBUG_CC #ifdef RM_DEBUG_CC
nulled++; nulled++;
printf("RM_TX_CC : ind %d, NULL\n",ind); printf("RM_TX_CC : ind %d, NULL\n",ind);
...@@ -662,7 +621,6 @@ uint32_t lte_rate_matching_cc(uint32_t RCC, ...@@ -662,7 +621,6 @@ uint32_t lte_rate_matching_cc(uint32_t RCC,
ind=0; ind=0;
} }
e[k] = w[ind]; e[k] = w[ind];
#ifdef RM_DEBUG_CC #ifdef RM_DEBUG_CC
// printf("k %d ind %d, w %c(%d)\n",k,ind,w[ind],w[ind]); // printf("k %d ind %d, w %c(%d)\n",k,ind,w[ind],w[ind]);
...@@ -695,10 +653,7 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, ...@@ -695,10 +653,7 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
uint8_t Qm, uint8_t Qm,
uint8_t Nl, uint8_t Nl,
uint8_t r, uint8_t r,
uint32_t *E_out) uint32_t *E_out) {
{
uint32_t Nir,Ncb,Gp,GpmodC,E,Ncbmod,ind,k; uint32_t Nir,Ncb,Gp,GpmodC,E,Ncbmod,ind,k;
int16_t *soft_input2; int16_t *soft_input2;
// int32_t w_tmp; // int32_t w_tmp;
...@@ -708,15 +663,14 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, ...@@ -708,15 +663,14 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
if (Kmimo==0 || C==0 || Qm==0 || Nl==0) { if (Kmimo==0 || C==0 || Qm==0 || Nl==0) {
printf("lte_rate_matching.c: invalid parameters (Kmimo %d, Mdlharq %d, C %d, Qm %d, Nl %d\n", printf("lte_rate_matching.c: invalid parameters (Kmimo %d, Mdlharq %d, C %d, Qm %d, Nl %d\n",
Kmimo,Mdlharq,C,Qm,Nl); Kmimo,Mdlharq,C,Qm,Nl);
return(-1); return(-1);
} }
if (Mdlharq>0) { // Downlink if (Mdlharq>0) { // Downlink
Nir = Nsoft/Kmimo/cmin(8,Mdlharq); Nir = Nsoft/Kmimo/cmin(8,Mdlharq);
Ncb = cmin(Nir/C,3*(RTC<<5)); Ncb = cmin(Nir/C,3*(RTC<<5));
} } else { // Uplink
else { // Uplink
Nir=0; Nir=0;
Ncb = 3*(RTC<<5); Ncb = 3*(RTC<<5);
} }
...@@ -726,17 +680,13 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, ...@@ -726,17 +680,13 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
Gp = G/Nl/Qm; Gp = G/Nl/Qm;
GpmodC = Gp%C; GpmodC = Gp%C;
if (r < (C-(GpmodC))) if (r < (C-(GpmodC)))
E = Nl*Qm * (Gp/C); E = Nl*Qm * (Gp/C);
else else
E = Nl*Qm * ((GpmodC==0?0:1) + (Gp/C)); E = Nl*Qm * ((GpmodC==0?0:1) + (Gp/C));
Ncbmod = Ncb%(RTC<<3); Ncbmod = Ncb%(RTC<<3);
ind = RTC * (2+(rvidx*(((Ncbmod==0)?0:1) + (Ncb/(RTC<<3)))*2)); ind = RTC * (2+(rvidx*(((Ncbmod==0)?0:1) + (Ncb/(RTC<<3)))*2));
#ifdef RM_DEBUG #ifdef RM_DEBUG
printf("lte_rate_matching_turbo_rx: Clear %d, E %d, Ncb %d, Kw %d, rvidx %d, G %d, Qm %d, Nl%d, r %d\n",clear,E,Ncb,3*(RTC<<5),rvidx, G, Qm,Nl,r); printf("lte_rate_matching_turbo_rx: Clear %d, E %d, Ncb %d, Kw %d, rvidx %d, G %d, Qm %d, Nl%d, r %d\n",clear,E,Ncb,3*(RTC<<5),rvidx, G, Qm,Nl,r);
#endif #endif
...@@ -751,8 +701,8 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, ...@@ -751,8 +701,8 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
if (dummy_w[ind] != LTE_NULL) { if (dummy_w[ind] != LTE_NULL) {
/* /*
if ((w[ind]>0 && soft_input2[k]<0) || if ((w[ind]>0 && soft_input2[k]<0) ||
(w[ind]<0 && soft_input2[k]>0)) (w[ind]<0 && soft_input2[k]>0))
printf("ind %d: w %d => soft_in %d\n",ind,w[ind],soft_input2[k]);*/ printf("ind %d: w %d => soft_in %d\n",ind,w[ind],soft_input2[k]);*/
w[ind] += soft_input2[k++]; w[ind] += soft_input2[k++];
#ifdef RM_DEBUG #ifdef RM_DEBUG
printf("RM_RX k%d Ind: %d (%d)\n",k-1,ind,w[ind]); printf("RM_RX k%d Ind: %d (%d)\n",k-1,ind,w[ind]);
...@@ -831,10 +781,8 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, ...@@ -831,10 +781,8 @@ int lte_rate_matching_turbo_rx(uint32_t RTC,
ind=0; ind=0;
} }
*/ */
*E_out = E; *E_out = E;
return(0); return(0);
} }
...@@ -842,28 +790,19 @@ void lte_rate_matching_cc_rx(uint32_t RCC, ...@@ -842,28 +790,19 @@ void lte_rate_matching_cc_rx(uint32_t RCC,
uint16_t E, uint16_t E,
int8_t *w, int8_t *w,
uint8_t *dummy_w, uint8_t *dummy_w,
int8_t *soft_input) int8_t *soft_input) {
{
uint32_t ind=0,k; uint32_t ind=0,k;
uint16_t Kw = 3*(RCC<<5); uint16_t Kw = 3*(RCC<<5);
uint32_t acc=1; uint32_t acc=1;
int16_t w16[Kw]; int16_t w16[Kw];
#ifdef RM_DEBUG_CC #ifdef RM_DEBUG_CC
uint32_t nulled=0; uint32_t nulled=0;
printf("lte_rate_matching_cc_rx: Kw %d, E %d, w %p, soft_input %p\n",3*(RCC<<5),E,w,soft_input); printf("lte_rate_matching_cc_rx: Kw %d, E %d, w %p, soft_input %p\n",3*(RCC<<5),E,w,soft_input);
#endif #endif
memset(w,0,Kw); memset(w,0,Kw);
memset(w16,0,Kw*sizeof(int16_t)); memset(w16,0,Kw*sizeof(int16_t));
for (k=0; k<E; k++) { for (k=0; k<E; k++) {
while(dummy_w[ind] == LTE_NULL) { while(dummy_w[ind] == LTE_NULL) {
#ifdef RM_DEBUG_CC #ifdef RM_DEBUG_CC
nulled++; nulled++;
...@@ -883,10 +822,7 @@ void lte_rate_matching_cc_rx(uint32_t RCC, ...@@ -883,10 +822,7 @@ void lte_rate_matching_cc_rx(uint32_t RCC,
#ifdef RM_DEBUG_CC #ifdef RM_DEBUG_CC
printf("RM_RX_CC k %d (%d) ind: %d (%d)\n",k,soft_input[k],ind,w16[ind]); printf("RM_RX_CC k %d (%d) ind: %d (%d)\n",k,soft_input[k],ind,w16[ind]);
#endif #endif
w16[ind] += soft_input[k]; w16[ind] += soft_input[k];
ind++; ind++;
if (ind==Kw) { if (ind==Kw) {
...@@ -907,7 +843,6 @@ void lte_rate_matching_cc_rx(uint32_t RCC, ...@@ -907,7 +843,6 @@ void lte_rate_matching_cc_rx(uint32_t RCC,
} }
#ifdef RM_DEBUG_CC #ifdef RM_DEBUG_CC
printf("Nulled %d\n",nulled); printf("Nulled %d\n",nulled);
#endif #endif
} }
...@@ -915,8 +850,7 @@ void lte_rate_matching_cc_rx(uint32_t RCC, ...@@ -915,8 +850,7 @@ void lte_rate_matching_cc_rx(uint32_t RCC,
#ifdef MAIN #ifdef MAIN
void main() void main() {
{
uint8_t d[96+3+(3*6144)]; uint8_t d[96+3+(3*6144)];
uint8_t w[3*6144],e[12*6144]; uint8_t w[3*6144],e[12*6144];
uint32_t RTC,G,rvidx; uint32_t RTC,G,rvidx;
...@@ -924,7 +858,6 @@ void main() ...@@ -924,7 +858,6 @@ void main()
uint32_t mod_order = 4; uint32_t mod_order = 4;
uint32_t first_dlsch_symbol = 2; uint32_t first_dlsch_symbol = 2;
uint32_t i; uint32_t i;
G = ( nb_rb * (12 * mod_order) * (12-first_dlsch_symbol-3)) ;//( nb_rb * (12 * mod_order) * (14-first_dlsch_symbol-3)) : G = ( nb_rb * (12 * mod_order) * (12-first_dlsch_symbol-3)) ;//( nb_rb * (12 * mod_order) * (14-first_dlsch_symbol-3)) :
// initialize 96 first positions to "LTE_NULL" // initialize 96 first positions to "LTE_NULL"
......
...@@ -38,9 +38,7 @@ int lte_segmentation(unsigned char *input_buffer, ...@@ -38,9 +38,7 @@ int lte_segmentation(unsigned char *input_buffer,
unsigned int *Cminus, unsigned int *Cminus,
unsigned int *Kplus, unsigned int *Kplus,
unsigned int *Kminus, unsigned int *Kminus,
unsigned int *F) unsigned int *F) {
{
unsigned int L,Bprime,Bprime_by_C,r,Kr,k,s,crc; unsigned int L,Bprime,Bprime_by_C,r,Kr,k,s,crc;
if (B<=6144) { if (B<=6144) {
...@@ -56,19 +54,19 @@ int lte_segmentation(unsigned char *input_buffer, ...@@ -56,19 +54,19 @@ int lte_segmentation(unsigned char *input_buffer,
Bprime = B+((*C)*L); Bprime = B+((*C)*L);
#ifdef DEBUG_SEGMENTATION #ifdef DEBUG_SEGMENTATION
printf("Bprime %d\n",Bprime); printf("Bprime %u\n",Bprime);
#endif #endif
} }
if ((*C)>MAX_NUM_DLSCH_SEGMENTS) { if ((*C)>MAX_NUM_DLSCH_SEGMENTS) {
LOG_E(PHY,"lte_segmentation.c: too many segments %d, B %d, L %d, Bprime %d\n",*C,B,L,Bprime); LOG_E(PHY,"lte_segmentation.c: too many segments %d, B %d, L %d, Bprime %d\n",*C,B,L,Bprime);
return(-1); return(-1);
} }
// Find K+ // Find K+
Bprime_by_C = Bprime/(*C); Bprime_by_C = Bprime/(*C);
#ifdef DEBUG_SEGMENTATION #ifdef DEBUG_SEGMENTATION
printf("Bprime_by_C %d\n",Bprime_by_C); printf("Bprime_by_C %u\n",Bprime_by_C);
#endif #endif
// Bprime = Bprime_by_C>>3; // Bprime = Bprime_by_C>>3;
...@@ -93,17 +91,16 @@ int lte_segmentation(unsigned char *input_buffer, ...@@ -93,17 +91,16 @@ int lte_segmentation(unsigned char *input_buffer,
*Kminus = (*Kplus - 32); *Kminus = (*Kplus - 32);
} else if (Bprime_by_C <=6144 ) { // increase by 8 bytes til here } else if (Bprime_by_C <=6144 ) { // increase by 8 bytes til here
*Kplus = (Bprime_by_C>>6)<<6; *Kplus = (Bprime_by_C>>6)<<6;
#ifdef DEBUG_SEGMENTATION #ifdef DEBUG_SEGMENTATION
printf("Bprime_by_C_by_C %d , Kplus %d\n",Bprime_by_C,*Kplus); printf("Bprime_by_C_by_C %u , Kplus %u\n",Bprime_by_C,*Kplus);
#endif #endif
if (*Kplus < Bprime_by_C) if (*Kplus < Bprime_by_C)
*Kplus = *Kplus + 64; *Kplus = *Kplus + 64;
#ifdef DEBUG_SEGMENTATION #ifdef DEBUG_SEGMENTATION
printf("Bprime_by_C_by_C %d , Kplus2 %d\n",Bprime_by_C,*Kplus); printf("Bprime_by_C_by_C %u , Kplus2 %u\n",Bprime_by_C,*Kplus);
#endif #endif
*Kminus = (*Kplus - 64); *Kminus = (*Kplus - 64);
} else { } else {
...@@ -116,25 +113,21 @@ int lte_segmentation(unsigned char *input_buffer, ...@@ -116,25 +113,21 @@ int lte_segmentation(unsigned char *input_buffer,
*Kminus = 0; *Kminus = 0;
*Cminus = 0; *Cminus = 0;
} else { } else {
// printf("More than one segment (%d), exiting \n",*C); // printf("More than one segment (%d), exiting \n",*C);
// exit(-1); // exit(-1);
*Cminus = ((*C)*(*Kplus) - (Bprime))/((*Kplus) - (*Kminus)); *Cminus = ((*C)*(*Kplus) - (Bprime))/((*Kplus) - (*Kminus));
*Cplus = (*C) - (*Cminus); *Cplus = (*C) - (*Cminus);
} }
AssertFatal(Bprime <= (*Cplus)*(*Kplus) + (*Cminus)*(*Kminus), AssertFatal(Bprime <= (*Cplus)*(*Kplus) + (*Cminus)*(*Kminus),
"Bprime %d < (*Cplus %d)*(*Kplus %d) + (*Cminus %d)*(*Kminus %d)\n", "Bprime %d < (*Cplus %d)*(*Kplus %d) + (*Cminus %d)*(*Kminus %d)\n",
Bprime,*Cplus,*Kplus,*Cminus,*Kminus); Bprime,*Cplus,*Kplus,*Cminus,*Kminus);
*F = ((*Cplus)*(*Kplus) + (*Cminus)*(*Kminus) - (Bprime)); *F = ((*Cplus)*(*Kplus) + (*Cminus)*(*Kminus) - (Bprime));
#ifdef DEBUG_SEGMENTATION #ifdef DEBUG_SEGMENTATION
printf("C %d, Cplus %d, Cminus %d, Kplus %d, Kminus %d, Bprime_bytes %d, Bprime %d, F %d\n",*C,*Cplus,*Cminus,*Kplus,*Kminus,Bprime>>3,Bprime,*F); printf("C %u, Cplus %u, Cminus %u, Kplus %u, Kminus %u, Bprime_bytes %u, Bprime %u, F %u\n",*C,*Cplus,*Cminus,*Kplus,*Kminus,Bprime>>3,Bprime,*F);
#endif #endif
if ((input_buffer) && (output_buffers)) { if ((input_buffer) && (output_buffers)) {
for (k=0; k<*F>>3; k++) { for (k=0; k<*F>>3; k++) {
output_buffers[0][k] = 0; output_buffers[0][k] = 0;
} }
...@@ -142,7 +135,6 @@ int lte_segmentation(unsigned char *input_buffer, ...@@ -142,7 +135,6 @@ int lte_segmentation(unsigned char *input_buffer,
s=0; s=0;
for (r=0; r<*C; r++) { for (r=0; r<*C; r++) {
if (r<*Cminus) if (r<*Cminus)
Kr = *Kminus; Kr = *Kminus;
else else
...@@ -150,18 +142,18 @@ int lte_segmentation(unsigned char *input_buffer, ...@@ -150,18 +142,18 @@ int lte_segmentation(unsigned char *input_buffer,
while (k<((Kr - L)>>3)) { while (k<((Kr - L)>>3)) {
output_buffers[r][k] = input_buffer[s]; output_buffers[r][k] = input_buffer[s];
// printf("encoding segment %d : byte %d (%d) => %d\n",r,k,Kr>>3,input_buffer[s]); // printf("encoding segment %d : byte %d (%d) => %d\n",r,k,Kr>>3,input_buffer[s]);
k++; k++;
s++; s++;
} }
if (*C > 1) { // add CRC if (*C > 1) { // add CRC
crc = crc24b(output_buffers[r],Kr-24)>>8; crc = crc24b(output_buffers[r],Kr-24)>>8;
output_buffers[r][(Kr-24)>>3] = ((uint8_t*)&crc)[2]; output_buffers[r][(Kr-24)>>3] = ((uint8_t *)&crc)[2];
output_buffers[r][1+((Kr-24)>>3)] = ((uint8_t*)&crc)[1]; output_buffers[r][1+((Kr-24)>>3)] = ((uint8_t *)&crc)[1];
output_buffers[r][2+((Kr-24)>>3)] = ((uint8_t*)&crc)[0]; output_buffers[r][2+((Kr-24)>>3)] = ((uint8_t *)&crc)[0];
#ifdef DEBUG_SEGMENTATION #ifdef DEBUG_SEGMENTATION
printf("Segment %d : CRC %x\n",r,crc); printf("Segment %u : CRC %x\n",r,crc);
#endif #endif
} }
...@@ -175,9 +167,7 @@ int lte_segmentation(unsigned char *input_buffer, ...@@ -175,9 +167,7 @@ int lte_segmentation(unsigned char *input_buffer,
#ifdef MAIN #ifdef MAIN
main() main() {
{
unsigned int Kplus,Kminus,C,Cplus,Cminus,F,Bbytes; unsigned int Kplus,Kminus,C,Cplus,Cminus,F,Bbytes;
for (Bbytes=5; Bbytes<2*768; Bbytes++) { for (Bbytes=5; Bbytes<2*768; Bbytes++) {
......
...@@ -34,8 +34,7 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -34,8 +34,7 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
unsigned char Ns, unsigned char Ns,
unsigned char p, unsigned char p,
unsigned char l, unsigned char l,
unsigned char symbol) unsigned char symbol) {
{
int pilot[2][200] __attribute__((aligned(16))); int pilot[2][200] __attribute__((aligned(16)));
unsigned char nu,aarx; unsigned char nu,aarx;
unsigned short k; unsigned short k;
...@@ -45,16 +44,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -45,16 +44,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
// unsigned int n; // unsigned int n;
// int i; // int i;
static int interpolateS11S12 = 1; static int interpolateS11S12 = 1;
uint16_t Nid_cell = (eNB_offset == 0) ? ue->frame_parms.Nid_cell : ue->measurements.adj_cell_id[eNB_offset-1]; uint16_t Nid_cell = (eNB_offset == 0) ? ue->frame_parms.Nid_cell : ue->measurements.adj_cell_id[eNB_offset-1];
uint8_t nushift,pilot0,pilot1,pilot2,pilot3; uint8_t nushift,pilot0,pilot1,pilot2,pilot3;
uint8_t previous_thread_id = ue->current_thread_id[Ns>>1]==0 ? (RX_NB_TH-1):(ue->current_thread_id[Ns>>1]-1); uint8_t previous_thread_id = ue->current_thread_id[Ns>>1]==0 ? (RX_NB_TH-1):(ue->current_thread_id[Ns>>1]-1);
int **dl_ch_estimates =ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset]; int **dl_ch_estimates =ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset];
int **dl_ch_estimates_previous=ue->common_vars.common_vars_rx_data_per_thread[previous_thread_id].dl_ch_estimates[eNB_offset]; int **dl_ch_estimates_previous=ue->common_vars.common_vars_rx_data_per_thread[previous_thread_id].dl_ch_estimates[eNB_offset];
int **rxdataF=ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].rxdataF; int **rxdataF=ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].rxdataF;
pilot0 = 0; pilot0 = 0;
if (ue->frame_parms.Ncp == 0) { // normal prefix if (ue->frame_parms.Ncp == 0) { // normal prefix
pilot1 = 4; pilot1 = 4;
pilot2 = 7; pilot2 = 7;
...@@ -81,7 +78,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -81,7 +78,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
return(-1); return(-1);
} }
//ch_offset = (l*(ue->frame_parms.ofdm_symbol_size)); //ch_offset = (l*(ue->frame_parms.ofdm_symbol_size));
if (ue->high_speed_flag == 0) // use second channel estimate position for temporary storage if (ue->high_speed_flag == 0) // use second channel estimate position for temporary storage
ch_offset = ue->frame_parms.ofdm_symbol_size ; ch_offset = ue->frame_parms.ofdm_symbol_size ;
...@@ -89,93 +85,88 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -89,93 +85,88 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
ch_offset = ue->frame_parms.ofdm_symbol_size*symbol; ch_offset = ue->frame_parms.ofdm_symbol_size*symbol;
symbol_offset = ue->frame_parms.ofdm_symbol_size*symbol; symbol_offset = ue->frame_parms.ofdm_symbol_size*symbol;
k = (nu + nushift)%6; k = (nu + nushift)%6;
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("Channel Estimation : ThreadId %d, eNB_offset %d cell_id %d ch_offset %d, OFDM size %d, Ncp=%d, l=%d, Ns=%d, k=%d\n",ue->current_thread_id[Ns>>1], eNB_offset,Nid_cell,ch_offset,ue->frame_parms.ofdm_symbol_size, printf("Channel Estimation : ThreadId %d, eNB_offset %d cell_id %d ch_offset %d, OFDM size %d, Ncp=%d, l=%d, Ns=%d, k=%d\n",ue->current_thread_id[Ns>>1], eNB_offset,Nid_cell,ch_offset,
ue->frame_parms.ofdm_symbol_size,
ue->frame_parms.Ncp,l,Ns,k); ue->frame_parms.Ncp,l,Ns,k);
#endif #endif
switch (k) { switch (k) {
case 0 : case 0 :
f=filt24_0; //for first pilot of RB, first half f=filt24_0; //for first pilot of RB, first half
f2=filt24_2; //for second pilot of RB, first half f2=filt24_2; //for second pilot of RB, first half
fl=filt24_0; //for first pilot of leftmost RB fl=filt24_0; //for first pilot of leftmost RB
f2l2=filt24_2; f2l2=filt24_2;
// fr=filt24_2r; //for first pilot of rightmost RB // fr=filt24_2r; //for first pilot of rightmost RB
fr=filt24_0r2; //for first pilot of rightmost RB fr=filt24_0r2; //for first pilot of rightmost RB
// f2r2=filt24_0r2; // f2r2=filt24_0r2;
f2r2=filt24_2r; f2r2=filt24_2r;
f_dc=filt24_0_dcr;
f_dc=filt24_0_dcr; f2_dc=filt24_2_dcl;
f2_dc=filt24_2_dcl; break;
break; case 1 :
f=filt24_1;
case 1 : f2=filt24_3;
f=filt24_1; fl=filt24_1l;
f2=filt24_3; f2l2=filt24_3l2;
fl=filt24_1l; fr=filt24_1r2;
f2l2=filt24_3l2; f2r2=filt24_3r;
fr=filt24_1r2; f_dc=filt24_1_dcr; //for first pilot of RB, first half
f2r2=filt24_3r; f2_dc=filt24_3_dcl; //for first pilot of RB, first half
f_dc=filt24_1_dcr; //for first pilot of RB, first half break;
f2_dc=filt24_3_dcl; //for first pilot of RB, first half
break; case 2 :
f=filt24_2;
case 2 : f2=filt24_4;
f=filt24_2; fl=filt24_2l;
f2=filt24_4; f2l2=filt24_4l2;
fl=filt24_2l; fr=filt24_2r2;
f2l2=filt24_4l2; f2r2=filt24_4r;
fr=filt24_2r2; f_dc=filt24_2_dcr; //for first pilot of RB, first half
f2r2=filt24_4r; f2_dc=filt24_4_dcl; //for first pilot of RB, first half
f_dc=filt24_2_dcr; //for first pilot of RB, first half break;
f2_dc=filt24_4_dcl; //for first pilot of RB, first half
break; case 3 :
f=filt24_3;
case 3 : f2=filt24_5;
f=filt24_3; fl=filt24_3l;
f2=filt24_5; f2l2=filt24_5l2;
fl=filt24_3l; fr=filt24_3r2;
f2l2=filt24_5l2; f2r2=filt24_5r;
fr=filt24_3r2; f_dc=filt24_3_dcr; //for first pilot of RB, first half
f2r2=filt24_5r; f2_dc=filt24_5_dcl; //for first pilot of RB, first half
f_dc=filt24_3_dcr; //for first pilot of RB, first half break;
f2_dc=filt24_5_dcl; //for first pilot of RB, first half
break; case 4 :
f=filt24_4;
case 4 : f2=filt24_6;
f=filt24_4; fl=filt24_4l;
f2=filt24_6; f2l2=filt24_6l2;
fl=filt24_4l; fr=filt24_4r2;
f2l2=filt24_6l2; f2r2=filt24_6r;
fr=filt24_4r2; f_dc=filt24_4_dcr; //for first pilot of RB, first half
f2r2=filt24_6r; f2_dc=filt24_6_dcl; //for first pilot of RB, first half
f_dc=filt24_4_dcr; //for first pilot of RB, first half break;
f2_dc=filt24_6_dcl; //for first pilot of RB, first half
break; case 5 :
f=filt24_5;
case 5 : f2=filt24_7;
f=filt24_5; fl=filt24_5l;
f2=filt24_7; f2l2=filt24_7l2;
fl=filt24_5l; fr=filt24_5r2;
f2l2=filt24_7l2; f2r2=filt24_7r;
fr=filt24_5r2; f_dc=filt24_5_dcr; //for first pilot of RB, first half
f2r2=filt24_7r; f2_dc=filt24_7_dcl; //for first pilot of RB, first half
f_dc=filt24_5_dcr; //for first pilot of RB, first half break;
f2_dc=filt24_7_dcl; //for first pilot of RB, first half
break; default:
LOG_E(PHY,"lte_dl_channel_estimation: k=%d -> ERROR\n",k);
default: return(-1);
LOG_E(PHY,"lte_dl_channel_estimation: k=%d -> ERROR\n",k); break;
return(-1);
break;
} }
// generate pilot // generate pilot
lte_dl_cell_spec_rx(ue, lte_dl_cell_spec_rx(ue,
eNB_offset, eNB_offset,
...@@ -184,27 +175,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -184,27 +175,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
(l==0)?0:1, (l==0)?0:1,
p); p);
for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) { for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) {
pil = (int16_t *)&pilot[p][0]; pil = (int16_t *)&pilot[p][0];
rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+k+ue->frame_parms.first_carrier_offset))]; rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+k+ue->frame_parms.first_carrier_offset))];
dl_ch = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]; dl_ch = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset];
// if (eNb_id==0) // if (eNb_id==0)
memset(dl_ch,0,4*(ue->frame_parms.ofdm_symbol_size)); memset(dl_ch,0,4*(ue->frame_parms.ofdm_symbol_size));
if (ue->high_speed_flag==0) // multiply previous channel estimate by ch_est_alpha if (ue->high_speed_flag==0) // multiply previous channel estimate by ch_est_alpha
multadd_complex_vector_real_scalar(dl_ch-(ue->frame_parms.ofdm_symbol_size<<1), multadd_complex_vector_real_scalar(dl_ch-(ue->frame_parms.ofdm_symbol_size<<1),
ue->ch_est_alpha,dl_ch-(ue->frame_parms.ofdm_symbol_size<<1), ue->ch_est_alpha,dl_ch-(ue->frame_parms.ofdm_symbol_size<<1),
1,ue->frame_parms.ofdm_symbol_size); 1,ue->frame_parms.ofdm_symbol_size);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("k %d, first_carrier %d\n",k,ue->frame_parms.first_carrier_offset); printf("k %d, first_carrier %d\n",k,ue->frame_parms.first_carrier_offset);
#endif #endif
if ((ue->frame_parms.N_RB_DL==6) || if ((ue->frame_parms.N_RB_DL==6) ||
(ue->frame_parms.N_RB_DL==50) || (ue->frame_parms.N_RB_DL==50) ||
(ue->frame_parms.N_RB_DL==100)) { (ue->frame_parms.N_RB_DL==100)) {
//First half of pilots //First half of pilots
// Treat first 2 pilots specially (left edge) // Treat first 2 pilots specially (left edge)
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
...@@ -219,7 +208,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -219,7 +208,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; // Re Im pil+=2; // Re Im
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
...@@ -234,28 +222,22 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -234,28 +222,22 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
dl_ch+=16; dl_ch+=16;
for (pilot_cnt=2; pilot_cnt<((ue->frame_parms.N_RB_DL)-1); pilot_cnt+=2) { for (pilot_cnt=2; pilot_cnt<((ue->frame_parms.N_RB_DL)-1); pilot_cnt+=2) {
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); //Re ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); //Re
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //Im ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //Im
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif #endif
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
dl_ch, dl_ch,
24); 24);
pil+=2; // Re Im pil+=2; // Re Im
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif #endif
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
...@@ -264,29 +246,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -264,29 +246,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=16; dl_ch+=16;
} }
// printf("Second half\n"); // printf("Second half\n");
// Second half of RBs // Second half of RBs
k = (nu + nushift)%6; k = (nu + nushift)%6;
if (k > 6) if (k > 6)
k -=6; k -=6;
rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))]; rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))];
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("second half k %d\n",k); printf("second half k %d\n",k);
#endif #endif
for (pilot_cnt=0; pilot_cnt<((ue->frame_parms.N_RB_DL)-3); pilot_cnt+=2) {
for (pilot_cnt=0; pilot_cnt<((ue->frame_parms.N_RB_DL)-3); pilot_cnt+=2) {
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif #endif
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
...@@ -295,11 +273,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -295,11 +273,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif #endif
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
...@@ -308,13 +285,12 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -308,13 +285,12 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=16; dl_ch+=16;
} }
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif #endif
multadd_real_vector_complex_scalar(fr, multadd_real_vector_complex_scalar(fr,
ch, ch,
...@@ -323,34 +299,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -323,34 +299,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; // Re Im pil+=2; // Re Im
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif #endif
multadd_real_vector_complex_scalar(f2r2, multadd_real_vector_complex_scalar(f2r2,
ch, ch,
dl_ch, dl_ch,
24); 24);
} else if (ue->frame_parms.N_RB_DL==25) {
}
else if (ue->frame_parms.N_RB_DL==25) {
//printf("Channel estimation\n"); //printf("Channel estimation\n");
// Treat first 2 pilots specially (left edge) // Treat first 2 pilots specially (left edge)
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(fl, multadd_real_vector_complex_scalar(fl,
ch, ch,
dl_ch, dl_ch,
...@@ -358,17 +325,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -358,17 +325,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; // Re Im pil+=2; // Re Im
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(f2l2, multadd_real_vector_complex_scalar(f2l2,
ch, ch,
dl_ch, dl_ch,
...@@ -378,21 +341,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -378,21 +341,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
dl_ch+=16; dl_ch+=16;
for (pilot_cnt=2; pilot_cnt<24; pilot_cnt+=2) { for (pilot_cnt=2; pilot_cnt<24; pilot_cnt+=2) {
// printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]); // printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
// printf("rx[%d][%d] -> (%d,%d)\n",p,ue->frame_parms.first_carrier_offset + ue->frame_parms.nushift + 6*rb+(3*p),rxF[0],rxF[1]); // printf("rx[%d][%d] -> (%d,%d)\n",p,ue->frame_parms.first_carrier_offset + ue->frame_parms.nushift + 6*rb+(3*p),rxF[0],rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
dl_ch, dl_ch,
...@@ -400,13 +357,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -400,13 +357,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; // Re Im pil+=2; // Re Im
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
...@@ -417,39 +371,31 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -417,39 +371,31 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=16; dl_ch+=16;
} }
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot 24: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot 24: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(f_dc, multadd_real_vector_complex_scalar(f_dc,
ch, ch,
dl_ch, dl_ch,
24); 24);
pil+=2; // Re Im pil+=2; // Re Im
dl_ch+=8; dl_ch+=8;
// printf("Second half\n"); // printf("Second half\n");
// Second half of RBs // Second half of RBs
rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))]; rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))];
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot 25: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot 25: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(f2_dc, multadd_real_vector_complex_scalar(f2_dc,
ch, ch,
dl_ch, dl_ch,
...@@ -459,19 +405,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -459,19 +405,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
dl_ch+=16; dl_ch+=16;
for (pilot_cnt=0; pilot_cnt<22; pilot_cnt+=2) { for (pilot_cnt=0; pilot_cnt<22; pilot_cnt+=2) {
// printf("* pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]); // printf("* pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
// printf("rx[%d][%d] -> (%d,%d)\n",p,ue->frame_parms.first_carrier_offset + ue->frame_parms.nushift + 6*rb+(3*p),rxF[0],rxF[1]); // printf("rx[%d][%d] -> (%d,%d)\n",p,ue->frame_parms.first_carrier_offset + ue->frame_parms.nushift + 6*rb+(3*p),rxF[0],rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",26+pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",26+pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
dl_ch, dl_ch,
...@@ -479,16 +421,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -479,16 +421,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot %d : rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",27+pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot %u : rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",27+pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
dl_ch, dl_ch,
...@@ -496,20 +435,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -496,20 +435,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=16; dl_ch+=16;
} }
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot 49: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot 49: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(fr, multadd_real_vector_complex_scalar(fr,
ch, ch,
dl_ch, dl_ch,
...@@ -517,28 +451,20 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -517,28 +451,20 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; // Re Im pil+=2; // Re Im
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH #ifdef DEBUG_CH
printf("pilot 50: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); printf("pilot 50: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
// ch[0] = 1024; // ch[0] = 1024;
// ch[1] = -128; // ch[1] = -128;
#endif #endif
multadd_real_vector_complex_scalar(f2r2, multadd_real_vector_complex_scalar(f2r2,
ch, ch,
dl_ch, dl_ch,
24); 24);
} else if (ue->frame_parms.N_RB_DL==15) { } else if (ue->frame_parms.N_RB_DL==15) {
//printf("First Half\n"); //printf("First Half\n");
for (rb=0; rb<28; rb+=4) { for (rb=0; rb<28; rb+=4) {
//printf("aarx=%d\n",aarx); //printf("aarx=%d\n",aarx);
//printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]); //printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
//printf("rx[%d][%d] -> (%d,%d)\n",p, //printf("rx[%d][%d] -> (%d,%d)\n",p,
...@@ -555,7 +481,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -555,7 +481,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; // Re Im pil+=2; // Re Im
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
//printf("ch -> (%d,%d)\n",ch[0],ch[1]); //printf("ch -> (%d,%d)\n",ch[0],ch[1]);
...@@ -566,7 +491,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -566,7 +491,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=16; dl_ch+=16;
} }
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
...@@ -578,13 +502,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -578,13 +502,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
24); 24);
pil+=2; // Re Im pil+=2; // Re Im
dl_ch+=8; dl_ch+=8;
//printf("Second half\n"); //printf("Second half\n");
//Second half of RBs //Second half of RBs
rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+nushift + (3*p)))]; rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+nushift + (3*p)))];
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
dl_ch, dl_ch,
...@@ -602,7 +524,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -602,7 +524,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
// rxF[1]); // rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
dl_ch, dl_ch,
...@@ -610,10 +531,8 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -610,10 +531,8 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
dl_ch, dl_ch,
...@@ -621,17 +540,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -621,17 +540,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
pil+=2; pil+=2;
rxF+=12; rxF+=12;
dl_ch+=16; dl_ch+=16;
} }
} else { } else {
LOG_E(PHY,"channel estimation not implemented for ue->frame_parms.N_RB_DL = %d\n",ue->frame_parms.N_RB_DL); LOG_E(PHY,"channel estimation not implemented for ue->frame_parms.N_RB_DL = %d\n",ue->frame_parms.N_RB_DL);
} }
if (ue->perfect_ce == 0) { if (ue->perfect_ce == 0) {
// Temporal Interpolation // Temporal Interpolation
// printf("ch_offset %d\n",ch_offset); // printf("ch_offset %d\n",ch_offset);
dl_ch = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]; dl_ch = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset];
if (ue->high_speed_flag == 0) { if (ue->high_speed_flag == 0) {
...@@ -639,182 +555,155 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ...@@ -639,182 +555,155 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
32767-ue->ch_est_alpha, 32767-ue->ch_est_alpha,
dl_ch-(ue->frame_parms.ofdm_symbol_size<<1),0,ue->frame_parms.ofdm_symbol_size); dl_ch-(ue->frame_parms.ofdm_symbol_size<<1),0,ue->frame_parms.ofdm_symbol_size);
} else { // high_speed_flag == 1 } else { // high_speed_flag == 1
if ((symbol == 0)) { if ((symbol == 0)) {
// printf("Interpolating %d->0\n",4-ue->frame_parms.Ncp); // printf("Interpolating %d->0\n",4-ue->frame_parms.Ncp);
// dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][(4-ue->frame_parms.Ncp)*(ue->frame_parms.ofdm_symbol_size)]; // dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][(4-ue->frame_parms.Ncp)*(ue->frame_parms.ofdm_symbol_size)];
if(((Ns>>1)!=0) || ( ((Ns>>1)==0) && interpolateS11S12)) if(((Ns>>1)!=0) || ( ((Ns>>1)==0) && interpolateS11S12)) {
{ //LOG_I(PHY,"Interpolate s11-->s0 to get s12 and s13 Ns %d \n", Ns);
//LOG_I(PHY,"Interpolate s11-->s0 to get s12 and s13 Ns %d \n", Ns); dl_ch_prev = (int16_t *)&dl_ch_estimates_previous[(p<<1)+aarx][pilot3*(ue->frame_parms.ofdm_symbol_size)];
dl_ch_prev = (int16_t *)&dl_ch_estimates_previous[(p<<1)+aarx][pilot3*(ue->frame_parms.ofdm_symbol_size)]; multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
}
multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
}
interpolateS11S12 = 1;
} // this is 1/3,2/3 combination for pilots spaced by 3 symbols
else if (symbol == pilot1) {
dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][0];
//LOG_I(PHY,"Interpolate s0-->s4 to get s1 s2 and s3 Ns %d \n", Ns);
if (ue->frame_parms.Ncp==0) {// pilot spacing 4 symbols (1/4,1/2,3/4 combination)
uint8_t previous_subframe;
if(Ns>>1 == 0)
previous_subframe = 9;
else
previous_subframe = ((Ns>>1) - 1 )%9;
if((subframe_select(&ue->frame_parms,previous_subframe) == SF_UL))
{
multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
}
else
{
multadd_complex_vector_real_scalar(dl_ch_prev,24576,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,8192,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,24576,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
}
} else {
multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)<<1),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
} // pilot spacing 3 symbols (1/3,2/3 combination)
} else if (symbol == pilot2) {
dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot1*(ue->frame_parms.ofdm_symbol_size)];
multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
} else { // symbol == pilot3
// printf("Interpolating 0->%d\n",4-ue->frame_parms.Ncp);
dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot2*(ue->frame_parms.ofdm_symbol_size)];
if (ue->frame_parms.Ncp==0) {// pilot spacing 4 symbols (1/4,1/2,3/4 combination)
multadd_complex_vector_real_scalar(dl_ch_prev,24576,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,8192,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,24576,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
} else {
multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)<<1),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
} // pilot spacing 3 symbols (1/3,2/3 combination)
if((ue->rx_offset_diff !=0) && ((Ns>>1) == 9))
{
//LOG_I(PHY,"Extrapolate s7-->s11 to get s12 and s13 Ns %d\n", Ns);
interpolateS11S12 = 0;
//LOG_E(PHY,"Interpolate s7--s11 s12 s13 pilot 3 Ns %d l %d symbol %d \n", Ns, l, symbol);
int16_t *dlChEst_ofdm11 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot3*(ue->frame_parms.ofdm_symbol_size)];
int16_t *dlChEst_ofdm7 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot2*(ue->frame_parms.ofdm_symbol_size)];
// interpolate ofdm s12: 5/4*ofdms11 + -1/4*ofdms7 5/4 q1.15 40960 -1/4 q1.15 8192
int16_t *dlChEst_ofdm12 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][12*ue->frame_parms.ofdm_symbol_size];
for(int i=0; i<(2*ue->frame_parms.ofdm_symbol_size); i++)
{
int64_t tmp_mult = 0;
tmp_mult = ((int64_t)dlChEst_ofdm11[i] * 40960 - (int64_t)dlChEst_ofdm7[i] * 8192);
tmp_mult = tmp_mult >> 15;
dlChEst_ofdm12[i] = tmp_mult;
}
// interpolate ofdm s13: 3/2*ofdms11 + -1/2*ofdms7 3/2 q1.15 49152 1/2 q1.15 16384
int16_t *dlChEst_ofdm13 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][13*ue->frame_parms.ofdm_symbol_size];
for(int i=0; i<(2*ue->frame_parms.ofdm_symbol_size); i++)
{
int64_t tmp_mult = 0;
tmp_mult = ((int64_t)dlChEst_ofdm11[i] * 49152 - (int64_t)dlChEst_ofdm7[i] * 16384);
tmp_mult = tmp_mult >> 15;
dlChEst_ofdm13[i] = tmp_mult;
}
}
interpolateS11S12 = 1;
} // this is 1/3,2/3 combination for pilots spaced by 3 symbols
else if (symbol == pilot1) {
dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][0];
//LOG_I(PHY,"Interpolate s0-->s4 to get s1 s2 and s3 Ns %d \n", Ns);
if (ue->frame_parms.Ncp==0) {// pilot spacing 4 symbols (1/4,1/2,3/4 combination)
uint8_t previous_subframe;
if(Ns>>1 == 0)
previous_subframe = 9;
else
previous_subframe = ((Ns>>1) - 1 )%9;
if((subframe_select(&ue->frame_parms,previous_subframe) == SF_UL)) {
multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
} else {
multadd_complex_vector_real_scalar(dl_ch_prev,24576,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,8192,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,24576,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
}
} else {
multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)<<1),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
} // pilot spacing 3 symbols (1/3,2/3 combination)
} else if (symbol == pilot2) {
dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot1*(ue->frame_parms.ofdm_symbol_size)];
multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
} else { // symbol == pilot3
// printf("Interpolating 0->%d\n",4-ue->frame_parms.Ncp);
dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot2*(ue->frame_parms.ofdm_symbol_size)];
if (ue->frame_parms.Ncp==0) {// pilot spacing 4 symbols (1/4,1/2,3/4 combination)
multadd_complex_vector_real_scalar(dl_ch_prev,24576,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,8192,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,24576,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
} else {
multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)<<1),1,ue->frame_parms.ofdm_symbol_size);
multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size);
} // pilot spacing 3 symbols (1/3,2/3 combination)
if((ue->rx_offset_diff !=0) && ((Ns>>1) == 9)) {
//LOG_I(PHY,"Extrapolate s7-->s11 to get s12 and s13 Ns %d\n", Ns);
interpolateS11S12 = 0;
//LOG_E(PHY,"Interpolate s7--s11 s12 s13 pilot 3 Ns %d l %d symbol %d \n", Ns, l, symbol);
int16_t *dlChEst_ofdm11 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot3*(ue->frame_parms.ofdm_symbol_size)];
int16_t *dlChEst_ofdm7 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot2*(ue->frame_parms.ofdm_symbol_size)];
// interpolate ofdm s12: 5/4*ofdms11 + -1/4*ofdms7 5/4 q1.15 40960 -1/4 q1.15 8192
int16_t *dlChEst_ofdm12 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][12*ue->frame_parms.ofdm_symbol_size];
for(int i=0; i<(2*ue->frame_parms.ofdm_symbol_size); i++) {
int64_t tmp_mult = 0;
tmp_mult = ((int64_t)dlChEst_ofdm11[i] * 40960 - (int64_t)dlChEst_ofdm7[i] * 8192);
tmp_mult = tmp_mult >> 15;
dlChEst_ofdm12[i] = tmp_mult;
} }
}
// interpolate ofdm s13: 3/2*ofdms11 + -1/2*ofdms7 3/2 q1.15 49152 1/2 q1.15 16384
int16_t *dlChEst_ofdm13 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][13*ue->frame_parms.ofdm_symbol_size];
for(int i=0; i<(2*ue->frame_parms.ofdm_symbol_size); i++) {
int64_t tmp_mult = 0;
tmp_mult = ((int64_t)dlChEst_ofdm11[i] * 49152 - (int64_t)dlChEst_ofdm7[i] * 16384);
tmp_mult = tmp_mult >> 15;
dlChEst_ofdm13[i] = tmp_mult;
}
}
}
}
} }
} }
void (*idft)(int16_t *,int16_t *, int); void (*idft)(int16_t *,int16_t *, int);
switch (ue->frame_parms.ofdm_symbol_size) { switch (ue->frame_parms.ofdm_symbol_size) {
case 128: case 128:
idft = idft128; idft = idft128;
break; break;
case 256: case 256:
idft = idft256; idft = idft256;
break; break;
case 512: case 512:
idft = idft512; idft = idft512;
break; break;
case 1024: case 1024:
idft = idft1024; idft = idft1024;
break; break;
case 1536: case 1536:
idft = idft1536; idft = idft1536;
break; break;
case 2048: case 2048:
idft = idft2048; idft = idft2048;
break; break;
default: default:
idft = idft512; idft = idft512;
break; break;
} }
if( ((Ns%2) == 0) && (l == pilot0)) if( ((Ns%2) == 0) && (l == pilot0)) {
{ // do ifft of channel estimate
// do ifft of channel estimate for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++)
for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) for (p=0; p<ue->frame_parms.nb_antenna_ports_eNB; p++) {
for (p=0; p<ue->frame_parms.nb_antenna_ports_eNB; p++) { if (ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset][(p<<1)+aarx]) {
if (ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset][(p<<1)+aarx]) //LOG_I(PHY,"Channel Impulse Computation Slot %d ThreadId %d Symbol %d \n", Ns, ue->current_thread_id[Ns>>1], l);
{ idft((int16_t *) &ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset][(p<<1)+aarx][8],
//LOG_I(PHY,"Channel Impulse Computation Slot %d ThreadId %d Symbol %d \n", Ns, ue->current_thread_id[Ns>>1], l); (int16_t *) ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates_time[eNB_offset][(p<<1)+aarx],1);
idft((int16_t*) &ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset][(p<<1)+aarx][8], }
(int16_t*) ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates_time[eNB_offset][(p<<1)+aarx],1); }
}
}
} }
T(T_UE_PHY_DL_CHANNEL_ESTIMATE, T_INT(eNB_id), T(T_UE_PHY_DL_CHANNEL_ESTIMATE, T_INT(eNB_id),
T_INT(ue->proc.proc_rxtx[ue->current_thread_id[Ns>>1]].frame_rx%1024), T_INT(ue->proc.proc_rxtx[ue->current_thread_id[Ns>>1]].subframe_rx), T_INT(ue->proc.proc_rxtx[ue->current_thread_id[Ns>>1]].frame_rx%1024), T_INT(ue->proc.proc_rxtx[ue->current_thread_id[Ns>>1]].subframe_rx),
T_INT(0), T_BUFFER(&ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates_time[eNB_offset][0][0], 512 * 4)); T_INT(0), T_BUFFER(&ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates_time[eNB_offset][0][0], 512 * 4));
return(0); return(0);
} }
...@@ -39,37 +39,33 @@ ...@@ -39,37 +39,33 @@
#include "prach_extern.h" #include "prach_extern.h"
#if (LTE_RRC_VERSION < MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION < MAKE_VERSION(14, 0, 0))
#define rx_prach0 rx_prach #define rx_prach0 rx_prach
#endif #endif
void rx_prach0(PHY_VARS_eNB *eNB, void rx_prach0(PHY_VARS_eNB *eNB,
RU_t *ru, RU_t *ru,
uint16_t *max_preamble, uint16_t *max_preamble,
uint16_t *max_preamble_energy, uint16_t *max_preamble_energy,
uint16_t *max_preamble_delay, uint16_t *max_preamble_delay,
uint16_t Nf, uint16_t Nf,
uint8_t tdd_mapindex uint8_t tdd_mapindex
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
,uint8_t br_flag, ,uint8_t br_flag,
uint8_t ce_level uint8_t ce_level
#endif #endif
) ) {
{
int i; int i;
LTE_DL_FRAME_PARMS *fp; LTE_DL_FRAME_PARMS *fp;
lte_frame_type_t frame_type; lte_frame_type_t frame_type;
uint16_t rootSequenceIndex; uint16_t rootSequenceIndex;
uint8_t prach_ConfigIndex; uint8_t prach_ConfigIndex;
uint8_t Ncs_config; uint8_t Ncs_config;
uint8_t restricted_set; uint8_t restricted_set;
uint8_t n_ra_prb; uint8_t n_ra_prb;
int subframe; int subframe;
int16_t *prachF=NULL; int16_t *prachF=NULL;
int16_t **rxsigF=NULL; int16_t **rxsigF=NULL;
int nb_rx; int nb_rx;
int16_t *prach2; int16_t *prach2;
uint8_t preamble_index; uint8_t preamble_index;
uint16_t NCS,NCS2; uint16_t NCS,NCS2;
...@@ -93,106 +89,108 @@ void rx_prach0(PHY_VARS_eNB *eNB, ...@@ -93,106 +89,108 @@ void rx_prach0(PHY_VARS_eNB *eNB,
int16_t levdB; int16_t levdB;
int fft_size,log2_ifft_size; int fft_size,log2_ifft_size;
int16_t prach_ifft_tmp[2048*2] __attribute__((aligned(32))); int16_t prach_ifft_tmp[2048*2] __attribute__((aligned(32)));
int32_t *prach_ifft=(int32_t*)NULL; int32_t *prach_ifft=(int32_t *)NULL;
int32_t **prach_ifftp=(int32_t **)NULL; int32_t **prach_ifftp=(int32_t **)NULL;
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
int prach_ifft_cnt=0; int prach_ifft_cnt=0;
#endif #endif
if (ru) {
if (ru) {
fp = &ru->frame_parms; fp = &ru->frame_parms;
nb_rx = ru->nb_rx; nb_rx = ru->nb_rx;
} } else if (eNB) {
else if (eNB) {
fp = &eNB->frame_parms; fp = &eNB->frame_parms;
nb_rx = fp->nb_antennas_rx; nb_rx = fp->nb_antennas_rx;
} } else AssertFatal(1==0,"rx_prach called without valid RU or eNB descriptor\n");
else AssertFatal(1==0,"rx_prach called without valid RU or eNB descriptor\n");
frame_type = fp->frame_type;
frame_type = fp->frame_type;
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if (br_flag == 1) { if (br_flag == 1) {
AssertFatal(fp->prach_emtc_config_common.prach_Config_enabled==1, AssertFatal(fp->prach_emtc_config_common.prach_Config_enabled==1,
"emtc prach_Config is not enabled\n"); "emtc prach_Config is not enabled\n");
AssertFatal(fp->prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[ce_level]==1, AssertFatal(fp->prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[ce_level]==1,
"ce_level %d is not active\n",ce_level); "ce_level %d is not active\n",ce_level);
rootSequenceIndex = fp->prach_emtc_config_common.rootSequenceIndex; rootSequenceIndex = fp->prach_emtc_config_common.rootSequenceIndex;
prach_ConfigIndex = fp->prach_emtc_config_common.prach_ConfigInfo.prach_ConfigIndex[ce_level]; prach_ConfigIndex = fp->prach_emtc_config_common.prach_ConfigInfo.prach_ConfigIndex[ce_level];
Ncs_config = fp->prach_emtc_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig; Ncs_config = fp->prach_emtc_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig;
restricted_set = fp->prach_emtc_config_common.prach_ConfigInfo.highSpeedFlag; restricted_set = fp->prach_emtc_config_common.prach_ConfigInfo.highSpeedFlag;
n_ra_prb = get_prach_prb_offset(fp,prach_ConfigIndex, n_ra_prb = get_prach_prb_offset(fp,prach_ConfigIndex,
fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level], fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level],
tdd_mapindex,Nf); tdd_mapindex,Nf);
// update pointers to results for ce_level // update pointers to results for ce_level
max_preamble += ce_level; max_preamble += ce_level;
max_preamble_energy += ce_level; max_preamble_energy += ce_level;
max_preamble_delay += ce_level; max_preamble_delay += ce_level;
} } else
else
#endif #endif
{ {
rootSequenceIndex = fp->prach_config_common.rootSequenceIndex; rootSequenceIndex = fp->prach_config_common.rootSequenceIndex;
prach_ConfigIndex = fp->prach_config_common.prach_ConfigInfo.prach_ConfigIndex; prach_ConfigIndex = fp->prach_config_common.prach_ConfigInfo.prach_ConfigIndex;
Ncs_config = fp->prach_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig; Ncs_config = fp->prach_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig;
restricted_set = fp->prach_config_common.prach_ConfigInfo.highSpeedFlag; restricted_set = fp->prach_config_common.prach_ConfigInfo.highSpeedFlag;
n_ra_prb = get_prach_prb_offset(fp,prach_ConfigIndex, n_ra_prb = get_prach_prb_offset(fp,prach_ConfigIndex,
fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset, fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,
tdd_mapindex,Nf); tdd_mapindex,Nf);
} }
int16_t *prach[nb_rx]; int16_t *prach[nb_rx];
uint8_t prach_fmt = get_prach_fmt(prach_ConfigIndex,frame_type); uint8_t prach_fmt = get_prach_fmt(prach_ConfigIndex,frame_type);
uint16_t N_ZC = (prach_fmt <4)?839:139; uint16_t N_ZC = (prach_fmt <4)?839:139;
if (eNB) { if (eNB) {
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if (br_flag == 1) { if (br_flag == 1) {
prach_ifftp = eNB->prach_vars_br.prach_ifft[ce_level]; prach_ifftp = eNB->prach_vars_br.prach_ifft[ce_level];
subframe = eNB->proc.subframe_prach_br; subframe = eNB->proc.subframe_prach_br;
prachF = eNB->prach_vars_br.prachF; prachF = eNB->prach_vars_br.prachF;
rxsigF = eNB->prach_vars_br.rxsigF[ce_level]; rxsigF = eNB->prach_vars_br.rxsigF[ce_level];
if (LOG_DEBUGFLAG(PRACH)){
if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (eNB) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d, rootSequenceIndex %d, repetition number %d,numRepetitionsPrePreambleAttempt %d\n", if (LOG_DEBUGFLAG(PRACH)) {
br_flag,ce_level,ru->proc.frame_prach,subframe, if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,
fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level], "PRACH (eNB) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d, rootSequenceIndex %d, repetition number %d,numRepetitionsPrePreambleAttempt %d\n",
prach_ConfigIndex,rootSequenceIndex, br_flag,ce_level,ru->proc.frame_prach,subframe,
eNB->prach_vars_br.repetition_number[ce_level], fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level],
fp->prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level]); prach_ConfigIndex,rootSequenceIndex,
eNB->prach_vars_br.repetition_number[ce_level],
fp->prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level]);
} }
} else } else
#endif #endif
{ {
prach_ifftp = eNB->prach_vars.prach_ifft[0]; prach_ifftp = eNB->prach_vars.prach_ifft[0];
subframe = eNB->proc.subframe_prach; subframe = eNB->proc.subframe_prach;
prachF = eNB->prach_vars.prachF; prachF = eNB->prach_vars.prachF;
rxsigF = eNB->prach_vars.rxsigF[0]; rxsigF = eNB->prach_vars.rxsigF[0];
if (LOG_DEBUGFLAG(PRACH)){
if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (eNB) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d , rootSequenceIndex %d\n", subframe,fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,prach_ConfigIndex,rootSequenceIndex); if (LOG_DEBUGFLAG(PRACH)) {
} if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (eNB) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d , rootSequenceIndex %d\n", subframe,
fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,prach_ConfigIndex,rootSequenceIndex);
} }
} }
else { } else {
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if (br_flag == 1) { if (br_flag == 1) {
subframe = ru->proc.subframe_prach_br; subframe = ru->proc.subframe_prach_br;
rxsigF = ru->prach_rxsigF_br[ce_level]; rxsigF = ru->prach_rxsigF_br[ce_level];
if (LOG_DEBUGFLAG(PRACH)){
if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (RU) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d\n", if (LOG_DEBUGFLAG(PRACH)) {
br_flag,ce_level,ru->proc.frame_prach,subframe,fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level],prach_ConfigIndex); if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (RU) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d\n",
} br_flag,ce_level,ru->proc.frame_prach,subframe,fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level],prach_ConfigIndex);
}
} else } else
#endif #endif
{ {
subframe = ru->proc.subframe_prach; subframe = ru->proc.subframe_prach;
rxsigF = ru->prach_rxsigF; rxsigF = ru->prach_rxsigF;
if (LOG_DEBUGFLAG(PRACH)){
if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (RU) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d\n",
subframe,fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,prach_ConfigIndex);
}
}
if (LOG_DEBUGFLAG(PRACH)) {
if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (RU) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d\n",
subframe,fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,prach_ConfigIndex);
}
}
} }
AssertFatal(ru!=NULL,"ru is null\n"); AssertFatal(ru!=NULL,"ru is null\n");
...@@ -200,25 +198,29 @@ void rx_prach0(PHY_VARS_eNB *eNB, ...@@ -200,25 +198,29 @@ void rx_prach0(PHY_VARS_eNB *eNB,
for (aa=0; aa<nb_rx; aa++) { for (aa=0; aa<nb_rx; aa++) {
if (ru->if_south == LOCAL_RF) { // set the time-domain signal if we have to use it in this node if (ru->if_south == LOCAL_RF) { // set the time-domain signal if we have to use it in this node
// DJP - indexing below in subframe zero takes us off the beginning of the array??? // DJP - indexing below in subframe zero takes us off the beginning of the array???
prach[aa] = (int16_t*)&ru->common.rxdata[aa][(subframe*fp->samples_per_tti)-ru->N_TA_offset]; prach[aa] = (int16_t *)&ru->common.rxdata[aa][(subframe*fp->samples_per_tti)-ru->N_TA_offset];
if (LOG_DUMPFLAG(PRACH)){ if (LOG_DUMPFLAG(PRACH)) {
int32_t en0=signal_energy((int32_t*)prach[aa],fp->samples_per_tti); int32_t en0=signal_energy((int32_t *)prach[aa],fp->samples_per_tti);
int8_t dbEn0 = dB_fixed(en0); int8_t dbEn0 = dB_fixed(en0);
int8_t rach_dBm = dbEn0 - ru->rx_total_gain_dB; int8_t rach_dBm = dbEn0 - ru->rx_total_gain_dB;
char buffer[80]; char buffer[80];
if (dbEn0>32 && prach[0]!= NULL) {
static int counter=0; if (dbEn0>32 && prach[0]!= NULL) {
sprintf(buffer, "%s%d", "/tmp/prach_rx",counter); static int counter=0;
LOG_M(buffer,"prach_rx",prach[0],fp->samples_per_tti,1,13); sprintf(buffer, "%s%d", "/tmp/prach_rx",counter);
} LOG_M(buffer,"prach_rx",prach[0],fp->samples_per_tti,1,13);
}
if (dB_fixed(en0)>32) { if (dB_fixed(en0)>32) {
sprintf(buffer, "rach_dBm:%d",rach_dBm); sprintf(buffer, "rach_dBm:%d",rach_dBm);
if (prach[0]!= NULL) LOG_M("prach_rx","prach_rx",prach[0],fp->samples_per_tti,1,1); if (prach[0]!= NULL) LOG_M("prach_rx","prach_rx",prach[0],fp->samples_per_tti,1,1);
LOG_I(PHY,"RU %d, br_flag %d ce_level %d frame %d subframe %d per_tti:%d prach:%p (energy %d) TA:%d %s rxdata:%p index:%d\n",
ru->idx,br_flag,ce_level,ru->proc.frame_prach,subframe,fp->samples_per_tti, LOG_I(PHY,"RU %d, br_flag %d ce_level %d frame %d subframe %d per_tti:%d prach:%p (energy %d) TA:%d %s rxdata:%p index:%d\n",
prach[aa],dbEn0,ru->N_TA_offset,buffer,ru->common.rxdata[aa], ru->idx,br_flag,ce_level,ru->proc.frame_prach,subframe,fp->samples_per_tti,
(subframe*fp->samples_per_tti)-ru->N_TA_offset); prach[aa],dbEn0,ru->N_TA_offset,buffer,ru->common.rxdata[aa],
(subframe*fp->samples_per_tti)-ru->N_TA_offset);
} }
} }
} }
...@@ -227,19 +229,17 @@ void rx_prach0(PHY_VARS_eNB *eNB, ...@@ -227,19 +229,17 @@ void rx_prach0(PHY_VARS_eNB *eNB,
// First compute physical root sequence // First compute physical root sequence
if (restricted_set == 0) { if (restricted_set == 0) {
AssertFatal(Ncs_config<=15, AssertFatal(Ncs_config<=15,
"Illegal Ncs_config for unrestricted format %d\n",Ncs_config); "Illegal Ncs_config for unrestricted format %d\n",Ncs_config);
NCS = NCS_unrestricted[Ncs_config]; NCS = NCS_unrestricted[Ncs_config];
} else { } else {
AssertFatal(Ncs_config<=14, AssertFatal(Ncs_config<=14,
"FATAL, Illegal Ncs_config for restricted format %d\n",Ncs_config); "FATAL, Illegal Ncs_config for restricted format %d\n",Ncs_config);
NCS = NCS_restricted[Ncs_config]; NCS = NCS_restricted[Ncs_config];
} }
if (eNB) start_meas(&eNB->rx_prach); if (eNB) start_meas(&eNB->rx_prach);
prach_root_sequence_map = (prach_fmt < 4) ? prach_root_sequence_map0_3 : prach_root_sequence_map4; prach_root_sequence_map = (prach_fmt < 4) ? prach_root_sequence_map0_3 : prach_root_sequence_map4;
// PDP is oversampled, e.g. 1024 sample instead of 839 // PDP is oversampled, e.g. 1024 sample instead of 839
// Adapt the NCS (zero-correlation zones) with oversampling factor e.g. 1024/839 // Adapt the NCS (zero-correlation zones) with oversampling factor e.g. 1024/839
NCS2 = (N_ZC==839) ? ((NCS<<10)/839) : ((NCS<<8)/139); NCS2 = (N_ZC==839) ? ((NCS<<10)/839) : ((NCS<<8)/139);
...@@ -248,56 +248,56 @@ void rx_prach0(PHY_VARS_eNB *eNB, ...@@ -248,56 +248,56 @@ void rx_prach0(PHY_VARS_eNB *eNB,
NCS2 = N_ZC; NCS2 = N_ZC;
switch (prach_fmt) { switch (prach_fmt) {
case 0: case 0:
Ncp = 3168; Ncp = 3168;
break; break;
case 1: case 1:
case 3: case 3:
Ncp = 21024; Ncp = 21024;
break; break;
case 2: case 2:
Ncp = 6240; Ncp = 6240;
break; break;
case 4: case 4:
Ncp = 448; Ncp = 448;
break; break;
default: default:
Ncp = 3168; Ncp = 3168;
break; break;
} }
// Adjust CP length based on UL bandwidth // Adjust CP length based on UL bandwidth
switch (fp->N_RB_UL) { switch (fp->N_RB_UL) {
case 6: case 6:
Ncp>>=4; Ncp>>=4;
break; break;
case 15: case 15:
Ncp>>=3; Ncp>>=3;
break; break;
case 25: case 25:
Ncp>>=2; Ncp>>=2;
break; break;
case 50: case 50:
Ncp>>=1; Ncp>>=1;
break; break;
case 75: case 75:
Ncp=(Ncp*3)>>2;
break;
case 100:
if (fp->threequarter_fs == 1)
Ncp=(Ncp*3)>>2; Ncp=(Ncp*3)>>2;
break; break;
}
case 100:
if (fp->threequarter_fs == 1)
Ncp=(Ncp*3)>>2;
break;
}
if (((eNB!=NULL) && (ru->function != NGFI_RAU_IF4p5))|| if (((eNB!=NULL) && (ru->function != NGFI_RAU_IF4p5))||
((eNB==NULL) && (ru->function == NGFI_RRU_IF4p5))) { // compute the DFTs of the PRACH temporal resources ((eNB==NULL) && (ru->function == NGFI_RRU_IF4p5))) { // compute the DFTs of the PRACH temporal resources
...@@ -305,192 +305,188 @@ void rx_prach0(PHY_VARS_eNB *eNB, ...@@ -305,192 +305,188 @@ void rx_prach0(PHY_VARS_eNB *eNB,
if (LOG_DEBUGFLAG(PRACH)) { if (LOG_DEBUGFLAG(PRACH)) {
LOG_D(PHY,"rx_prach: Doing FFT for N_RB_UL %d nb_rx:%d Ncp:%d\n",fp->N_RB_UL, nb_rx, Ncp); LOG_D(PHY,"rx_prach: Doing FFT for N_RB_UL %d nb_rx:%d Ncp:%d\n",fp->N_RB_UL, nb_rx, Ncp);
} }
for (aa=0; aa<nb_rx; aa++) { for (aa=0; aa<nb_rx; aa++) {
AssertFatal(prach[aa]!=NULL,"prach[%d] is null\n",aa); AssertFatal(prach[aa]!=NULL,"prach[%d] is null\n",aa);
prach2 = prach[aa] + (Ncp<<1); prach2 = prach[aa] + (Ncp<<1);
// do DFT // do DFT
switch (fp->N_RB_UL) { switch (fp->N_RB_UL) {
case 6: case 6:
if (prach_fmt == 4) { if (prach_fmt == 4) {
dft256(prach2,rxsigF[aa],1); dft256(prach2,rxsigF[aa],1);
} else { } else {
dft1536(prach2,rxsigF[aa],1); dft1536(prach2,rxsigF[aa],1);
if (prach_fmt>1) if (prach_fmt>1)
dft1536(prach2+3072,rxsigF[aa]+3072,1); dft1536(prach2+3072,rxsigF[aa]+3072,1);
} }
break; break;
case 15: case 15:
if (prach_fmt == 4) { if (prach_fmt == 4) {
dft256(prach2,rxsigF[aa],1); dft256(prach2,rxsigF[aa],1);
} else { } else {
dft3072(prach2,rxsigF[aa],1); dft3072(prach2,rxsigF[aa],1);
if (prach_fmt>1) if (prach_fmt>1)
dft3072(prach2+6144,rxsigF[aa]+6144,1); dft3072(prach2+6144,rxsigF[aa]+6144,1);
} }
break; break;
case 25: case 25:
default: default:
if (prach_fmt == 4) { if (prach_fmt == 4) {
dft1024(prach2,rxsigF[aa],1); dft1024(prach2,rxsigF[aa],1);
fft_size = 1024; fft_size = 1024;
} else { } else {
dft6144(prach2,rxsigF[aa],1); dft6144(prach2,rxsigF[aa],1);
if (prach_fmt>1) if (prach_fmt>1)
dft6144(prach2+12288,rxsigF[aa]+12288,1); dft6144(prach2+12288,rxsigF[aa]+12288,1);
fft_size = 6144; fft_size = 6144;
} }
break; break;
case 50: case 50:
if (prach_fmt == 4) { if (prach_fmt == 4) {
dft2048(prach2,rxsigF[aa],1); dft2048(prach2,rxsigF[aa],1);
} else { } else {
dft12288(prach2,rxsigF[aa],1); dft12288(prach2,rxsigF[aa],1);
if (prach_fmt>1) if (prach_fmt>1)
dft12288(prach2+24576,rxsigF[aa]+24576,1); dft12288(prach2+24576,rxsigF[aa]+24576,1);
} }
break; break;
case 75: case 75:
if (prach_fmt == 4) { if (prach_fmt == 4) {
dft3072(prach2,rxsigF[aa],1); dft3072(prach2,rxsigF[aa],1);
} else { } else {
dft18432(prach2,rxsigF[aa],1); dft18432(prach2,rxsigF[aa],1);
if (prach_fmt>1) if (prach_fmt>1)
dft18432(prach2+36864,rxsigF[aa]+36864,1); dft18432(prach2+36864,rxsigF[aa]+36864,1);
} }
break; break;
case 100: case 100:
if (fp->threequarter_fs==0) { if (fp->threequarter_fs==0) {
if (prach_fmt == 4) { if (prach_fmt == 4) {
dft4096(prach2,rxsigF[aa],1); dft4096(prach2,rxsigF[aa],1);
} else { } else {
dft24576(prach2,rxsigF[aa],1); dft24576(prach2,rxsigF[aa],1);
if (prach_fmt>1) if (prach_fmt>1)
dft24576(prach2+49152,rxsigF[aa]+49152,1); dft24576(prach2+49152,rxsigF[aa]+49152,1);
} }
} else { } else {
if (prach_fmt == 4) { if (prach_fmt == 4) {
dft3072(prach2,rxsigF[aa],1); dft3072(prach2,rxsigF[aa],1);
} else { } else {
dft18432(prach2,rxsigF[aa],1); dft18432(prach2,rxsigF[aa],1);
if (prach_fmt>1) if (prach_fmt>1)
dft18432(prach2+36864,rxsigF[aa]+36864,1); dft18432(prach2+36864,rxsigF[aa]+36864,1);
} }
} }
break; break;
} }
k = (12*n_ra_prb) - 6*fp->N_RB_UL; k = (12*n_ra_prb) - 6*fp->N_RB_UL;
if (k<0) { if (k<0) {
k+=(fp->ofdm_symbol_size); k+=(fp->ofdm_symbol_size);
} }
k*=12; k*=12;
k+=13; k+=13;
k*=2; k*=2;
int dftsize_x2 = fp->ofdm_symbol_size*24; int dftsize_x2 = fp->ofdm_symbol_size*24;
//LOG_D(PHY,"Shifting prach_rxF from %d to 0\n",k); //LOG_D(PHY,"Shifting prach_rxF from %d to 0\n",k);
if ((k+(839*2)) > dftsize_x2) { // PRACH signal is split around DC if ((k+(839*2)) > dftsize_x2) { // PRACH signal is split around DC
memmove((void*)&rxsigF[aa][dftsize_x2-k],(void*)&rxsigF[aa][0],(k+(839*2)-dftsize_x2)*2); memmove((void *)&rxsigF[aa][dftsize_x2-k],(void *)&rxsigF[aa][0],(k+(839*2)-dftsize_x2)*2);
memmove((void*)&rxsigF[aa][0],(void*)(&rxsigF[aa][k]),(dftsize_x2-k)*2); memmove((void *)&rxsigF[aa][0],(void *)(&rxsigF[aa][k]),(dftsize_x2-k)*2);
} } else // PRACH signal is not split around DC
else // PRACH signal is not split around DC memmove((void *)&rxsigF[aa][0],(void *)(&rxsigF[aa][k]),839*4);
memmove((void*)&rxsigF[aa][0],(void*)(&rxsigF[aa][k]),839*4);
} }
} }
if ((eNB==NULL) && (ru!=NULL) && ru->function == NGFI_RRU_IF4p5) { if ((eNB==NULL) && ru->function == NGFI_RRU_IF4p5) {
/// **** send_IF4 of rxsigF to RAU **** /// /// **** send_IF4 of rxsigF to RAU **** ///
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if (br_flag == 1) send_IF4p5(ru, ru->proc.frame_prach, ru->proc.subframe_prach, IF4p5_PRACH+1+ce_level); if (br_flag == 1) send_IF4p5(ru, ru->proc.frame_prach, ru->proc.subframe_prach, IF4p5_PRACH+1+ce_level);
else else
#endif #endif
send_IF4p5(ru, ru->proc.frame_prach, ru->proc.subframe_prach, IF4p5_PRACH); send_IF4p5(ru, ru->proc.frame_prach, ru->proc.subframe_prach, IF4p5_PRACH);
return; return;
} else if (eNB!=NULL) { } else if (eNB!=NULL) {
if ( LOG_DEBUGFLAG(PRACH)) { if ( LOG_DEBUGFLAG(PRACH)) {
int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840));
if ((en > 60)&&(br_flag==1)) LOG_I(PHY,"PRACH (br_flag %d,ce_level %d, n_ra_prb %d, k %d): Frame %d, Subframe %d => %d dB\n",br_flag,ce_level,n_ra_prb,k,eNB->proc.frame_rx,eNB->proc.subframe_rx,en); if ((en > 60)&&(br_flag==1)) LOG_I(PHY,"PRACH (br_flag %d,ce_level %d, n_ra_prb %d, k %d): Frame %d, Subframe %d => %d dB\n",br_flag,ce_level,n_ra_prb,k,eNB->proc.frame_rx,eNB->proc.subframe_rx,en);
} }
} }
// in case of RAU and prach received rx_thread wakes up prach
// in case of RAU and prach received rx_thread wakes up prach
// here onwards is for eNodeB_3GPP or NGFI_RAU_IF4p5 // here onwards is for eNodeB_3GPP or NGFI_RAU_IF4p5
preamble_offset_old = 99; preamble_offset_old = 99;
uint8_t update_TA = 4; uint8_t update_TA = 4;
uint8_t update_TA2 = 1; uint8_t update_TA2 = 1;
switch (eNB->frame_parms.N_RB_DL) { switch (eNB->frame_parms.N_RB_DL) {
case 6: case 6:
update_TA = 16; update_TA = 16;
break; break;
case 25: case 25:
update_TA = 4; update_TA = 4;
break; break;
case 50: case 50:
update_TA = 2; update_TA = 2;
break; break;
case 75: case 75:
update_TA = 3; update_TA = 3;
update_TA2 = 2; update_TA2 = 2;
case 100: break;
update_TA = 1;
break; case 100:
update_TA = 1;
break;
} }
*max_preamble_energy=0; *max_preamble_energy=0;
for (preamble_index=0 ; preamble_index<64 ; preamble_index++) { for (preamble_index=0 ; preamble_index<64 ; preamble_index++) {
if (LOG_DEBUGFLAG(PRACH)) {
int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840));
if (LOG_DEBUGFLAG(PRACH)){
int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840));
if (en>60) LOG_I(PHY,"frame %d, subframe %d : Trying preamble %d (br_flag %d)\n",ru->proc.frame_prach,subframe,preamble_index,br_flag); if (en>60) LOG_I(PHY,"frame %d, subframe %d : Trying preamble %d (br_flag %d)\n",ru->proc.frame_prach,subframe,preamble_index,br_flag);
} }
if (restricted_set == 0) { if (restricted_set == 0) {
// This is the relative offset in the root sequence table (5.7.2-4 from 36.211) for the given preamble index // This is the relative offset in the root sequence table (5.7.2-4 from 36.211) for the given preamble index
preamble_offset = ((NCS==0)? preamble_index : (preamble_index/(N_ZC/NCS))); preamble_offset = ((NCS==0)? preamble_index : (preamble_index/(N_ZC/NCS)));
if (preamble_offset != preamble_offset_old) { if (preamble_offset != preamble_offset_old) {
preamble_offset_old = preamble_offset; preamble_offset_old = preamble_offset;
new_dft = 1; new_dft = 1;
// This is the \nu corresponding to the preamble index // This is the \nu corresponding to the preamble index
preamble_shift = 0; preamble_shift = 0;
} } else {
else {
preamble_shift -= NCS; preamble_shift -= NCS;
if (preamble_shift < 0) if (preamble_shift < 0)
preamble_shift+=N_ZC; preamble_shift+=N_ZC;
} }
...@@ -519,7 +515,6 @@ void rx_prach0(PHY_VARS_eNB *eNB, ...@@ -519,7 +515,6 @@ void rx_prach0(PHY_VARS_eNB *eNB,
} }
u = prach_root_sequence_map[index]; u = prach_root_sequence_map[index];
uint16_t n_group_ra = 0; uint16_t n_group_ra = 0;
if ( (du[u]<(N_ZC/3)) && (du[u]>=NCS) ) { if ( (du[u]<(N_ZC/3)) && (du[u]>=NCS) ) {
...@@ -560,177 +555,184 @@ void rx_prach0(PHY_VARS_eNB *eNB, ...@@ -560,177 +555,184 @@ void rx_prach0(PHY_VARS_eNB *eNB,
// Compute DFT of RX signal (conjugate input, results in conjugate output) for each new rootSequenceIndex // Compute DFT of RX signal (conjugate input, results in conjugate output) for each new rootSequenceIndex
if (LOG_DEBUGFLAG(PRACH)) { if (LOG_DEBUGFLAG(PRACH)) {
int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840));
if (en>60) LOG_I(PHY,"frame %d, subframe %d : preamble index %d: offset %d, preamble shift %d (br_flag %d, en %d)\n", if (en>60) LOG_I(PHY,"frame %d, subframe %d : preamble index %d: offset %d, preamble shift %d (br_flag %d, en %d)\n",
ru->proc.frame_prach,subframe,preamble_index,preamble_offset,preamble_shift,br_flag,en); ru->proc.frame_prach,subframe,preamble_index,preamble_offset,preamble_shift,br_flag,en);
} }
log2_ifft_size = 10; log2_ifft_size = 10;
fft_size = 6144; fft_size = 6144;
if (new_dft == 1) { if (new_dft == 1) {
new_dft = 0; new_dft = 0;
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if (br_flag == 1) { if (br_flag == 1) {
Xu=(int16_t*)eNB->X_u_br[ce_level][preamble_offset-first_nonzero_root_idx]; Xu=(int16_t *)eNB->X_u_br[ce_level][preamble_offset-first_nonzero_root_idx];
prach_ifft = prach_ifftp[prach_ifft_cnt++]; prach_ifft = prach_ifftp[prach_ifft_cnt++];
if (eNB->prach_vars_br.repetition_number[ce_level]==1) memset(prach_ifft,0,((N_ZC==839)?2048:256)*sizeof(int32_t));
} if (eNB->prach_vars_br.repetition_number[ce_level]==1) memset(prach_ifft,0,((N_ZC==839)?2048:256)*sizeof(int32_t));
else } else
#endif #endif
{ {
Xu=(int16_t*)eNB->X_u[preamble_offset-first_nonzero_root_idx]; Xu=(int16_t *)eNB->X_u[preamble_offset-first_nonzero_root_idx];
prach_ifft = prach_ifftp[0]; prach_ifft = prach_ifftp[0];
memset(prach_ifft,0,((N_ZC==839) ? 2048 : 256)*sizeof(int32_t)); memset(prach_ifft,0,((N_ZC==839) ? 2048 : 256)*sizeof(int32_t));
} }
memset(prachF, 0, sizeof(int16_t)*2*1024 ); memset(prachF, 0, sizeof(int16_t)*2*1024 );
if (LOG_DUMPFLAG(PRACH)) {
if (LOG_DUMPFLAG(PRACH)) {
if (prach[0]!= NULL) LOG_M("prach_rx0.m","prach_rx0",prach[0],6144+792,1,1); if (prach[0]!= NULL) LOG_M("prach_rx0.m","prach_rx0",prach[0],6144+792,1,1);
LOG_M("prach_rx1.m","prach_rx1",prach[1],6144+792,1,1);
LOG_M("prach_rxF0.m","prach_rxF0",rxsigF[0],24576,1,1); LOG_M("prach_rx1.m","prach_rx1",prach[1],6144+792,1,1);
LOG_M("prach_rxF1.m","prach_rxF1",rxsigF[1],6144,1,1); LOG_M("prach_rxF0.m","prach_rxF0",rxsigF[0],24576,1,1);
LOG_M("prach_rxF1.m","prach_rxF1",rxsigF[1],6144,1,1);
} }
for (aa=0;aa<nb_rx; aa++) { for (aa=0; aa<nb_rx; aa++) {
// Do componentwise product with Xu* on each antenna // Do componentwise product with Xu* on each antenna
k=0;
k=0;
for (offset=0; offset<(N_ZC<<1); offset+=2) { for (offset=0; offset<(N_ZC<<1); offset+=2) {
prachF[offset] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k] + (int32_t)Xu[offset+1]*rxsigF[aa][k+1])>>15); prachF[offset] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k] + (int32_t)Xu[offset+1]*rxsigF[aa][k+1])>>15);
prachF[offset+1] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k+1] - (int32_t)Xu[offset+1]*rxsigF[aa][k])>>15); prachF[offset+1] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k+1] - (int32_t)Xu[offset+1]*rxsigF[aa][k])>>15);
k+=2; k+=2;
if (k==(12*2*fp->ofdm_symbol_size))
k=0; if (k==(12*2*fp->ofdm_symbol_size))
} k=0;
}
// Now do IFFT of size 1024 (N_ZC=839) or 256 (N_ZC=139)
if (N_ZC == 839) { // Now do IFFT of size 1024 (N_ZC=839) or 256 (N_ZC=139)
log2_ifft_size = 10; if (N_ZC == 839) {
idft1024(prachF,prach_ifft_tmp,1); log2_ifft_size = 10;
// compute energy and accumulate over receive antennas and repetitions for BR idft1024(prachF,prach_ifft_tmp,1);
for (i=0;i<2048;i++)
prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[i<<1] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>10; // compute energy and accumulate over receive antennas and repetitions for BR
} else { for (i=0; i<2048; i++)
idft256(prachF,prach_ifft_tmp,1); prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[i<<1] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>10;
log2_ifft_size = 8; } else {
// compute energy and accumulate over receive antennas and repetitions for BR idft256(prachF,prach_ifft_tmp,1);
for (i=0;i<256;i++) log2_ifft_size = 8;
prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[(i<<1)] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>10;
} // compute energy and accumulate over receive antennas and repetitions for BR
for (i=0; i<256; i++)
if (LOG_DUMPFLAG(PRACH)) { prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[(i<<1)] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>10;
if (aa==0) LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1); }
if (LOG_DUMPFLAG(PRACH)) {
if (aa==0) LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1);
if (aa==1) LOG_M("prach_rxF_comp1.m","prach_rxF_comp1",prachF,1024,1,1); if (aa==1) LOG_M("prach_rxF_comp1.m","prach_rxF_comp1",prachF,1024,1,1);
} }
}// antennas_rx }// antennas_rx
} // new dft } // new dft
// check energy in nth time shift, for // check energy in nth time shift, for
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
if ((br_flag==0) || if ((br_flag==0) ||
(eNB->prach_vars_br.repetition_number[ce_level]== (eNB->prach_vars_br.repetition_number[ce_level]==
eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level])) eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level]))
#endif #endif
{ {
if (LOG_DEBUGFLAG(PRACH)){ if (LOG_DEBUGFLAG(PRACH)) {
int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840));
if (en>60) LOG_I(PHY,"frame %d, subframe %d: Checking for peak in time-domain (br_flag %d, en %d)\n",ru->proc.frame_prach,subframe,br_flag,en);
if (en>60) LOG_I(PHY,"frame %d, subframe %d: Checking for peak in time-domain (br_flag %d, en %d)\n",ru->proc.frame_prach,subframe,br_flag,en);
} }
preamble_shift2 = ((preamble_shift==0) ? 0 : ((preamble_shift<<log2_ifft_size)/N_ZC));
for (i=0; i<NCS2; i++) {
lev = (int32_t)prach_ifft[(preamble_shift2+i)];
levdB = dB_fixed_times10(lev);
if (levdB>*max_preamble_energy) {
*max_preamble_energy = levdB;
*max_preamble_delay = ((i*fft_size)>>log2_ifft_size)*update_TA/update_TA2;
*max_preamble = preamble_index;
if (LOG_DEBUGFLAG(PRACH)){
int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840));
if ((en>60) && (br_flag==1))
LOG_D(PHY,"frame %d, subframe %d : max_preamble_energy %d, max_preamble_delay %d, max_preamble %d (br_flag %d,ce_level %d, levdB %d, lev %d)\n",
ru->proc.frame_prach,subframe,
*max_preamble_energy,*max_preamble_delay,
*max_preamble,br_flag,ce_level,levdB,lev);
}
}
}
preamble_shift2 = ((preamble_shift==0) ? 0 : ((preamble_shift<<log2_ifft_size)/N_ZC));
for (i=0; i<NCS2; i++) {
lev = (int32_t)prach_ifft[(preamble_shift2+i)];
levdB = dB_fixed_times10(lev);
if (levdB>*max_preamble_energy) {
*max_preamble_energy = levdB;
*max_preamble_delay = ((i*fft_size)>>log2_ifft_size)*update_TA/update_TA2;
*max_preamble = preamble_index;
if (LOG_DEBUGFLAG(PRACH)) {
int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840));
if ((en>60) && (br_flag==1))
LOG_D(PHY,"frame %d, subframe %d : max_preamble_energy %d, max_preamble_delay %d, max_preamble %d (br_flag %d,ce_level %d, levdB %d, lev %d)\n",
ru->proc.frame_prach,subframe,
*max_preamble_energy,*max_preamble_delay,
*max_preamble,br_flag,ce_level,levdB,lev);
}
}
} }
}
}// preamble_index }// preamble_index
if (LOG_DUMPFLAG(PRACH)) { if (LOG_DUMPFLAG(PRACH)) {
int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840));
if (en>60) { if (en>60) {
k = (12*n_ra_prb) - 6*fp->N_RB_UL; k = (12*n_ra_prb) - 6*fp->N_RB_UL;
if (k<0) k+=fp->ofdm_symbol_size; if (k<0) k+=fp->ofdm_symbol_size;
k*=12; k*=12;
k+=13; k+=13;
k*=2; k*=2;
if (br_flag == 0) { if (br_flag == 0) {
LOG_M("rxsigF.m","prach_rxF",&rxsigF[0][0],12288,1,1); LOG_M("rxsigF.m","prach_rxF",&rxsigF[0][0],12288,1,1);
LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1); LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1);
LOG_M("Xu.m","xu",Xu,N_ZC,1,1); LOG_M("Xu.m","xu",Xu,N_ZC,1,1);
LOG_M("prach_ifft0.m","prach_t0",prach_ifft,1024,1,1); LOG_M("prach_ifft0.m","prach_t0",prach_ifft,1024,1,1);
} else {
LOG_E(PHY,"Dumping prach (br_flag %d), k = %d (n_ra_prb %d)\n",br_flag,k,n_ra_prb);
LOG_M("rxsigF_br.m","prach_rxF_br",&rxsigF[0][0],12288,1,1);
LOG_M("prach_rxF_comp0_br.m","prach_rxF_comp0_br",prachF,1024,1,1);
LOG_M("Xu_br.m","xu_br",Xu,N_ZC,1,1);
LOG_M("prach_ifft0_br.m","prach_t0_br",prach_ifft,1024,1,1);
exit(-1);
} }
else {
LOG_E(PHY,"Dumping prach (br_flag %d), k = %d (n_ra_prb %d)\n",br_flag,k,n_ra_prb);
LOG_M("rxsigF_br.m","prach_rxF_br",&rxsigF[0][0],12288,1,1);
LOG_M("prach_rxF_comp0_br.m","prach_rxF_comp0_br",prachF,1024,1,1);
LOG_M("Xu_br.m","xu_br",Xu,N_ZC,1,1);
LOG_M("prach_ifft0_br.m","prach_t0_br",prach_ifft,1024,1,1);
exit(-1);
}
} }
} /* LOG_DUMPFLAG(PRACH) */ } /* LOG_DUMPFLAG(PRACH) */
if (eNB) stop_meas(&eNB->rx_prach);
if (eNB) stop_meas(&eNB->rx_prach);
} }
#if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0))
void rx_prach(PHY_VARS_eNB *eNB, void rx_prach(PHY_VARS_eNB *eNB,
RU_t *ru, RU_t *ru,
uint16_t *max_preamble, uint16_t *max_preamble,
uint16_t *max_preamble_energy, uint16_t *max_preamble_energy,
uint16_t *max_preamble_delay, uint16_t *max_preamble_delay,
uint16_t Nf, uint16_t Nf,
uint8_t tdd_mapindex, uint8_t tdd_mapindex,
uint8_t br_flag) { uint8_t br_flag) {
int i; int i;
int prach_mask=0; int prach_mask=0;
if (br_flag == 0) { if (br_flag == 0) {
rx_prach0(eNB,ru,max_preamble,max_preamble_energy,max_preamble_delay,Nf,tdd_mapindex,0,0); rx_prach0(eNB,ru,max_preamble,max_preamble_energy,max_preamble_delay,Nf,tdd_mapindex,0,0);
} } else { // This is procedure for eMTC, basically handling the repetitions
else { // This is procedure for eMTC, basically handling the repetitions
prach_mask = is_prach_subframe(&eNB->frame_parms,eNB->proc.frame_prach_br,eNB->proc.subframe_prach_br); prach_mask = is_prach_subframe(&eNB->frame_parms,eNB->proc.frame_prach_br,eNB->proc.subframe_prach_br);
for (i=0;i<4;i++) {
if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[i]==1) &&
((prach_mask&(1<<(i+1))) > 0)) { // check that prach CE level is active now
// if first reception in group of repetitions store frame for later (in RA-RNTI for Msg2) for (i=0; i<4; i++) {
if (eNB->prach_vars_br.repetition_number[i]==0) eNB->prach_vars_br.first_frame[i]=eNB->proc.frame_prach_br; if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[i]==1) &&
((prach_mask&(1<<(i+1))) > 0)) { // check that prach CE level is active now
// increment repetition number // if first reception in group of repetitions store frame for later (in RA-RNTI for Msg2)
eNB->prach_vars_br.repetition_number[i]++; if (eNB->prach_vars_br.repetition_number[i]==0) eNB->prach_vars_br.first_frame[i]=eNB->proc.frame_prach_br;
// do basic PRACH reception // increment repetition number
rx_prach0(eNB,ru,max_preamble,max_preamble_energy,max_preamble_delay,Nf,tdd_mapindex,1,i); eNB->prach_vars_br.repetition_number[i]++;
// do basic PRACH reception
// if last repetition, clear counter rx_prach0(eNB,ru,max_preamble,max_preamble_energy,max_preamble_delay,Nf,tdd_mapindex,1,i);
if (eNB->prach_vars_br.repetition_number[i] == eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[i]) {
eNB->prach_vars_br.repetition_number[i]=0;
} // if last repetition, clear counter
if (eNB->prach_vars_br.repetition_number[i] == eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[i]) {
eNB->prach_vars_br.repetition_number[i]=0;
}
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment