Commit d8ea9ea7 authored by Raymond Knopp

fixed an #if defined() bug in 3gpplte_turbo_decoder_sse_8bit.c, which was...

Fixed an #if defined() bug in 3gpplte_turbo_decoder_sse_8bit.c (the x86-64 guard was misspelled as __x86__64 instead of __x86_64__), which was introduced with the ARM NEON code.

git-svn-id: http://svn.eurecom.fr/openair4G/trunk@7555 818b1a75-f10b-46b9-bf7c-635c3b92a50f
parent f10e3f77
...@@ -1021,9 +1021,12 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y, ...@@ -1021,9 +1021,12 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y,
else if (round_avg < 64 ) else if (round_avg < 64 )
for (i=0,j=0; i<(3*(n2>>4))+1; i++,j+=2) for (i=0,j=0; i<(3*(n2>>4))+1; i++,j+=2)
((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],2),_mm_srai_epi16(((__m128i *)y)[j+1],2)); ((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],2),_mm_srai_epi16(((__m128i *)y)[j+1],2));
else else if (round_avg < 128)
for (i=0,j=0; i<(3*(n2>>4))+1; i++,j+=2) for (i=0,j=0; i<(3*(n2>>4))+1; i++,j+=2)
((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],3),_mm_srai_epi16(((__m128i *)y)[j+1],3)); ((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],3),_mm_srai_epi16(((__m128i *)y)[j+1],3));
else
for (i=0,j=0; i<(3*(n2>>4))+1; i++,j+=2)
((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],3),_mm_srai_epi16(((__m128i *)y)[j+1],4));
yp128 = (__m128i*)y8; yp128 = (__m128i*)y8;
...@@ -1331,7 +1334,7 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y, ...@@ -1331,7 +1334,7 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y,
while (iteration_cnt++ < max_iterations) { while (iteration_cnt++ < max_iterations) {
#ifdef DEBUG_LOGMAP #ifdef DEBUG_LOGMAP
printf("\n*******************ITERATION %d (n %d), ext %p\n\n",iteration_cnt,n,ext); printf("\n*******************ITERATION %d (n %d, n2 %d), ext %p\n\n",iteration_cnt,n,n2,ext);
#endif //DEBUG_LOGMAP #endif //DEBUG_LOGMAP
start_meas(intl1_stats); start_meas(intl1_stats);
...@@ -1430,6 +1433,7 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y, ...@@ -1430,6 +1433,7 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y,
((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t*)ext)[i]),((int8x16_t *)systematic0)[i]); ((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t*)ext)[i]),((int8x16_t *)systematic0)[i]);
#endif #endif
} }
} else { } else {
for (i=0; i<(n2>>4); i++) { for (i=0; i<(n2>>4); i++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
...@@ -1485,7 +1489,7 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y, ...@@ -1485,7 +1489,7 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y,
// re-order the decoded bits in theregular order // re-order the decoded bits in theregular order
// as it is presently ordered as 16 sequential columns // as it is presently ordered as 16 sequential columns
#if defined(__x86__64) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
__m128i* dbytes=(__m128i*)decoded_bytes_interl; __m128i* dbytes=(__m128i*)decoded_bytes_interl;
__m128i shuffle=SHUFFLE16(7,6,5,4,3,2,1,0); __m128i shuffle=SHUFFLE16(7,6,5,4,3,2,1,0);
__m128i mask __attribute__((aligned(16))); __m128i mask __attribute__((aligned(16)));
...@@ -1500,7 +1504,6 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y, ...@@ -1500,7 +1504,6 @@ unsigned char phy_threegpplte_turbo_decoder8(short *y,
tmp2=_mm_and_si128(tmp,mask); tmp2=_mm_and_si128(tmp,mask);
tmp2=_mm_cmpeq_epi16(tmp2,mask); tmp2=_mm_cmpeq_epi16(tmp2,mask);
decoded_bytes[n_128*0+i]=(uint8_t) _mm_movemask_epi8(_mm_packs_epi16(tmp2,zeros)); decoded_bytes[n_128*0+i]=(uint8_t) _mm_movemask_epi8(_mm_packs_epi16(tmp2,zeros));
int j; int j;
for (j=1; j<16; j++) { for (j=1; j<16; j++) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment