Commit dc2d948b, authored by Sakthivel Velumani

memory bugfix and other minor fixes

parent 92595bb2
...@@ -38,11 +38,7 @@ void nr_modulation(uint32_t *in, ...@@ -38,11 +38,7 @@ void nr_modulation(uint32_t *in,
uint32_t bit_cnt; uint32_t bit_cnt;
uint64_t x,x1,x2; uint64_t x,x1,x2;
#if 0//defined(__AVX2__) #if defined(__SSE2__)
uint16_t *in_2bytes = (uint16_t*) in;
__m256i *nr_mod_table256;
__m256i *out256;
#elif defined(__SSE2__)
__m128i *nr_mod_table128; __m128i *nr_mod_table128;
__m128i *out128; __m128i *out128;
#endif #endif
...@@ -51,6 +47,44 @@ void nr_modulation(uint32_t *in, ...@@ -51,6 +47,44 @@ void nr_modulation(uint32_t *in,
switch (mod_order) { switch (mod_order) {
#if defined(__SSE2__)
case 2:
nr_mod_table128 = (__m128i*) nr_qpsk_byte_mod_table;
out128 = (__m128i*) out;
for (i=0; i<length/8; i++)
out128[i] = nr_mod_table128[in_bytes[i]];
// the bits that are left out
i = i*8/2;
nr_mod_table32 = (int32_t*) nr_qpsk_mod_table;
while (i<length/2){
idx = ((in_bytes[(i*2)/8]>>((i*2)&0x7)) & mask);
out32[i] = nr_mod_table32[idx];
i++;
}
return;
#else
case 2:
nr_mod_table32 = (int32_t*) nr_qpsk_mod_table;
for (i=0; i<length/mod_order; i++) {
idx = ((in[i*2/32]>>((i*2)&0x1f)) & mask);
out32[i] = nr_mod_table32[idx];
}
return;
#endif
case 4:
out64 = (int64_t*) out;
for (i=0; i<length/8; i++)
out64[i] = nr_16qam_byte_mod_table[in_bytes[i]];
// the bits that are left out
i = i*8/4;
while (i<length/4){
idx = ((in_bytes[(i*4)/8]>>((i*4)&0x7)) & mask);
out32[i] = nr_16qam_mod_table[idx];
i++;
}
return;
case 6: case 6:
j = 0; j = 0;
for (i=0; i<length/192; i++) { for (i=0; i<length/192; i++) {
...@@ -113,46 +147,10 @@ void nr_modulation(uint32_t *in, ...@@ -113,46 +147,10 @@ void nr_modulation(uint32_t *in,
out32[i] = nr_mod_table32[in_bytes[i]]; out32[i] = nr_mod_table32[in_bytes[i]];
return; return;
#if 0//defined(__AVX2__)
// disabling this as it is slower than SSE2
// (maybe because L1 cache could not hold the large table)
case 2:
nr_mod_table256 = (__m256i*) nr_qpsk_2byte_mod_table;
out256 = (__m256i*) out;
for (i=0; i<length/16; i++)
out256[i] = nr_mod_table256[in_2bytes[i]];
if (length%16)
out256[i+1] = nr_mod_table256[in_2bytes[i]];
return;
#elif defined(__SSE2__)
case 2:
nr_mod_table128 = (__m128i*) nr_qpsk_byte_mod_table;
out128 = (__m128i*) out;
for (i=0; i<length/8; i++)
out128[i] = nr_mod_table128[in_bytes[i]];
if (length%8)
out128[i+1] = nr_mod_table128[in_bytes[i]];
return;
#endif
case 4:
out64 = (int64_t*) out;
for (int i=0; i<length/8; i++)
out64[i] = nr_16qam_byte_mod_table[in_bytes[i]];
return;
default: default:
break; break;
} }
nr_mod_table32 = (int32_t*) nr_qpsk_mod_table;
for (int i=0; i<length/mod_order; i++)
{
idx = ((in[i*mod_order/32]>>((i*mod_order)&0x1f)) & mask);
out32[i] = nr_mod_table32[idx];
}
} }
void nr_layer_mapping(int16_t **mod_symbs, void nr_layer_mapping(int16_t **mod_symbs,
......
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#include "nr_refsig.h" #include "nr_refsig.h"
#include "nr_mod_table.h" #include "nr_mod_table.h"
#include <math.h>
void nr_generate_modulation_table() { void nr_generate_modulation_table() {
float sqrt2 = 0.70711; float sqrt2 = 0.70711;
...@@ -32,27 +31,14 @@ void nr_generate_modulation_table() { ...@@ -32,27 +31,14 @@ void nr_generate_modulation_table() {
uint32_t i,j; uint32_t i,j;
short* table; short* table;
nr_qpsk_mod_table[0] = 16384; // QPSK
nr_qpsk_mod_table[1] = 16384; for (i=0; i<4; i++) {
nr_qpsk_mod_table[2] = -16384; nr_qpsk_mod_table[i*2] = (short)(1-2*(i&1))*val*sqrt2*sqrt2;
nr_qpsk_mod_table[3] = 16384; nr_qpsk_mod_table[i*2+1] = (short)(1-2*((i>>1)&1))*val*sqrt2*sqrt2;
nr_qpsk_mod_table[4] = 16384; //printf("%d j%d\n",nr_qpsk_mod_table[i*2],nr_qpsk_mod_table[i*2+1]);
nr_qpsk_mod_table[5] = -16384;
nr_qpsk_mod_table[6] = -16384;
nr_qpsk_mod_table[7] = -16384;
#if 0//defined(__AVX2__)
//QPSK m256
table = (short*) nr_qpsk_2byte_mod_table;
for (i=0; i<65536; i++) {
for (j=0; j<8; j++) {
table[i*16+(j*2)] = (short)(1-2*((i>>(j*2))&1))*val*sqrt2*sqrt2;
table[i*16+(j*2)+1] = (short)(1-2*((i>>(j*2+1))&1))*val*sqrt2*sqrt2;
//printf("%d j%d\n",nr_qpsk_byte_mod_table[i*8+(j*2)],nr_qpsk_byte_mod_table[i*8+(j*2)+1]);
}
} }
#elif defined(__SSE2__)
#if defined(__SSE2__)
//QPSK m128 //QPSK m128
table = (short*) nr_qpsk_byte_mod_table; table = (short*) nr_qpsk_byte_mod_table;
for (i=0; i<256; i++) { for (i=0; i<256; i++) {
...@@ -74,6 +60,13 @@ nr_qpsk_mod_table[7] = -16384; ...@@ -74,6 +60,13 @@ nr_qpsk_mod_table[7] = -16384;
} }
} }
table = (short*) nr_16qam_mod_table;
for (i=0; i<16; i++) {
table[i*2] = (short)((1-2*(i&1))*(2-(1-2*((i>>2)&1))))*val*sqrt10*sqrt2;
table[i*2+1] = (short)((1-2*((i>>1)&1))*(2-(1-2*((i>>3)&1))))*val*sqrt10*sqrt2;
//printf("%d j%d\n",table[i*2],table[i*2+1]);
}
//64QAM //64QAM
table = (short*) nr_64qam_mod_table; table = (short*) nr_64qam_mod_table;
for (i=0; i<4096; i++) { for (i=0; i<4096; i++) {
......
...@@ -31,9 +31,8 @@ ...@@ -31,9 +31,8 @@
short nr_qpsk_mod_table[8]; short nr_qpsk_mod_table[8];
#if 0//defined(__AVX2__) int32_t nr_16qam_mod_table[16];
__m256i nr_qpsk_2byte_mod_table[65536]; #if defined(__SSE2__)
#elif defined(__SSE2__)
__m128i nr_qpsk_byte_mod_table[2048]; __m128i nr_qpsk_byte_mod_table[2048];
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment