Commit 8c5e8126 authored by Florian Kaltenberger's avatar Florian Kaltenberger

Added support for ARM NEON, lots of changes in openair1 and some in cmake_targets

git-svn-id: http://svn.eurecom.fr/openair4G/trunk@7543 818b1a75-f10b-46b9-bf7c-635c3b92a50f
parent 85ff3abc
...@@ -126,7 +126,7 @@ add_list_string_option(CMAKE_BUILD_TYPE "RelWithDebInfo" "Choose the type of bui ...@@ -126,7 +126,7 @@ add_list_string_option(CMAKE_BUILD_TYPE "RelWithDebInfo" "Choose the type of bui
Message("Architecture is ${CMAKE_SYSTEM_PROCESSOR}") Message("Architecture is ${CMAKE_SYSTEM_PROCESSOR}")
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "-mfloat-abi=softfp -mfpu=neon") set(C_FLAGS_PROCESSOR "-gdwarf-2 -mfloat-abi=hard -mfpu=neon -lgcc -lrt")
else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "-msse4.2") set(C_FLAGS_PROCESSOR "-msse4.2")
endif() endif()
...@@ -140,8 +140,8 @@ set(CMAKE_C_FLAGS ...@@ -140,8 +140,8 @@ set(CMAKE_C_FLAGS
# set a flag for changes in the source code # set a flag for changes in the source code
# these changes are related to hardcoded path to include .h files # these changes are related to hardcoded path to include .h files
add_definitions(-DCMAKER) add_definitions(-DCMAKER)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -ggdb -DMALLOC_CHECK_=3") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -ggdb -DMALLOC_CHECK_=3 -O2") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O2")
# Below has been put in comment because does not work with # Below has been put in comment because does not work with
# SVN authentication. # SVN authentication.
...@@ -778,7 +778,6 @@ set(PHY_SRC ...@@ -778,7 +778,6 @@ set(PHY_SRC
${OPENAIR1_DIR}/PHY/TOOLS/log2_approx.c ${OPENAIR1_DIR}/PHY/TOOLS/log2_approx.c
${OPENAIR1_DIR}/PHY/TOOLS/cmult_sv.c ${OPENAIR1_DIR}/PHY/TOOLS/cmult_sv.c
${OPENAIR1_DIR}/PHY/TOOLS/cmult_vv.c ${OPENAIR1_DIR}/PHY/TOOLS/cmult_vv.c
${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
${OPENAIR1_DIR}/PHY/TOOLS/cdot_prod.c ${OPENAIR1_DIR}/PHY/TOOLS/cdot_prod.c
${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c ${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c
${OPENAIR1_DIR}/PHY/TOOLS/dB_routines.c ${OPENAIR1_DIR}/PHY/TOOLS/dB_routines.c
...@@ -1692,7 +1691,7 @@ foreach(myExe dlsim ulsim pbchsim scansim mbmssim pdcchsim pucchsim prachsim syn ...@@ -1692,7 +1691,7 @@ foreach(myExe dlsim ulsim pbchsim scansim mbmssim pdcchsim pucchsim prachsim syn
${XFORMS_SOURCE} ${XFORMS_SOURCE}
) )
target_link_libraries (${myExe} target_link_libraries (${myExe}
-Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS MSC ${ITTI_LIB} -Wl,--end-group -Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS ${ITTI_LIB} -Wl,--end-group
pthread m rt ${CONFIG_LIBRARIES} ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES} pthread m rt ${CONFIG_LIBRARIES} ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES}
) )
endforeach(myExe) endforeach(myExe)
......
...@@ -10,5 +10,6 @@ set(RANDOM_BF False) ...@@ -10,5 +10,6 @@ set(RANDOM_BF False)
set(PBS_SIM False) set(PBS_SIM False)
set(PERFECT_CE False) set(PERFECT_CE False)
set(NAS_UE False) set(NAS_UE False)
set(MESSAGE_CHART_GENERATOR False)
include(${CMAKE_CURRENT_SOURCE_DIR}/../CMakeLists.txt) include(${CMAKE_CURRENT_SOURCE_DIR}/../CMakeLists.txt)
...@@ -31,8 +31,9 @@ ...@@ -31,8 +31,9 @@
author: raymond.knopp@eurecom.fr author: raymond.knopp@eurecom.fr
date: 10.2009 date: 10.2009
*/ */
#include "defs.h" #ifndef TC_MAIN
//#include "lte_interleaver_inline.h" //#include "defs.h"
#endif
#include "extern_3GPPinterleaver.h" #include "extern_3GPPinterleaver.h"
......
This diff is collapsed.
TURBO_SRC = 3gpplte.c 3gpplte_turbo_decoder_sse.c crc_byte.c TURBO_SRC = 3gpplte_sse.c 3gpplte_turbo_decoder_sse.c crc_byte.c
RATE13CC_SRC = ccoding_byte_lte.c viterbi_lte.c crc_byte.c RATE13CC_SRC = ccoding_byte_lte.c viterbi_lte.c crc_byte.c
RATE12CC_SRC = ccoding_byte.c viterbi.c crc_byte.c RATE12CC_SRC = ccoding_byte.c viterbi.c crc_byte.c
all: turbolte_test rate13cc_test rate12cc_test run_turbo run_rate13cc run_rate13ccdab run_rate12cc all: 3gpplte_sse
turbolte_test: $(TURBO_SRC) 3gpplte_sse: $(TURBO_SRC)
gcc -o turbo_test $(TURBO_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall gcc -o 3gpplte_sse 3gpplte_sse.c -msse4 -Wall -g -ggdb -DMAIN
rate13cc_test: $(RATE13CC_SRC)
gcc -o rate13cc_test $(RATE13CC_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall
rate12cc_test: $(RATE12CC_SRC)
gcc -o rate12cc_test $(RATE12CC_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall
run_turbo: turbolte_test
./turbo_test
run_rate13cc: rate13cc_test
./rate13cc_test
run_rate13ccdab: rate13cc_test
./rate13cc_test -d
run_rate12cc: rate12cc_test
./rate12cc_test
clean: clean:
rm *.o rm *.o
...@@ -55,22 +55,22 @@ unsigned char ccodelte_table_rev[128]; // for receiver ...@@ -55,22 +55,22 @@ unsigned char ccodelte_table_rev[128]; // for receiver
void void
ccodelte_encode (unsigned int numbits, ccodelte_encode (int32_t numbits,
unsigned char add_crc, uint8_t add_crc,
unsigned char *inPtr, uint8_t *inPtr,
unsigned char *outPtr, uint8_t *outPtr,
unsigned short rnti) uint16_t rnti)
{ {
unsigned int state; uint32_t state;
unsigned char c, out, first_bit; uint8_t c, out, first_bit;
char shiftbit=0; int8_t shiftbit=0;
unsigned short c16; uint16_t c16;
unsigned short next_last_byte=0; uint16_t next_last_byte=0;
unsigned int crc=0; uint32_t crc=0;
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
unsigned int dummy=0; uint32_t dummy=0;
#endif //DEBUG_CCODE #endif //DEBUG_CCODE
/* The input bit is shifted in position 8 of the state. /* The input bit is shifted in position 8 of the state.
...@@ -80,20 +80,19 @@ ccodelte_encode (unsigned int numbits, ...@@ -80,20 +80,19 @@ ccodelte_encode (unsigned int numbits,
if (add_crc == 1) { if (add_crc == 1) {
crc = crc8(inPtr,numbits); crc = crc8(inPtr,numbits);
first_bit = 2; first_bit = 2;
c = (unsigned char)(crc>>24); c = (uint8_t)(crc>>24);
} else if (add_crc == 2) { } else if (add_crc == 2) {
crc = crc16(inPtr,numbits); crc = crc16(inPtr,numbits);
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("ccode_lte : crc %x\n",crc); printf("ccode_lte : crc %x\n",crc);
#endif #endif
// scramble with RNTI // scramble with RNTI
crc ^= (((unsigned int)rnti)<<16); crc ^= (((uint32_t)rnti)<<16);
#ifdef DEBUG_CCODE #ifdef DEBUG_CCODE
printf("ccode_lte : crc %x (rnti %x)\n",crc,rnti); printf("ccode_lte : crc %x (rnti %x)\n",crc,rnti);
#endif #endif
first_bit = 2; first_bit = 2;
// c = (unsigned char)(crc>>24); c = (uint8_t)((crc>>16)&0xff);
c = (unsigned char)((crc>>16)&0xff);
} else { } else {
next_last_byte = numbits>>3; next_last_byte = numbits>>3;
first_bit = (numbits-6)&7; first_bit = (numbits-6)&7;
...@@ -182,7 +181,7 @@ ccodelte_encode (unsigned int numbits, ...@@ -182,7 +181,7 @@ ccodelte_encode (unsigned int numbits,
// now code 8-bit CRC for UCI // now code 8-bit CRC for UCI
if (add_crc == 1) { if (add_crc == 1) {
c = (unsigned char)(crc>>24); c = (uint8_t)(crc>>24);
// for (shiftbit = 0; (shiftbit<8);shiftbit++) { // for (shiftbit = 0; (shiftbit<8);shiftbit++) {
for (shiftbit = 7; (shiftbit>=0); shiftbit--) { for (shiftbit = 7; (shiftbit>=0); shiftbit--) {
...@@ -209,7 +208,7 @@ ccodelte_encode (unsigned int numbits, ...@@ -209,7 +208,7 @@ ccodelte_encode (unsigned int numbits,
// now code 16-bit CRC for DCI // now code 16-bit CRC for DCI
if (add_crc == 2) { if (add_crc == 2) {
c16 = (unsigned short)(crc>>16); c16 = (uint16_t)(crc>>16);
// for (shiftbit = 0; (shiftbit<16);shiftbit++) { // for (shiftbit = 0; (shiftbit<16);shiftbit++) {
for (shiftbit = 15; (shiftbit>=0); shiftbit--) { for (shiftbit = 15; (shiftbit>=0); shiftbit--) {
......
...@@ -320,7 +320,7 @@ void threegpplte_turbo_encoder(uint8_t *input, ...@@ -320,7 +320,7 @@ void threegpplte_turbo_encoder(uint8_t *input,
uint16_t interleaver_f2); uint16_t interleaver_f2);
/** \fn void ccodelte_encode(uint32_t numbits,uint8_t add_crc, uint8_t *inPtr,uint8_t *outPtr,uint16_t rnti) /** \fn void ccodelte_encode(int32_t numbits,uint8_t add_crc, uint8_t *inPtr,uint8_t *outPtr,uint16_t rnti)
\brief This function implements the LTE convolutional code of rate 1/3 \brief This function implements the LTE convolutional code of rate 1/3
with a constraint length of 7 bits. The inputs are bit packed in octets with a constraint length of 7 bits. The inputs are bit packed in octets
(from MSB to LSB). Trellis tail-biting is included here. (from MSB to LSB). Trellis tail-biting is included here.
...@@ -331,7 +331,7 @@ void threegpplte_turbo_encoder(uint8_t *input, ...@@ -331,7 +331,7 @@ void threegpplte_turbo_encoder(uint8_t *input,
@param rnti RNTI for CRC scrambling @param rnti RNTI for CRC scrambling
*/ */
void void
ccodelte_encode (uint32_t numbits, ccodelte_encode (int32_t numbits,
uint8_t add_crc, uint8_t add_crc,
uint8_t *inPtr, uint8_t *inPtr,
uint8_t *outPtr, uint8_t *outPtr,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -299,11 +299,17 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -299,11 +299,17 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms,
{ {
uint16_t re; uint16_t re;
int16_t amp; int16_t amp;
#if defined(__x86_64__) || defined(__i386__)
__m128i *ul_ch_mag128,*ul_ch_magb128,*rxdataF_comp128; __m128i *ul_ch_mag128,*ul_ch_magb128,*rxdataF_comp128;
rxdataF_comp128 = (__m128i *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12]; rxdataF_comp128 = (__m128i *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_mag128 = (__m128i *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12]; ul_ch_mag128 = (__m128i *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_magb128 = (__m128i *)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12]; ul_ch_magb128 = (__m128i *)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
#elif defined(__arm__)
int16x8_t *ul_ch_mag128,*ul_ch_magb128,*rxdataF_comp128;
rxdataF_comp128 = (int16x8_t*)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_mag128 = (int16x8_t*)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_magb128 = (int16x8_t*)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
#endif
for (re=0; re<(Msc_RS>>2); re++) { for (re=0; re<(Msc_RS>>2); re++) {
...@@ -313,15 +319,25 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -313,15 +319,25 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms,
amp=255; amp=255;
// printf("freq_eq: symbol %d re %d => %d,%d,%d, (%d) (%d,%d) => ",symbol,re,*((int16_t*)(&ul_ch_mag128[re])),amp,inv_ch[8*amp],*((int16_t*)(&ul_ch_mag128[re]))*inv_ch[8*amp],*(int16_t*)&(rxdataF_comp128[re]),*(1+(int16_t*)&(rxdataF_comp128[re]))); // printf("freq_eq: symbol %d re %d => %d,%d,%d, (%d) (%d,%d) => ",symbol,re,*((int16_t*)(&ul_ch_mag128[re])),amp,inv_ch[8*amp],*((int16_t*)(&ul_ch_mag128[re]))*inv_ch[8*amp],*(int16_t*)&(rxdataF_comp128[re]),*(1+(int16_t*)&(rxdataF_comp128[re])));
#if defined(__x86_64__) || defined(__i386__)
rxdataF_comp128[re] = _mm_mullo_epi16(rxdataF_comp128[re],*((__m128i *)&inv_ch[8*amp])); rxdataF_comp128[re] = _mm_mullo_epi16(rxdataF_comp128[re],*((__m128i *)&inv_ch[8*amp]));
if (Qm==4) if (Qm==4)
ul_ch_mag128[re] = _mm_set1_epi16(324); // this is 512*2/sqrt(10) ul_ch_mag128[re] = _mm_set1_epi16(324); // this is 512*2/sqrt(10)
else { else {
ul_ch_mag128[re] = _mm_set1_epi16(316); // this is 512*4/sqrt(42) ul_ch_mag128[re] = _mm_set1_epi16(316); // this is 512*4/sqrt(42)
ul_ch_magb128[re] = _mm_set1_epi16(158); // this is 512*2/sqrt(42) ul_ch_magb128[re] = _mm_set1_epi16(158); // this is 512*2/sqrt(42)
} }
#elif defined(__arm__)
rxdataF_comp128[re] = vmulq_s16(rxdataF_comp128[re],*((int16x8_t *)&inv_ch[8*amp]));
if (Qm==4)
ul_ch_mag128[re] = vdupq_n_s16(324); // this is 512*2/sqrt(10)
else {
ul_ch_mag128[re] = vdupq_n_s16(316); // this is 512*4/sqrt(42)
ul_ch_magb128[re] = vdupq_n_s16(158); // this is 512*2/sqrt(42)
}
#endif
// printf("(%d,%d)\n",*(int16_t*)&(rxdataF_comp128[re]),*(1+(int16_t*)&(rxdataF_comp128[re]))); // printf("(%d,%d)\n",*(int16_t*)&(rxdataF_comp128[re]),*(1+(int16_t*)&(rxdataF_comp128[re])));
} }
......
...@@ -35,7 +35,11 @@ ...@@ -35,7 +35,11 @@
#include "PHY/defs.h" #include "PHY/defs.h"
//#define DEBUG_PHY //#define DEBUG_PHY
#if defined(__x86_64__) || defined(__i386__)
__m128i avg128F; __m128i avg128F;
#elif defined(__arm__)
int32x4_t avg128F;
#endif
//compute average channel_level on each (TX,RX) antenna pair //compute average channel_level on each (TX,RX) antenna pair
int dl_channel_level(int16_t *dl_ch, int dl_channel_level(int16_t *dl_ch,
...@@ -43,10 +47,15 @@ int dl_channel_level(int16_t *dl_ch, ...@@ -43,10 +47,15 @@ int dl_channel_level(int16_t *dl_ch,
{ {
int16_t rb; int16_t rb;
#if defined(__x86_64__) || defined(__i386__)
__m128i *dl_ch128; __m128i *dl_ch128;
#elif defined(__arm__)
int16x4_t *dl_ch128;
#endif
int avg; int avg;
//clear average level //clear average level
#if defined(__x86_64__) || defined(__i386__)
avg128F = _mm_setzero_si128(); avg128F = _mm_setzero_si128();
dl_ch128=(__m128i *)dl_ch; dl_ch128=(__m128i *)dl_ch;
...@@ -59,7 +68,25 @@ int dl_channel_level(int16_t *dl_ch, ...@@ -59,7 +68,25 @@ int dl_channel_level(int16_t *dl_ch,
dl_ch128+=3; dl_ch128+=3;
} }
#elif defined(__arm__)
avg128F = vdupq_n_s32(0);
dl_ch128=(int16x4_t *)dl_ch;
for (rb=0; rb<frame_parms->N_RB_DL; rb++) {
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[0],dl_ch128[0]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[1],dl_ch128[1]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[2],dl_ch128[2]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[3],dl_ch128[3]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[4],dl_ch128[4]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[5],dl_ch128[5]));
dl_ch128+=6;
}
#endif
DevAssert( frame_parms->N_RB_DL ); DevAssert( frame_parms->N_RB_DL );
avg = (((int*)&avg128F)[0] + avg = (((int*)&avg128F)[0] +
((int*)&avg128F)[1] + ((int*)&avg128F)[1] +
...@@ -67,10 +94,10 @@ int dl_channel_level(int16_t *dl_ch, ...@@ -67,10 +94,10 @@ int dl_channel_level(int16_t *dl_ch,
((int*)&avg128F)[3])/(frame_parms->N_RB_DL*12); ((int*)&avg128F)[3])/(frame_parms->N_RB_DL*12);
#if defined(__x86_64__) || defined(__i386__)
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
#endif
return(avg); return(avg);
} }
......
...@@ -46,14 +46,15 @@ ...@@ -46,14 +46,15 @@
//#include "defs.h" //#include "defs.h"
#include "PHY/defs.h" #include "PHY/defs.h"
#include "PHY/extern.h" #include "PHY/extern.h"
#include "pss6144.h"
#if defined(__x86_64__) || defined(__i386__)
#include "pss6144.h"
extern void print_shorts(char*,__m128i*); extern void print_shorts(char*,__m128i*);
#endif
void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq) void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq)
{ {
#if defined(__x86_64__) || defined(__i386__)
UE_SCAN_INFO_t *scan_info = &ue->scan_info[band]; UE_SCAN_INFO_t *scan_info = &ue->scan_info[band];
int16_t spectrum[12288] __attribute__((aligned(16))); int16_t spectrum[12288] __attribute__((aligned(16)));
int16_t spectrum_p5ms[12288] __attribute__((aligned(16))); int16_t spectrum_p5ms[12288] __attribute__((aligned(16)));
...@@ -358,5 +359,6 @@ void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq) ...@@ -358,5 +359,6 @@ void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq)
for (band_idx=0; band_idx<10; band_idx++) for (band_idx=0; band_idx<10; band_idx++)
printf("pss 2: level %d dB, freq %u\n", dB_fixed(scan_info->amp[2][band_idx]),scan_info->freq_offset_Hz[2][band_idx]); printf("pss 2: level %d dB, freq %u\n", dB_fixed(scan_info->amp[2][band_idx]),scan_info->freq_offset_Hz[2][band_idx]);
#endif
} }
...@@ -42,34 +42,26 @@ ...@@ -42,34 +42,26 @@
//#define DEBUG_MEAS //#define DEBUG_MEAS
#ifdef USER_MODE #ifdef USER_MODE
void print_shorts(char *s,__m128i *x) void print_shorts(char *s,short *x)
{ {
short *tempb = (short *)x;
printf("%s : %d,%d,%d,%d,%d,%d,%d,%d\n",s, printf("%s : %d,%d,%d,%d,%d,%d,%d,%d\n",s,
tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7] x[0],x[1],x[2],x[3],x[4],x[5],x[6],x[7]
); );
} }
void print_ints(char *s,__m128i *x) void print_ints(char *s,int *x)
{ {
int *tempb = (int *)x;
printf("%s : %d,%d,%d,%d\n",s, printf("%s : %d,%d,%d,%d\n",s,
tempb[0],tempb[1],tempb[2],tempb[3] x[0],x[1],x[2],x[3]
); );
} }
#endif #endif
__m128i pmi128_re __attribute__ ((aligned(16)));
__m128i pmi128_im __attribute__ ((aligned(16)));
__m128i mmtmpPMI0 __attribute__ ((aligned(16)));
__m128i mmtmpPMI1 __attribute__ ((aligned(16)));
__m128i mmtmpPMI2 __attribute__ ((aligned(16)));
__m128i mmtmpPMI3 __attribute__ ((aligned(16)));
int16_t get_PL(uint8_t Mod_id,uint8_t CC_id,uint8_t eNB_index) int16_t get_PL(uint8_t Mod_id,uint8_t CC_id,uint8_t eNB_index)
{ {
...@@ -421,7 +413,11 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue, ...@@ -421,7 +413,11 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
//int rx_power[NUMBER_OF_CONNECTED_eNB_MAX]; //int rx_power[NUMBER_OF_CONNECTED_eNB_MAX];
int i; int i;
unsigned int limit,subband; unsigned int limit,subband;
#if defined(__x86_64__) || defined(__i386__)
__m128i *dl_ch0_128,*dl_ch1_128; __m128i *dl_ch0_128,*dl_ch1_128;
#elif defined(__arm__)
int16x8_t *dl_ch0_128, *dl_ch1_128;
#endif
int *dl_ch0,*dl_ch1; int *dl_ch0,*dl_ch1;
LTE_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->lte_frame_parms; LTE_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->lte_frame_parms;
int nb_subbands,subband_size,last_subband_size; int nb_subbands,subband_size,last_subband_size;
...@@ -605,26 +601,30 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue, ...@@ -605,26 +601,30 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
// skip the first 4 RE due to interpolation filter length of 5 (not possible to skip 5 due to 128i alignment, must be multiple of 128bit) // skip the first 4 RE due to interpolation filter length of 5 (not possible to skip 5 due to 128i alignment, must be multiple of 128bit)
#if defined(__x86_64__) || defined(__i386__)
__m128i pmi128_re,pmi128_im,mmtmpPMI0,mmtmpPMI1,mmtmpPMI2,mmtmpPMI3;
dl_ch0_128 = (__m128i *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][aarx][4]; dl_ch0_128 = (__m128i *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][aarx][4];
dl_ch1_128 = (__m128i *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][2+aarx][4]; dl_ch1_128 = (__m128i *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][2+aarx][4];
#elif defined(__arm__)
int32x4_t pmi128_re,pmi128_im,mmtmpPMI0,mmtmpPMI1,mmtmpPMI0b,mmtmpPMI1b;
/* dl_ch0_128 = (int16x8_t *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][aarx][4];
#ifdef DEBUG_PHY dl_ch1_128 = (int16x8_t *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][2+aarx][4];
if(eNB_id==0){
print_shorts("Ch0",dl_ch0_128); #endif
print_shorts("Ch1",dl_ch1_128);
printf("eNB_ID = %d\n",eNB_id);
}
#endif
*/
for (subband=0; subband<nb_subbands; subband++) { for (subband=0; subband<nb_subbands; subband++) {
// pmi // pmi
#if defined(__x86_64__) || defined(__i386__)
pmi128_re = _mm_setzero_si128(); pmi128_re = _mm_setzero_si128();
pmi128_im = _mm_setzero_si128(); pmi128_im = _mm_setzero_si128();
#elif defined(__arm__)
pmi128_re = vdupq_n_s32(0);
pmi128_im = vdupq_n_s32(0);
#endif
// limit is the number of groups of 4 REs in a subband (12 = 4 RBs, 3 = 1 RB) // limit is the number of groups of 4 REs in a subband (12 = 4 RBs, 3 = 1 RB)
// for 5 MHz channelization, there are 7 subbands, 6 of size 4 RBs and 1 of size 1 RB // for 5 MHz channelization, there are 7 subbands, 6 of size 4 RBs and 1 of size 1 RB
if ((N_RB_DL==6) || (subband<(nb_subbands-1))) if ((N_RB_DL==6) || (subband<(nb_subbands-1)))
...@@ -636,52 +636,33 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue, ...@@ -636,52 +636,33 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
// For each RE in subband perform ch0 * conj(ch1) // For each RE in subband perform ch0 * conj(ch1)
// multiply by conjugated channel // multiply by conjugated channel
// if(eNB_id==0){ #if defined(__x86_64__) || defined(__i386__)
//print_shorts("ch0",dl_ch0_128);
//print_shorts("ch1",dl_ch1_128);
// }
// if(i==0){
mmtmpPMI0 = _mm_setzero_si128();
mmtmpPMI1 = _mm_setzero_si128();
// }
// if(eNB_id==0)
// print_ints("Pre_re",&mmtmpPMI0);
mmtmpPMI0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch1_128[0]);
// if(eNB_id==0)
// print_ints("re",&mmtmpPMI0);
// mmtmpPMI0 contains real part of 4 consecutive outputs (32-bit)
// print_shorts("Ch1",dl_ch1_128);
mmtmpPMI1 = _mm_shufflelo_epi16(dl_ch1_128[0],_MM_SHUFFLE(2,3,0,1));//_MM_SHUFFLE(2,3,0,1) mmtmpPMI1 = _mm_shufflelo_epi16(dl_ch1_128[0],_MM_SHUFFLE(2,3,0,1));//_MM_SHUFFLE(2,3,0,1)
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_shufflehi_epi16(mmtmpPMI1,_MM_SHUFFLE(2,3,0,1)); mmtmpPMI1 = _mm_shufflehi_epi16(mmtmpPMI1,_MM_SHUFFLE(2,3,0,1));
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_sign_epi16(mmtmpPMI1,*(__m128i*)&conjugate[0]); mmtmpPMI1 = _mm_sign_epi16(mmtmpPMI1,*(__m128i*)&conjugate[0]);
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_madd_epi16(mmtmpPMI1,dl_ch0_128[0]); mmtmpPMI1 = _mm_madd_epi16(mmtmpPMI1,dl_ch0_128[0]);
// if(eNB_id==0)
// print_ints("im",&mmtmpPMI1);
// mmtmpPMI1 contains imag part of 4 consecutive outputs (32-bit) // mmtmpPMI1 contains imag part of 4 consecutive outputs (32-bit)
pmi128_re = _mm_add_epi32(pmi128_re,mmtmpPMI0); pmi128_re = _mm_add_epi32(pmi128_re,mmtmpPMI0);
pmi128_im = _mm_add_epi32(pmi128_im,mmtmpPMI1); pmi128_im = _mm_add_epi32(pmi128_im,mmtmpPMI1);
#elif defined(__arm__)
mmtmpPMI0 = vmull_s16(((int16x4_t*)dl_ch0_128)[0], ((int16x4_t*)dl_ch1_128)[0]);
mmtmpPMI1 = vmull_s16(((int16x4_t*)dl_ch0_128)[1], ((int16x4_t*)dl_ch1_128)[1]);
pmi128_re = vqaddq_s32(pmi128_re,vcombine_s32(vpadd_s32(vget_low_s32(mmtmpPMI0),vget_high_s32(mmtmpPMI0)),vpadd_s32(vget_low_s32(mmtmpPMI1),vget_high_s32(mmtmpPMI1))));
mmtmpPMI0b = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)dl_ch0_128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)dl_ch1_128)[0]);
mmtmpPMI1b = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)dl_ch0_128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)dl_ch1_128)[1]);
pmi128_im = vqaddq_s32(pmi128_im,vcombine_s32(vpadd_s32(vget_low_s32(mmtmpPMI0b),vget_high_s32(mmtmpPMI0b)),vpadd_s32(vget_low_s32(mmtmpPMI1b),vget_high_s32(mmtmpPMI1b))));
#endif
dl_ch0_128++; dl_ch0_128++;
dl_ch1_128++; dl_ch1_128++;
} }
phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx] = (((int *)&pmi128_re)[0] + ((int *)&pmi128_re)[1] + ((int *)&pmi128_re)[2] + ((int *)&pmi128_re)[3])>>2; phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx] = (((int *)&pmi128_re)[0] + ((int *)&pmi128_re)[1] + ((int *)&pmi128_re)[2] + ((int *)&pmi128_re)[3])>>2;
// if(eNB_id==0)
// printf("in lte_ue_measurements.c: pmi_re %d\n",phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx]);
phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx] = (((int *)&pmi128_im)[0] + ((int *)&pmi128_im)[1] + ((int *)&pmi128_im)[2] + ((int *)&pmi128_im)[3])>>2; phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx] = (((int *)&pmi128_im)[0] + ((int *)&pmi128_im)[1] + ((int *)&pmi128_im)[2] + ((int *)&pmi128_im)[3])>>2;
// if(eNB_id==0)
// printf("in lte_ue_measurements.c: pmi_im %d\n",phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx]);
phy_vars_ue->PHY_measurements.wideband_pmi_re[eNB_id][aarx] += phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx]; phy_vars_ue->PHY_measurements.wideband_pmi_re[eNB_id][aarx] += phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx];
phy_vars_ue->PHY_measurements.wideband_pmi_im[eNB_id][aarx] += phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx]; phy_vars_ue->PHY_measurements.wideband_pmi_im[eNB_id][aarx] += phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx];
// msg("subband_pmi[%d][%d][%d] => (%d,%d)\n",eNB_id,subband,aarx,phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx],phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx]);
} // subband loop } // subband loop
} // rx antenna loop } // rx antenna loop
} // if frame_parms->mode1_flag == 0 } // if frame_parms->mode1_flag == 0
...@@ -742,9 +723,10 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue, ...@@ -742,9 +723,10 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
// printf("in lte_ue_measurements: selected rx_antenna[eNB_id==0]:%u\n", phy_vars_ue->PHY_measurements.selected_rx_antennas[eNB_id][i]); // printf("in lte_ue_measurements: selected rx_antenna[eNB_id==0]:%u\n", phy_vars_ue->PHY_measurements.selected_rx_antennas[eNB_id][i]);
} // eNB_id loop } // eNB_id loop
#if defined(__x86_64__) || defined(__i386__)
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
#endif
} }
......
...@@ -106,9 +106,13 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB, ...@@ -106,9 +106,13 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
*temp_out_fft_1_ptr = (int32_t*)0,*out_fft_ptr_1 = (int32_t*)0, *temp_out_fft_1_ptr = (int32_t*)0,*out_fft_ptr_1 = (int32_t*)0,
*temp_in_ifft_ptr = (int32_t*)0; *temp_in_ifft_ptr = (int32_t*)0;
#if defined(__x86_64__) || defined(__i386__)
__m128i *rxdataF128,*ul_ref128,*ul_ch128; __m128i *rxdataF128,*ul_ref128,*ul_ch128;
__m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3; __m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3;
#elif defined(__arm__)
int16x8_t *rxdataF128,*ul_ref128,*ul_ch128;
int32x4_t mmtmp0,mmtmp1,mmtmp_re,mmtmp_im;
#endif
Msc_RS = N_rb_alloc*12; Msc_RS = N_rb_alloc*12;
cyclic_shift = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift + cyclic_shift = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift +
...@@ -156,11 +160,18 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB, ...@@ -156,11 +160,18 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
for (aa=0; aa<nb_antennas_rx; aa++) { for (aa=0; aa<nb_antennas_rx; aa++) {
// msg("Componentwise prod aa %d, symbol_offset %d,ul_ch_estimates %p,ul_ch_estimates[aa] %p,ul_ref_sigs_rx[0][0][Msc_RS_idx] %p\n",aa,symbol_offset,ul_ch_estimates,ul_ch_estimates[aa],ul_ref_sigs_rx[0][0][Msc_RS_idx]); // msg("Componentwise prod aa %d, symbol_offset %d,ul_ch_estimates %p,ul_ch_estimates[aa] %p,ul_ref_sigs_rx[0][0][Msc_RS_idx] %p\n",aa,symbol_offset,ul_ch_estimates,ul_ch_estimates[aa],ul_ref_sigs_rx[0][0][Msc_RS_idx]);
#if defined(__x86_64__) || defined(__i386__)
rxdataF128 = (__m128i *)&rxdataF_ext[aa][symbol_offset]; rxdataF128 = (__m128i *)&rxdataF_ext[aa][symbol_offset];
ul_ch128 = (__m128i *)&ul_ch_estimates[aa][symbol_offset]; ul_ch128 = (__m128i *)&ul_ch_estimates[aa][symbol_offset];
ul_ref128 = (__m128i *)ul_ref_sigs_rx[u][v][Msc_RS_idx]; ul_ref128 = (__m128i *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
#elif defined(__arm__)
rxdataF128 = (int16x8_t *)&rxdataF_ext[aa][symbol_offset];
ul_ch128 = (int16x8_t *)&ul_ch_estimates[aa][symbol_offset];
ul_ref128 = (int16x8_t *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
#endif
for (i=0; i<Msc_RS/12; i++) { for (i=0; i<Msc_RS/12; i++) {
#if defined(__x86_64__) || defined(__i386__)
// multiply by conjugated channel // multiply by conjugated channel
mmtmpU0 = _mm_madd_epi16(ul_ref128[0],rxdataF128[0]); mmtmpU0 = _mm_madd_epi16(ul_ref128[0],rxdataF128[0]);
// mmtmpU0 contains real part of 4 consecutive outputs (32-bit) // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
...@@ -204,7 +215,50 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB, ...@@ -204,7 +215,50 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1); mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
ul_ch128[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3); ul_ch128[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
#elif defined(__arm__)
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
#endif
ul_ch128+=3; ul_ch128+=3;
ul_ref128+=3; ul_ref128+=3;
rxdataF128+=3; rxdataF128+=3;
...@@ -538,17 +592,17 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB, ...@@ -538,17 +592,17 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
// msg("sym: %d, current_phase1: %d, ru: %d + j%d, current_phase2: %d, ru: %d + j%d\n",k,current_phase1,ru1[2*current_phase1],ru1[2*current_phase1+1],current_phase2,ru2[2*current_phase2],ru2[2*current_phase2+1]); // msg("sym: %d, current_phase1: %d, ru: %d + j%d, current_phase2: %d, ru: %d + j%d\n",k,current_phase1,ru1[2*current_phase1],ru1[2*current_phase1+1],current_phase2,ru2[2*current_phase2],ru2[2*current_phase2+1]);
// rotate channel estimates by estimated phase // rotate channel estimates by estimated phase
rotate_cpx_vector_norep((int16_t*) ul_ch1, rotate_cpx_vector((int16_t*) ul_ch1,
&ru1[2*current_phase1], &ru1[2*current_phase1],
(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k], (int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
Msc_RS, Msc_RS,
15); 15);
rotate_cpx_vector_norep((int16_t*) ul_ch2, rotate_cpx_vector((int16_t*) ul_ch2,
&ru2[2*current_phase2], &ru2[2*current_phase2],
(int16_t*) &tmp_estimates[0], (int16_t*) &tmp_estimates[0],
Msc_RS, Msc_RS,
15); 15);
// Combine the two rotated estimates // Combine the two rotated estimates
multadd_complex_vector_real_scalar((int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],SCALE,(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],1,Msc_RS); multadd_complex_vector_real_scalar((int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],SCALE,(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],1,Msc_RS);
...@@ -664,11 +718,11 @@ int32_t lte_srs_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -664,11 +718,11 @@ int32_t lte_srs_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
//write_output("eNb_rxF.m","rxF",&eNb_common_vars->rxdataF[0][aa][2*frame_parms->ofdm_symbol_size*symbol],2*(frame_parms->ofdm_symbol_size),2,1); //write_output("eNb_rxF.m","rxF",&eNb_common_vars->rxdataF[0][aa][2*frame_parms->ofdm_symbol_size*symbol],2*(frame_parms->ofdm_symbol_size),2,1);
//write_output("eNb_srs.m","srs_eNb",eNb_common_vars->srs,(frame_parms->ofdm_symbol_size),1,1); //write_output("eNb_srs.m","srs_eNb",eNb_common_vars->srs,(frame_parms->ofdm_symbol_size),1,1);
mult_cpx_vector_norep((int16_t*) &eNb_common_vars->rxdataF[eNb_id][aa][2*frame_parms->ofdm_symbol_size*symbol], mult_cpx_conj_vector((int16_t*) &eNb_common_vars->rxdataF[eNb_id][aa][2*frame_parms->ofdm_symbol_size*symbol],
(int16_t*) eNb_srs_vars->srs, (int16_t*) eNb_srs_vars->srs,
(int16_t*) eNb_srs_vars->srs_ch_estimates[eNb_id][aa], (int16_t*) eNb_srs_vars->srs_ch_estimates[eNb_id][aa],
frame_parms->ofdm_symbol_size, frame_parms->ofdm_symbol_size,
15); 15);
//msg("SRS channel estimation cmult out\n"); //msg("SRS channel estimation cmult out\n");
#ifdef USER_MODE #ifdef USER_MODE
...@@ -695,6 +749,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -695,6 +749,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms,
uint16_t nb_rb) uint16_t nb_rb)
{ {
#if defined(__x86_64__) || defined(__i386__)
int k, rb; int k, rb;
int a_idx = 64; int a_idx = 64;
uint8_t conj_flag = 0; uint8_t conj_flag = 0;
...@@ -830,4 +885,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -830,4 +885,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms,
phase_idx = -phase_idx; phase_idx = -phase_idx;
return(phase_idx); return(phase_idx);
#elif defined(__arm__)
return(0);
#endif
} }
...@@ -94,9 +94,9 @@ int lte_dl_cell_spec_SS(PHY_VARS_eNB *phy_vars_eNB, ...@@ -94,9 +94,9 @@ int lte_dl_cell_spec_SS(PHY_VARS_eNB *phy_vars_eNB,
output[k] = qpsk[(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3]; output[k] = qpsk[(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3];
//output[k] = (lte_gold_table[eNB_offset][Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3; //output[k] = (lte_gold_table[eNB_offset][Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3;
#ifdef DEBUG_DL_CELL_SPEC #ifdef DEBUG_DL_CELL_SPEC
debug_msg("Ns %d, l %d, m %d,mprime_dword %d, mprime_qpsk_symbol %d\n", msg("Ns %d, l %d, m %d,mprime_dword %d, mprime_qpsk_symbol %d\n",
Ns,l,m,mprime_dword,mprime_qpsk_symb); Ns,l,m,mprime_dword,mprime_qpsk_symb);
debug_msg("index = %d (k %d)\n",(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3,k); msg("index = %d (k %d)\n",(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3,k);
#endif #endif
mprime++; mprime++;
......
This diff is collapsed.
...@@ -139,7 +139,7 @@ typedef struct { ...@@ -139,7 +139,7 @@ typedef struct {
/// Concatenated "e"-sequences (for definition see 36-212 V8.6 2009-03, p.17-18) /// Concatenated "e"-sequences (for definition see 36-212 V8.6 2009-03, p.17-18)
uint8_t e[MAX_NUM_CHANNEL_BITS]; uint8_t e[MAX_NUM_CHANNEL_BITS];
/// Turbo-code outputs (36-212 V8.6 2009-03, p.12 /// Turbo-code outputs (36-212 V8.6 2009-03, p.12
uint8_t d[MAX_NUM_DLSCH_SEGMENTS][(96+3+(3*6144))]; uint8_t *d[MAX_NUM_DLSCH_SEGMENTS];//[(96+3+(3*6144))];
/// Sub-block interleaver outputs (36-212 V8.6 2009-03, p.16-17) /// Sub-block interleaver outputs (36-212 V8.6 2009-03, p.16-17)
uint8_t w[MAX_NUM_DLSCH_SEGMENTS][3*6144]; uint8_t w[MAX_NUM_DLSCH_SEGMENTS][3*6144];
/// Number of code segments (for definition see 36-212 V8.6 2009-03, p.9) /// Number of code segments (for definition see 36-212 V8.6 2009-03, p.9)
......
...@@ -100,6 +100,10 @@ void free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch) ...@@ -100,6 +100,10 @@ void free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch)
free16(dlsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+768); free16(dlsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+768);
dlsch->harq_processes[i]->c[r] = NULL; dlsch->harq_processes[i]->c[r] = NULL;
} }
if (dlsch->harq_processes[i]->d[r]) {
free16(dlsch->harq_processes[i]->d[r],(96+3+(3*6144)));
dlsch->harq_processes[i]->d[r] = NULL;
}
} }
free16(dlsch->harq_processes[i],sizeof(LTE_DL_eNB_HARQ_t)); free16(dlsch->harq_processes[i],sizeof(LTE_DL_eNB_HARQ_t));
...@@ -168,14 +172,20 @@ LTE_eNB_DLSCH_t *new_eNB_dlsch(unsigned char Kmimo,unsigned char Mdlharq,unsigne ...@@ -168,14 +172,20 @@ LTE_eNB_DLSCH_t *new_eNB_dlsch(unsigned char Kmimo,unsigned char Mdlharq,unsigne
if (abstraction_flag==0) { if (abstraction_flag==0) {
for (r=0; r<MAX_NUM_DLSCH_SEGMENTS/bw_scaling; r++) { for (r=0; r<MAX_NUM_DLSCH_SEGMENTS/bw_scaling; r++) {
// account for filler in first segment and CRCs for multiple segment case // account for filler in first segment and CRCs for multiple segment case
dlsch->harq_processes[i]->c[r] = (unsigned char*)malloc16(((r==0)?8:0) + 3+ 768); dlsch->harq_processes[i]->c[r] = (uint8_t*)malloc16(((r==0)?8:0) + 3+ 768);
dlsch->harq_processes[i]->d[r] = (uint8_t*)malloc16((96+3+(3*6144)));
if (dlsch->harq_processes[i]->c[r]) { if (dlsch->harq_processes[i]->c[r]) {
bzero(dlsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+ 768); bzero(dlsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+ 768);
} else { } else {
msg("Can't get c\n"); msg("Can't get c\n");
exit_flag=2; exit_flag=2;
} }
if (dlsch->harq_processes[i]->d[r]) {
bzero(dlsch->harq_processes[i]->d[r],(96+3+(3*6144)));
} else {
msg("Can't get d\n");
exit_flag=2;
}
} }
} }
} else { } else {
...@@ -190,8 +200,10 @@ LTE_eNB_DLSCH_t *new_eNB_dlsch(unsigned char Kmimo,unsigned char Mdlharq,unsigne ...@@ -190,8 +200,10 @@ LTE_eNB_DLSCH_t *new_eNB_dlsch(unsigned char Kmimo,unsigned char Mdlharq,unsigne
if (abstraction_flag==0) { if (abstraction_flag==0) {
for (j=0; j<96; j++) for (j=0; j<96; j++)
for (r=0; r<MAX_NUM_DLSCH_SEGMENTS; r++) for (r=0; r<MAX_NUM_DLSCH_SEGMENTS/bw_scaling; r++) {
// printf("dlsch->harq_processes[%d]->d[%d] %p\n",i,r,dlsch->harq_processes[i]->d[r]);
dlsch->harq_processes[i]->d[r][j] = LTE_NULL; dlsch->harq_processes[i]->d[r][j] = LTE_NULL;
}
} }
} }
......
...@@ -262,7 +262,7 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -262,7 +262,7 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
switch (mod_order0) { switch (mod_order0) {
case 2: //QPSK case 2: //QPSK
//printf("%d(%d) : %d,%d => ",tti_offset,*jj,((int16_t*)&txdataF[0][tti_offset])[0],((int16_t*)&txdataF[0][tti_offset])[1]); // printf("%d(%d) : %d,%d => ",tti_offset,*jj,((int16_t*)&txdataF[0][tti_offset])[0],((int16_t*)&txdataF[0][tti_offset])[1]);
for (aa=0; aa<frame_parms->nb_antennas_tx; aa++) { for (aa=0; aa<frame_parms->nb_antennas_tx; aa++) {
((int16_t*)&txdataF[aa][tti_offset])[0] += (x0[*jj]==1) ? (-gain_lin_QPSK) : gain_lin_QPSK; //I //b_i ((int16_t*)&txdataF[aa][tti_offset])[0] += (x0[*jj]==1) ? (-gain_lin_QPSK) : gain_lin_QPSK; //I //b_i
} }
...@@ -275,7 +275,7 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -275,7 +275,7 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
*jj = *jj + 1; *jj = *jj + 1;
// printf("%d,%d\n",((int16_t*)&txdataF[0][tti_offset])[0],((int16_t*)&txdataF[0][tti_offset])[1]); // printf("%d,%d\n",((int16_t*)&txdataF[0][tti_offset])[0],((int16_t*)&txdataF[0][tti_offset])[1]);
break; break;
case 4: //16QAM case 4: //16QAM
......
This diff is collapsed.
This diff is collapsed.
...@@ -998,10 +998,7 @@ int32_t generate_prach( PHY_VARS_UE *phy_vars_ue, uint8_t eNB_id, uint8_t subfra ...@@ -998,10 +998,7 @@ int32_t generate_prach( PHY_VARS_UE *phy_vars_ue, uint8_t eNB_id, uint8_t subfra
return signal_energy( (int*)prach, 256 ); return signal_energy( (int*)prach, 256 );
} }
//__m128i mmtmpX0,mmtmpX1,mmtmpX2,mmtmpX3;
__m128i mmtmpX0,mmtmpX1,mmtmpX2,mmtmpX3;
void rx_prach(PHY_VARS_eNB *phy_vars_eNB,uint8_t subframe,uint16_t *preamble_energy_list, uint16_t *preamble_delay_list, uint16_t Nf, uint8_t tdd_mapindex) void rx_prach(PHY_VARS_eNB *phy_vars_eNB,uint8_t subframe,uint16_t *preamble_energy_list, uint16_t *preamble_delay_list, uint16_t Nf, uint8_t tdd_mapindex)
{ {
......
...@@ -48,7 +48,7 @@ ...@@ -48,7 +48,7 @@
* @{ * @{
*/ */
/** \fn free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch) /** \fn free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch,unsigned char N_RB_DL)
\brief This function frees memory allocated for a particular DLSCH at eNB \brief This function frees memory allocated for a particular DLSCH at eNB
@param dlsch Pointer to DLSCH to be removed @param dlsch Pointer to DLSCH to be removed
*/ */
...@@ -74,9 +74,7 @@ void free_ue_dlsch(LTE_UE_DLSCH_t *dlsch); ...@@ -74,9 +74,7 @@ void free_ue_dlsch(LTE_UE_DLSCH_t *dlsch);
LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_iterations,uint8_t N_RB_DL, uint8_t abstraction_flag); LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_iterations,uint8_t N_RB_DL, uint8_t abstraction_flag);
void free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch);
LTE_eNB_ULSCH_t *new_eNB_ulsch(uint8_t Mdlharq,uint8_t max_turbo_iterations,uint8_t N_RB_UL, uint8_t abstraction_flag);
void clean_eNb_ulsch(LTE_eNB_ULSCH_t *ulsch, uint8_t abstraction_flag); void clean_eNb_ulsch(LTE_eNB_ULSCH_t *ulsch, uint8_t abstraction_flag);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment