Commit 8c5e8126 authored by Florian Kaltenberger's avatar Florian Kaltenberger

Added support for ARM NEON, lots of changes in openair1 and some in cmake_targets

git-svn-id: http://svn.eurecom.fr/openair4G/trunk@7543 818b1a75-f10b-46b9-bf7c-635c3b92a50f
parent 85ff3abc
......@@ -126,7 +126,7 @@ add_list_string_option(CMAKE_BUILD_TYPE "RelWithDebInfo" "Choose the type of bui
Message("Architecture is ${CMAKE_SYSTEM_PROCESSOR}")
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "-mfloat-abi=softfp -mfpu=neon")
set(C_FLAGS_PROCESSOR "-gdwarf-2 -mfloat-abi=hard -mfpu=neon -lgcc -lrt")
else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "-msse4.2")
endif()
......@@ -140,8 +140,8 @@ set(CMAKE_C_FLAGS
# set a flag for changes in the source code
# these changes are related to hardcoded path to include .h files
add_definitions(-DCMAKER)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -ggdb -DMALLOC_CHECK_=3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -ggdb -DMALLOC_CHECK_=3 -O2")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O2")
# Below has been put in comment because does not work with
# SVN authentication.
......@@ -778,7 +778,6 @@ set(PHY_SRC
${OPENAIR1_DIR}/PHY/TOOLS/log2_approx.c
${OPENAIR1_DIR}/PHY/TOOLS/cmult_sv.c
${OPENAIR1_DIR}/PHY/TOOLS/cmult_vv.c
${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
${OPENAIR1_DIR}/PHY/TOOLS/cdot_prod.c
${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c
${OPENAIR1_DIR}/PHY/TOOLS/dB_routines.c
......@@ -1692,7 +1691,7 @@ foreach(myExe dlsim ulsim pbchsim scansim mbmssim pdcchsim pucchsim prachsim syn
${XFORMS_SOURCE}
)
target_link_libraries (${myExe}
-Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS MSC ${ITTI_LIB} -Wl,--end-group
-Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS ${ITTI_LIB} -Wl,--end-group
pthread m rt ${CONFIG_LIBRARIES} ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES}
)
endforeach(myExe)
......
......@@ -10,5 +10,6 @@ set(RANDOM_BF False)
set(PBS_SIM False)
set(PERFECT_CE False)
set(NAS_UE False)
set(MESSAGE_CHART_GENERATOR False)
include(${CMAKE_CURRENT_SOURCE_DIR}/../CMakeLists.txt)
......@@ -31,8 +31,9 @@
author: raymond.knopp@eurecom.fr
date: 10.2009
*/
#include "defs.h"
//#include "lte_interleaver_inline.h"
#ifndef TC_MAIN
//#include "defs.h"
#endif
#include "extern_3GPPinterleaver.h"
......
This diff is collapsed.
TURBO_SRC = 3gpplte.c 3gpplte_turbo_decoder_sse.c crc_byte.c
TURBO_SRC = 3gpplte_sse.c 3gpplte_turbo_decoder_sse.c crc_byte.c
RATE13CC_SRC = ccoding_byte_lte.c viterbi_lte.c crc_byte.c
RATE12CC_SRC = ccoding_byte.c viterbi.c crc_byte.c
all: turbolte_test rate13cc_test rate12cc_test run_turbo run_rate13cc run_rate13ccdab run_rate12cc
all: 3gpplte_sse
turbolte_test: $(TURBO_SRC)
gcc -o turbo_test $(TURBO_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall
3gpplte_sse: $(TURBO_SRC)
gcc -o 3gpplte_sse 3gpplte_sse.c -msse4 -Wall -g -ggdb -DMAIN
rate13cc_test: $(RATE13CC_SRC)
gcc -o rate13cc_test $(RATE13CC_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall
rate12cc_test: $(RATE12CC_SRC)
gcc -o rate12cc_test $(RATE12CC_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall
run_turbo: turbolte_test
./turbo_test
run_rate13cc: rate13cc_test
./rate13cc_test
run_rate13ccdab: rate13cc_test
./rate13cc_test -d
run_rate12cc: rate12cc_test
./rate12cc_test
clean:
rm *.o
......@@ -55,22 +55,22 @@ unsigned char ccodelte_table_rev[128]; // for receiver
void
ccodelte_encode (unsigned int numbits,
unsigned char add_crc,
unsigned char *inPtr,
unsigned char *outPtr,
unsigned short rnti)
ccodelte_encode (int32_t numbits,
uint8_t add_crc,
uint8_t *inPtr,
uint8_t *outPtr,
uint16_t rnti)
{
unsigned int state;
uint32_t state;
unsigned char c, out, first_bit;
char shiftbit=0;
unsigned short c16;
unsigned short next_last_byte=0;
unsigned int crc=0;
uint8_t c, out, first_bit;
int8_t shiftbit=0;
uint16_t c16;
uint16_t next_last_byte=0;
uint32_t crc=0;
#ifdef DEBUG_CCODE
unsigned int dummy=0;
uint32_t dummy=0;
#endif //DEBUG_CCODE
/* The input bit is shifted in position 8 of the state.
......@@ -80,20 +80,19 @@ ccodelte_encode (unsigned int numbits,
if (add_crc == 1) {
crc = crc8(inPtr,numbits);
first_bit = 2;
c = (unsigned char)(crc>>24);
c = (uint8_t)(crc>>24);
} else if (add_crc == 2) {
crc = crc16(inPtr,numbits);
#ifdef DEBUG_CCODE
printf("ccode_lte : crc %x\n",crc);
#endif
// scramble with RNTI
crc ^= (((unsigned int)rnti)<<16);
crc ^= (((uint32_t)rnti)<<16);
#ifdef DEBUG_CCODE
printf("ccode_lte : crc %x (rnti %x)\n",crc,rnti);
#endif
first_bit = 2;
// c = (unsigned char)(crc>>24);
c = (unsigned char)((crc>>16)&0xff);
c = (uint8_t)((crc>>16)&0xff);
} else {
next_last_byte = numbits>>3;
first_bit = (numbits-6)&7;
......@@ -182,7 +181,7 @@ ccodelte_encode (unsigned int numbits,
// now code 8-bit CRC for UCI
if (add_crc == 1) {
c = (unsigned char)(crc>>24);
c = (uint8_t)(crc>>24);
// for (shiftbit = 0; (shiftbit<8);shiftbit++) {
for (shiftbit = 7; (shiftbit>=0); shiftbit--) {
......@@ -209,7 +208,7 @@ ccodelte_encode (unsigned int numbits,
// now code 16-bit CRC for DCI
if (add_crc == 2) {
c16 = (unsigned short)(crc>>16);
c16 = (uint16_t)(crc>>16);
// for (shiftbit = 0; (shiftbit<16);shiftbit++) {
for (shiftbit = 15; (shiftbit>=0); shiftbit--) {
......
......@@ -320,7 +320,7 @@ void threegpplte_turbo_encoder(uint8_t *input,
uint16_t interleaver_f2);
/** \fn void ccodelte_encode(uint32_t numbits,uint8_t add_crc, uint8_t *inPtr,uint8_t *outPtr,uint16_t rnti)
/** \fn void ccodelte_encode(int32_t numbits,uint8_t add_crc, uint8_t *inPtr,uint8_t *outPtr,uint16_t rnti)
\brief This function implements the LTE convolutional code of rate 1/3
with a constraint length of 7 bits. The inputs are bit packed in octets
(from MSB to LSB). Trellis tail-biting is included here.
......@@ -331,7 +331,7 @@ void threegpplte_turbo_encoder(uint8_t *input,
@param rnti RNTI for CRC scrambling
*/
void
ccodelte_encode (uint32_t numbits,
ccodelte_encode (int32_t numbits,
uint8_t add_crc,
uint8_t *inPtr,
uint8_t *outPtr,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -299,11 +299,17 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms,
{
uint16_t re;
int16_t amp;
#if defined(__x86_64__) || defined(__i386__)
__m128i *ul_ch_mag128,*ul_ch_magb128,*rxdataF_comp128;
rxdataF_comp128 = (__m128i *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_mag128 = (__m128i *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_magb128 = (__m128i *)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
#elif defined(__arm__)
int16x8_t *ul_ch_mag128,*ul_ch_magb128,*rxdataF_comp128;
rxdataF_comp128 = (int16x8_t*)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_mag128 = (int16x8_t*)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_magb128 = (int16x8_t*)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
#endif
for (re=0; re<(Msc_RS>>2); re++) {
......@@ -313,15 +319,25 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms,
amp=255;
// printf("freq_eq: symbol %d re %d => %d,%d,%d, (%d) (%d,%d) => ",symbol,re,*((int16_t*)(&ul_ch_mag128[re])),amp,inv_ch[8*amp],*((int16_t*)(&ul_ch_mag128[re]))*inv_ch[8*amp],*(int16_t*)&(rxdataF_comp128[re]),*(1+(int16_t*)&(rxdataF_comp128[re])));
#if defined(__x86_64__) || defined(__i386__)
rxdataF_comp128[re] = _mm_mullo_epi16(rxdataF_comp128[re],*((__m128i *)&inv_ch[8*amp]));
if (Qm==4)
ul_ch_mag128[re] = _mm_set1_epi16(324); // this is 512*2/sqrt(10)
ul_ch_mag128[re] = _mm_set1_epi16(324); // this is 512*2/sqrt(10)
else {
ul_ch_mag128[re] = _mm_set1_epi16(316); // this is 512*4/sqrt(42)
ul_ch_mag128[re] = _mm_set1_epi16(316); // this is 512*4/sqrt(42)
ul_ch_magb128[re] = _mm_set1_epi16(158); // this is 512*2/sqrt(42)
}
#elif defined(__arm__)
rxdataF_comp128[re] = vmulq_s16(rxdataF_comp128[re],*((int16x8_t *)&inv_ch[8*amp]));
if (Qm==4)
ul_ch_mag128[re] = vdupq_n_s16(324); // this is 512*2/sqrt(10)
else {
ul_ch_mag128[re] = vdupq_n_s16(316); // this is 512*4/sqrt(42)
ul_ch_magb128[re] = vdupq_n_s16(158); // this is 512*2/sqrt(42)
}
#endif
// printf("(%d,%d)\n",*(int16_t*)&(rxdataF_comp128[re]),*(1+(int16_t*)&(rxdataF_comp128[re])));
}
......
......@@ -35,7 +35,11 @@
#include "PHY/defs.h"
//#define DEBUG_PHY
#if defined(__x86_64__) || defined(__i386__)
__m128i avg128F;
#elif defined(__arm__)
int32x4_t avg128F;
#endif
//compute average channel_level on each (TX,RX) antenna pair
int dl_channel_level(int16_t *dl_ch,
......@@ -43,10 +47,15 @@ int dl_channel_level(int16_t *dl_ch,
{
int16_t rb;
#if defined(__x86_64__) || defined(__i386__)
__m128i *dl_ch128;
#elif defined(__arm__)
int16x4_t *dl_ch128;
#endif
int avg;
//clear average level
#if defined(__x86_64__) || defined(__i386__)
avg128F = _mm_setzero_si128();
dl_ch128=(__m128i *)dl_ch;
......@@ -59,7 +68,25 @@ int dl_channel_level(int16_t *dl_ch,
dl_ch128+=3;
}
#elif defined(__arm__)
avg128F = vdupq_n_s32(0);
dl_ch128=(int16x4_t *)dl_ch;
for (rb=0; rb<frame_parms->N_RB_DL; rb++) {
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[0],dl_ch128[0]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[1],dl_ch128[1]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[2],dl_ch128[2]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[3],dl_ch128[3]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[4],dl_ch128[4]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[5],dl_ch128[5]));
dl_ch128+=6;
}
#endif
DevAssert( frame_parms->N_RB_DL );
avg = (((int*)&avg128F)[0] +
((int*)&avg128F)[1] +
......@@ -67,10 +94,10 @@ int dl_channel_level(int16_t *dl_ch,
((int*)&avg128F)[3])/(frame_parms->N_RB_DL*12);
#if defined(__x86_64__) || defined(__i386__)
_mm_empty();
_m_empty();
#endif
return(avg);
}
......
......@@ -46,14 +46,15 @@
//#include "defs.h"
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "pss6144.h"
#if defined(__x86_64__) || defined(__i386__)
#include "pss6144.h"
extern void print_shorts(char*,__m128i*);
#endif
void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq)
{
#if defined(__x86_64__) || defined(__i386__)
UE_SCAN_INFO_t *scan_info = &ue->scan_info[band];
int16_t spectrum[12288] __attribute__((aligned(16)));
int16_t spectrum_p5ms[12288] __attribute__((aligned(16)));
......@@ -358,5 +359,6 @@ void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq)
for (band_idx=0; band_idx<10; band_idx++)
printf("pss 2: level %d dB, freq %u\n", dB_fixed(scan_info->amp[2][band_idx]),scan_info->freq_offset_Hz[2][band_idx]);
#endif
}
......@@ -42,34 +42,26 @@
//#define DEBUG_MEAS
#ifdef USER_MODE
void print_shorts(char *s,__m128i *x)
void print_shorts(char *s,short *x)
{
short *tempb = (short *)x;
printf("%s : %d,%d,%d,%d,%d,%d,%d,%d\n",s,
tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7]
x[0],x[1],x[2],x[3],x[4],x[5],x[6],x[7]
);
}
void print_ints(char *s,__m128i *x)
void print_ints(char *s,int *x)
{
int *tempb = (int *)x;
printf("%s : %d,%d,%d,%d\n",s,
tempb[0],tempb[1],tempb[2],tempb[3]
x[0],x[1],x[2],x[3]
);
}
#endif
__m128i pmi128_re __attribute__ ((aligned(16)));
__m128i pmi128_im __attribute__ ((aligned(16)));
__m128i mmtmpPMI0 __attribute__ ((aligned(16)));
__m128i mmtmpPMI1 __attribute__ ((aligned(16)));
__m128i mmtmpPMI2 __attribute__ ((aligned(16)));
__m128i mmtmpPMI3 __attribute__ ((aligned(16)));
int16_t get_PL(uint8_t Mod_id,uint8_t CC_id,uint8_t eNB_index)
{
......@@ -421,7 +413,11 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
//int rx_power[NUMBER_OF_CONNECTED_eNB_MAX];
int i;
unsigned int limit,subband;
#if defined(__x86_64__) || defined(__i386__)
__m128i *dl_ch0_128,*dl_ch1_128;
#elif defined(__arm__)
int16x8_t *dl_ch0_128, *dl_ch1_128;
#endif
int *dl_ch0,*dl_ch1;
LTE_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->lte_frame_parms;
int nb_subbands,subband_size,last_subband_size;
......@@ -605,26 +601,30 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
// skip the first 4 RE due to interpolation filter length of 5 (not possible to skip 5 due to 128i alignment, must be multiple of 128bit)
#if defined(__x86_64__) || defined(__i386__)
__m128i pmi128_re,pmi128_im,mmtmpPMI0,mmtmpPMI1,mmtmpPMI2,mmtmpPMI3;
dl_ch0_128 = (__m128i *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][aarx][4];
dl_ch1_128 = (__m128i *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][2+aarx][4];
#elif defined(__arm__)
int32x4_t pmi128_re,pmi128_im,mmtmpPMI0,mmtmpPMI1,mmtmpPMI0b,mmtmpPMI1b;
/*
#ifdef DEBUG_PHY
if(eNB_id==0){
print_shorts("Ch0",dl_ch0_128);
print_shorts("Ch1",dl_ch1_128);
printf("eNB_ID = %d\n",eNB_id);
}
#endif
*/
dl_ch0_128 = (int16x8_t *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][aarx][4];
dl_ch1_128 = (int16x8_t *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][2+aarx][4];
#endif
for (subband=0; subband<nb_subbands; subband++) {
// pmi
#if defined(__x86_64__) || defined(__i386__)
pmi128_re = _mm_setzero_si128();
pmi128_im = _mm_setzero_si128();
#elif defined(__arm__)
pmi128_re = vdupq_n_s32(0);
pmi128_im = vdupq_n_s32(0);
#endif
// limit is the number of groups of 4 REs in a subband (12 = 4 RBs, 3 = 1 RB)
// for 5 MHz channelization, there are 7 subbands, 6 of size 4 RBs and 1 of size 1 RB
if ((N_RB_DL==6) || (subband<(nb_subbands-1)))
......@@ -636,52 +636,33 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
// For each RE in subband perform ch0 * conj(ch1)
// multiply by conjugated channel
// if(eNB_id==0){
//print_shorts("ch0",dl_ch0_128);
//print_shorts("ch1",dl_ch1_128);
// }
// if(i==0){
mmtmpPMI0 = _mm_setzero_si128();
mmtmpPMI1 = _mm_setzero_si128();
// }
// if(eNB_id==0)
// print_ints("Pre_re",&mmtmpPMI0);
mmtmpPMI0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch1_128[0]);
// if(eNB_id==0)
// print_ints("re",&mmtmpPMI0);
// mmtmpPMI0 contains real part of 4 consecutive outputs (32-bit)
// print_shorts("Ch1",dl_ch1_128);
#if defined(__x86_64__) || defined(__i386__)
mmtmpPMI1 = _mm_shufflelo_epi16(dl_ch1_128[0],_MM_SHUFFLE(2,3,0,1));//_MM_SHUFFLE(2,3,0,1)
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_shufflehi_epi16(mmtmpPMI1,_MM_SHUFFLE(2,3,0,1));
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_sign_epi16(mmtmpPMI1,*(__m128i*)&conjugate[0]);
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_madd_epi16(mmtmpPMI1,dl_ch0_128[0]);
// if(eNB_id==0)
// print_ints("im",&mmtmpPMI1);
// mmtmpPMI1 contains imag part of 4 consecutive outputs (32-bit)
pmi128_re = _mm_add_epi32(pmi128_re,mmtmpPMI0);
pmi128_im = _mm_add_epi32(pmi128_im,mmtmpPMI1);
#elif defined(__arm__)
mmtmpPMI0 = vmull_s16(((int16x4_t*)dl_ch0_128)[0], ((int16x4_t*)dl_ch1_128)[0]);
mmtmpPMI1 = vmull_s16(((int16x4_t*)dl_ch0_128)[1], ((int16x4_t*)dl_ch1_128)[1]);
pmi128_re = vqaddq_s32(pmi128_re,vcombine_s32(vpadd_s32(vget_low_s32(mmtmpPMI0),vget_high_s32(mmtmpPMI0)),vpadd_s32(vget_low_s32(mmtmpPMI1),vget_high_s32(mmtmpPMI1))));
mmtmpPMI0b = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)dl_ch0_128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)dl_ch1_128)[0]);
mmtmpPMI1b = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)dl_ch0_128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)dl_ch1_128)[1]);
pmi128_im = vqaddq_s32(pmi128_im,vcombine_s32(vpadd_s32(vget_low_s32(mmtmpPMI0b),vget_high_s32(mmtmpPMI0b)),vpadd_s32(vget_low_s32(mmtmpPMI1b),vget_high_s32(mmtmpPMI1b))));
#endif
dl_ch0_128++;
dl_ch1_128++;
}
phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx] = (((int *)&pmi128_re)[0] + ((int *)&pmi128_re)[1] + ((int *)&pmi128_re)[2] + ((int *)&pmi128_re)[3])>>2;
// if(eNB_id==0)
// printf("in lte_ue_measurements.c: pmi_re %d\n",phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx]);
phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx] = (((int *)&pmi128_im)[0] + ((int *)&pmi128_im)[1] + ((int *)&pmi128_im)[2] + ((int *)&pmi128_im)[3])>>2;
// if(eNB_id==0)
// printf("in lte_ue_measurements.c: pmi_im %d\n",phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx]);
phy_vars_ue->PHY_measurements.wideband_pmi_re[eNB_id][aarx] += phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx];
phy_vars_ue->PHY_measurements.wideband_pmi_im[eNB_id][aarx] += phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx];
// msg("subband_pmi[%d][%d][%d] => (%d,%d)\n",eNB_id,subband,aarx,phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx],phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx]);
} // subband loop
} // rx antenna loop
} // if frame_parms->mode1_flag == 0
......@@ -742,9 +723,10 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
// printf("in lte_ue_measurements: selected rx_antenna[eNB_id==0]:%u\n", phy_vars_ue->PHY_measurements.selected_rx_antennas[eNB_id][i]);
} // eNB_id loop
#if defined(__x86_64__) || defined(__i386__)
_mm_empty();
_m_empty();
#endif
}
......
......@@ -106,9 +106,13 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
*temp_out_fft_1_ptr = (int32_t*)0,*out_fft_ptr_1 = (int32_t*)0,
*temp_in_ifft_ptr = (int32_t*)0;
#if defined(__x86_64__) || defined(__i386__)
__m128i *rxdataF128,*ul_ref128,*ul_ch128;
__m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3;
#elif defined(__arm__)
int16x8_t *rxdataF128,*ul_ref128,*ul_ch128;
int32x4_t mmtmp0,mmtmp1,mmtmp_re,mmtmp_im;
#endif
Msc_RS = N_rb_alloc*12;
cyclic_shift = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift +
......@@ -156,11 +160,18 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
for (aa=0; aa<nb_antennas_rx; aa++) {
// msg("Componentwise prod aa %d, symbol_offset %d,ul_ch_estimates %p,ul_ch_estimates[aa] %p,ul_ref_sigs_rx[0][0][Msc_RS_idx] %p\n",aa,symbol_offset,ul_ch_estimates,ul_ch_estimates[aa],ul_ref_sigs_rx[0][0][Msc_RS_idx]);
#if defined(__x86_64__) || defined(__i386__)
rxdataF128 = (__m128i *)&rxdataF_ext[aa][symbol_offset];
ul_ch128 = (__m128i *)&ul_ch_estimates[aa][symbol_offset];
ul_ref128 = (__m128i *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
#elif defined(__arm__)
rxdataF128 = (int16x8_t *)&rxdataF_ext[aa][symbol_offset];
ul_ch128 = (int16x8_t *)&ul_ch_estimates[aa][symbol_offset];
ul_ref128 = (int16x8_t *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
#endif
for (i=0; i<Msc_RS/12; i++) {
#if defined(__x86_64__) || defined(__i386__)
// multiply by conjugated channel
mmtmpU0 = _mm_madd_epi16(ul_ref128[0],rxdataF128[0]);
// mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
......@@ -204,7 +215,50 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
ul_ch128[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
#elif defined(__arm__)
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
#endif
ul_ch128+=3;
ul_ref128+=3;
rxdataF128+=3;
......@@ -538,17 +592,17 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
// msg("sym: %d, current_phase1: %d, ru: %d + j%d, current_phase2: %d, ru: %d + j%d\n",k,current_phase1,ru1[2*current_phase1],ru1[2*current_phase1+1],current_phase2,ru2[2*current_phase2],ru2[2*current_phase2+1]);
// rotate channel estimates by estimated phase
rotate_cpx_vector_norep((int16_t*) ul_ch1,
&ru1[2*current_phase1],
(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
Msc_RS,
15);
rotate_cpx_vector_norep((int16_t*) ul_ch2,
&ru2[2*current_phase2],
(int16_t*) &tmp_estimates[0],
Msc_RS,
15);
rotate_cpx_vector((int16_t*) ul_ch1,
&ru1[2*current_phase1],
(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
Msc_RS,
15);
rotate_cpx_vector((int16_t*) ul_ch2,
&ru2[2*current_phase2],
(int16_t*) &tmp_estimates[0],
Msc_RS,
15);
// Combine the two rotated estimates
multadd_complex_vector_real_scalar((int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],SCALE,(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],1,Msc_RS);
......@@ -664,11 +718,11 @@ int32_t lte_srs_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
//write_output("eNb_rxF.m","rxF",&eNb_common_vars->rxdataF[0][aa][2*frame_parms->ofdm_symbol_size*symbol],2*(frame_parms->ofdm_symbol_size),2,1);
//write_output("eNb_srs.m","srs_eNb",eNb_common_vars->srs,(frame_parms->ofdm_symbol_size),1,1);
mult_cpx_vector_norep((int16_t*) &eNb_common_vars->rxdataF[eNb_id][aa][2*frame_parms->ofdm_symbol_size*symbol],
(int16_t*) eNb_srs_vars->srs,
(int16_t*) eNb_srs_vars->srs_ch_estimates[eNb_id][aa],
frame_parms->ofdm_symbol_size,
15);
mult_cpx_conj_vector((int16_t*) &eNb_common_vars->rxdataF[eNb_id][aa][2*frame_parms->ofdm_symbol_size*symbol],
(int16_t*) eNb_srs_vars->srs,
(int16_t*) eNb_srs_vars->srs_ch_estimates[eNb_id][aa],
frame_parms->ofdm_symbol_size,
15);
//msg("SRS channel estimation cmult out\n");
#ifdef USER_MODE
......@@ -695,6 +749,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms,
uint16_t nb_rb)
{
#if defined(__x86_64__) || defined(__i386__)
int k, rb;
int a_idx = 64;
uint8_t conj_flag = 0;
......@@ -830,4 +885,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms,
phase_idx = -phase_idx;
return(phase_idx);
#elif defined(__arm__)
return(0);
#endif
}
......@@ -94,9 +94,9 @@ int lte_dl_cell_spec_SS(PHY_VARS_eNB *phy_vars_eNB,
output[k] = qpsk[(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3];
//output[k] = (lte_gold_table[eNB_offset][Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3;
#ifdef DEBUG_DL_CELL_SPEC
debug_msg("Ns %d, l %d, m %d,mprime_dword %d, mprime_qpsk_symbol %d\n",
msg("Ns %d, l %d, m %d,mprime_dword %d, mprime_qpsk_symbol %d\n",
Ns,l,m,mprime_dword,mprime_qpsk_symb);
debug_msg("index = %d (k %d)\n",(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3,k);
msg("index = %d (k %d)\n",(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3,k);
#endif
mprime++;
......
This diff is collapsed.
......@@ -139,7 +139,7 @@ typedef struct {
/// Concatenated "e"-sequences (for definition see 36-212 V8.6 2009-03, p.17-18)
uint8_t e[MAX_NUM_CHANNEL_BITS];
/// Turbo-code outputs (36-212 V8.6 2009-03, p.12
uint8_t d[MAX_NUM_DLSCH_SEGMENTS][(96+3+(3*6144))];
uint8_t *d[MAX_NUM_DLSCH_SEGMENTS];//[(96+3+(3*6144))];
/// Sub-block interleaver outputs (36-212 V8.6 2009-03, p.16-17)
uint8_t w[MAX_NUM_DLSCH_SEGMENTS][3*6144];
/// Number of code segments (for definition see 36-212 V8.6 2009-03, p.9)
......
......@@ -100,6 +100,10 @@ void free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch)
free16(dlsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+768);
dlsch->harq_processes[i]->c[r] = NULL;
}
if (dlsch->harq_processes[i]->d[r]) {
free16(dlsch->harq_processes[i]->d[r],(96+3+(3*6144)));
dlsch->harq_processes[i]->d[r] = NULL;
}
}
free16(dlsch->harq_processes[i],sizeof(LTE_DL_eNB_HARQ_t));
......@@ -168,14 +172,20 @@ LTE_eNB_DLSCH_t *new_eNB_dlsch(unsigned char Kmimo,unsigned char Mdlharq,unsigne
if (abstraction_flag==0) {
for (r=0; r<MAX_NUM_DLSCH_SEGMENTS/bw_scaling; r++) {
// account for filler in first segment and CRCs for multiple segment case
dlsch->harq_processes[i]->c[r] = (unsigned char*)malloc16(((r==0)?8:0) + 3+ 768);
dlsch->harq_processes[i]->c[r] = (uint8_t*)malloc16(((r==0)?8:0) + 3+ 768);
dlsch->harq_processes[i]->d[r] = (uint8_t*)malloc16((96+3+(3*6144)));
if (dlsch->harq_processes[i]->c[r]) {
bzero(dlsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+ 768);
} else {
msg("Can't get c\n");
exit_flag=2;
}
if (dlsch->harq_processes[i]->d[r]) {
bzero(dlsch->harq_processes[i]->d[r],(96+3+(3*6144)));
} else {
msg("Can't get d\n");
exit_flag=2;
}
}
}
} else {
......@@ -190,8 +200,10 @@ LTE_eNB_DLSCH_t *new_eNB_dlsch(unsigned char Kmimo,unsigned char Mdlharq,unsigne
if (abstraction_flag==0) {
for (j=0; j<96; j++)
for (r=0; r<MAX_NUM_DLSCH_SEGMENTS; r++)
for (r=0; r<MAX_NUM_DLSCH_SEGMENTS/bw_scaling; r++) {
// printf("dlsch->harq_processes[%d]->d[%d] %p\n",i,r,dlsch->harq_processes[i]->d[r]);
dlsch->harq_processes[i]->d[r][j] = LTE_NULL;
}
}
}
......
......@@ -262,7 +262,7 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
switch (mod_order0) {
case 2: //QPSK
//printf("%d(%d) : %d,%d => ",tti_offset,*jj,((int16_t*)&txdataF[0][tti_offset])[0],((int16_t*)&txdataF[0][tti_offset])[1]);
// printf("%d(%d) : %d,%d => ",tti_offset,*jj,((int16_t*)&txdataF[0][tti_offset])[0],((int16_t*)&txdataF[0][tti_offset])[1]);
for (aa=0; aa<frame_parms->nb_antennas_tx; aa++) {
((int16_t*)&txdataF[aa][tti_offset])[0] += (x0[*jj]==1) ? (-gain_lin_QPSK) : gain_lin_QPSK; //I //b_i
}
......@@ -275,7 +275,7 @@ int allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
*jj = *jj + 1;
// printf("%d,%d\n",((int16_t*)&txdataF[0][tti_offset])[0],((int16_t*)&txdataF[0][tti_offset])[1]);
// printf("%d,%d\n",((int16_t*)&txdataF[0][tti_offset])[0],((int16_t*)&txdataF[0][tti_offset])[1]);
break;
case 4: //16QAM
......
This diff is collapsed.
This diff is collapsed.
......@@ -998,10 +998,7 @@ int32_t generate_prach( PHY_VARS_UE *phy_vars_ue, uint8_t eNB_id, uint8_t subfra
return signal_energy( (int*)prach, 256 );
}
__m128i mmtmpX0,mmtmpX1,mmtmpX2,mmtmpX3;
//__m128i mmtmpX0,mmtmpX1,mmtmpX2,mmtmpX3;
void rx_prach(PHY_VARS_eNB *phy_vars_eNB,uint8_t subframe,uint16_t *preamble_energy_list, uint16_t *preamble_delay_list, uint16_t Nf, uint8_t tdd_mapindex)
{
......
......@@ -48,7 +48,7 @@
* @{
*/
/** \fn free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch)
/** \fn free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch,unsigned char N_RB_DL)
\brief This function frees memory allocated for a particular DLSCH at eNB
@param dlsch Pointer to DLSCH to be removed
*/
......@@ -74,9 +74,7 @@ void free_ue_dlsch(LTE_UE_DLSCH_t *dlsch);
LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint8_t max_turbo_iterations,uint8_t N_RB_DL, uint8_t abstraction_flag);
void free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch);
LTE_eNB_ULSCH_t *new_eNB_ulsch(uint8_t Mdlharq,uint8_t max_turbo_iterations,uint8_t N_RB_UL, uint8_t abstraction_flag);
void clean_eNb_ulsch(LTE_eNB_ULSCH_t *ulsch, uint8_t abstraction_flag);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment