Commit 562ce288 authored by Raymond Knopp's avatar Raymond Knopp

corrections for ARM NEON port. IFFT/FFTs are validated (512/1024/2048), 7.5...

corrections for ARM NEON port. IFFT/FFTs are validated (512/1024/2048), 7.5 kHz frequency-shift for eNodeB RX, UE TX validated. UL channel estimation ok for DMRS symbols, interpolation is broken on NEON.
parent 48b95afc
......@@ -173,6 +173,11 @@ int32_t temp_in_ifft_0[2048*2] __attribute__((aligned(32)));
mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
ul_ch128[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
ul_ch128+=3;
ul_ref128+=3;
rxdataF128+=3;
#elif defined(__arm__) || defined(__aarch64__)
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
......@@ -183,7 +188,7 @@ int32_t temp_in_ifft_0[2048*2] __attribute__((aligned(32)));
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
((int16x4x2_t*)ul_ch128)[0] = vzip_s16(vshrn_n_s32(mmtmp_re,15),vshrn_n_s32(mmtmp_im,15));
ul_ch128++;
ul_ref128++;
rxdataF128++;
......@@ -196,7 +201,7 @@ int32_t temp_in_ifft_0[2048*2] __attribute__((aligned(32)));
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
((int16x4x2_t*)ul_ch128)[0] = vzip_s16(vshrn_n_s32(mmtmp_re,15),vshrn_n_s32(mmtmp_im,15));
ul_ch128++;
ul_ref128++;
rxdataF128++;
......@@ -210,16 +215,13 @@ int32_t temp_in_ifft_0[2048*2] __attribute__((aligned(32)));
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
((int16x4x2_t*)ul_ch128)[0] = vzip_s16(vshrn_n_s32(mmtmp_re,15),vshrn_n_s32(mmtmp_im,15));
ul_ch128++;
ul_ref128++;
rxdataF128++;
#endif
ul_ch128+=3;
ul_ref128+=3;
rxdataF128+=3;
}
alpha_ind = 0;
......
......@@ -21,7 +21,7 @@
#include "PHY/defs.h"
#include "defs.h"
//#define DEBUG_FEP
#define DEBUG_FEP
#define SOFFSET 0
......@@ -240,6 +240,7 @@ int slot_fep(PHY_VARS_UE *ue,
#ifdef DEBUG_FEP
printf("slot_fep: done\n");
#endif
return(0);
}
......
......@@ -30,6 +30,9 @@ short conjugate75[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1} ;
short conjugate75_2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
short negate[8]__attribute__((aligned(16))) = {-1,-1,-1,-1,-1,-1,-1,-1};
#define print_shorts(s,x) printf("%s : %d %d %d %d %d %d %d %d\n",s,((int16_t*)x)[0],((int16_t*)x)[1],((int16_t*)x)[2],((int16_t*)x)[3],((int16_t*)x)[4],((int16_t*)x)[5],((int16_t*)x)[6],((int16_t*)x)[7])
#define print_dw(s,x) printf("%s : %d %d %d %d\n",s,((int32_t*)x)[0],((int32_t*)x)[1],((int32_t*)x)[2],((int32_t*)x)[3])
void apply_7_5_kHz(PHY_VARS_UE *ue,int32_t*txdata,uint8_t slot)
{
......@@ -90,7 +93,6 @@ void apply_7_5_kHz(PHY_VARS_UE *ue,int32_t*txdata,uint8_t slot)
kHz7_5ptr128 = (int16x8_t*)kHz7_5ptr;
#endif
// apply 7.5 kHz
for (i=0; i<(len>>2); i++) {
#if defined(__x86_64__) || defined(__i386__)
mmtmp_re = _mm_madd_epi16(*txptr128,*kHz7_5ptr128);
......@@ -117,15 +119,34 @@ void apply_7_5_kHz(PHY_VARS_UE *ue,int32_t*txdata,uint8_t slot)
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
//mmtmp_re = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])]
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)txptr128)[0],*(int16x4_t*)conjugate75_2)),((int16x4_t*)kHz7_5ptr128)[0]);
/*if (i<4) {
print_shorts("txp:",txptr128);
print_shorts("kHz75:",kHz7_5ptr128);
print_dw("mmtmp0",&mmtmp0);
print_dw("mmtmp1",&mmtmp1);
}*/
mmtmp0 = vmull_s16(((int16x4_t*)txptr128)[0],
vrev32_s16(vmul_s16(((int16x4_t*)kHz7_5ptr128)[0],
*(int16x4_t*)conjugate75_2)));
//mmtmp0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)txptr128)[1],*(int16x4_t*)conjugate75_2)), ((int16x4_t*)kHz7_5ptr128)[1]);
mmtmp1 = vmull_s16(((int16x4_t*)txptr128)[1],
vrev32_s16(vmul_s16(((int16x4_t*)kHz7_5ptr128)[1],
*(int16x4_t*)conjugate75_2)));
//mmtmp0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
//mmtmp_im = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
txptr128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
((int16x4x2_t*)txptr128)[0] = vzip_s16(vshrn_n_s32(mmtmp_re,15),vshrn_n_s32(mmtmp_im,15));
/*if (i<4) {
print_dw("mmtmp_re",&mmtmp_re);
print_dw("mmtmp_im",&mmtmp_im);
print_shorts("txp",txptr128);
}*/
txptr128++;
kHz7_5ptr128++;
#endif
......@@ -238,15 +259,21 @@ void remove_7_5_kHz(RU_t *ru,uint8_t slot)
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
//mmtmp_re = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])]
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)rxptr128)[0],*(int16x4_t*)conjugate75_2)), ((int16x4_t*)kHz7_5ptr128)[0]);
mmtmp0 = vmull_s16(((int16x4_t*)rxptr128)[0],
vrev32_s16(vmul_s16(((int16x4_t*)kHz7_5ptr128)[0],
*(int16x4_t*)conjugate75_2)));
//mmtmp0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])]
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)rxptr128)[1],*(int16x4_t*)conjugate75_2)), ((int16x4_t*)kHz7_5ptr128)[1]);
mmtmp1 = vmull_s16(((int16x4_t*)rxptr128)[1],
vrev32_s16(vmul_s16(((int16x4_t*)kHz7_5ptr128)[1],
*(int16x4_t*)conjugate75_2)));
//mmtmp1 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])]
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
//mmtmp_im = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])]
rxptr128_7_5kHz[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
((int16x4x2_t*)rxptr128_7_5kHz)[0] = vzip_s16(vshrn_n_s32(mmtmp_re,15),vshrn_n_s32(mmtmp_im,15));
rxptr128_7_5kHz++;
rxptr128++;
kHz7_5ptr128++;
......
......@@ -684,9 +684,12 @@ static inline void bfly2_16(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8
static inline void bfly2_16(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1, int16x8_t *tw, int16x8_t *twb)
{
int16x8_t x1t;
*y0 = vqaddq_s16(*x0,*x1);
*y1 = vqsubq_s16(*x0,*x1);
x1t = packed_cmult2(*(x1),*(tw),*(twb));
*y0 = vqaddq_s16(*x0,x1t);
*y1 = vqsubq_s16(*x0,x1t);
}
#endif
......@@ -18918,7 +18921,7 @@ int main(int argc, char**argv)
((int16_t*)x)[i] = (int16_t)((taus()&0xffff))>>5;
}
memset((void*)&y[0],0,16*4);
idft16((int16_t *)x,(int16_t *)y);
dft16((int16_t *)x,(int16_t *)y);
printf("\n\n16-point\n");
printf("X: ");
for (i=0;i<4;i++)
......@@ -19105,7 +19108,7 @@ int main(int argc, char**argv)
reset_meas(&ts);
for (i=0; i<10000; i++) {
start_meas(&ts);
idft512((int16_t *)x,(int16_t *)y,1);
dft512((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts);
}
......@@ -19140,7 +19143,7 @@ int main(int argc, char**argv)
for (i=0; i<10000; i++) {
start_meas(&ts);
idft1024((int16_t *)x,(int16_t *)y,1);
dft1024((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts);
}
......@@ -19165,7 +19168,7 @@ int main(int argc, char**argv)
for (i=0; i<10000; i++) {
start_meas(&ts);
idft2048((int16_t *)x,(int16_t *)y,1);
dft2048((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts);
}
......@@ -1308,6 +1308,7 @@ void ulsch_common_procedures(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc, uint8_t empt
&ue->frame_parms);
}
#if defined(EXMIMO) || defined(OAI_USRP) || defined(OAI_BLADERF) || defined(OAI_LMSSDR)
apply_7_5_kHz(ue,dummy_tx_buffer,0);
apply_7_5_kHz(ue,dummy_tx_buffer,1);
......
......@@ -1667,9 +1667,9 @@ int main(int argc, char **argv)
write_output("txsig0.m","txs0", &ru->common.txdata[0][subframe* eNB->frame_parms.samples_per_tti], eNB->frame_parms.samples_per_tti,1,1);
if (transmission_mode<7) {
write_output("txsigF0.m","txsF0x", &ru->common.txdataF_BF[0][subframe*nsymb*eNB->frame_parms.ofdm_symbol_size],nsymb*eNB->frame_parms.ofdm_symbol_size,1,1);
write_output("txsigF0.m","txsF0x", &ru->common.txdataF_BF[0][0],nsymb*eNB->frame_parms.ofdm_symbol_size,1,1);
} else if (transmission_mode == 7) {
write_output("txsigF0.m","txsF0", &ru->common.txdataF_BF[5][subframe*nsymb*eNB->frame_parms.ofdm_symbol_size],nsymb*eNB->frame_parms.ofdm_symbol_size,1,1);
write_output("txsigF0.m","txsF0", &ru->common.txdataF_BF[5][0],nsymb*eNB->frame_parms.ofdm_symbol_size,1,1);
write_output("txsigF0_BF.m","txsF0_BF", &ru->common.txdataF_BF[0][0],eNB->frame_parms.ofdm_symbol_size,1,1);
}
}
......@@ -1730,6 +1730,7 @@ int main(int argc, char **argv)
phy_procedures_UE_RX(UE,proc,0,0,dci_flag,normal_txrx,no_relay,NULL);
dci_received = dci_received - UE->pdcch_vars[UE->current_thread_id[proc->subframe_rx]][eNB_id]->dci_received;
if (dci_flag && (dci_received == 0)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment