Commit ae44bc06 authored by laurent's avatar laurent

pbch displays in the scope

parent e9ac05d4
......@@ -266,123 +266,33 @@ void nr_pbch_channel_compensation(int **rxdataF_ext,
int **rxdataF_comp,
NR_DL_FRAME_PARMS *frame_parms,
uint32_t symbol,
uint8_t output_shift)
{
short conjugate[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1};
//short conjugate2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1};
#if defined(__x86_64__) || defined(__i386__)
__m128i mmtmpP0,mmtmpP1,mmtmpP2,mmtmpP3;
#elif defined(__arm__)
int16x8_t mmtmpP0,mmtmpP1,mmtmpP2,mmtmpP3;
#endif
uint16_t nb_re=180;
uint8_t aarx;
#if defined(__x86_64__) || defined(__i386__)
__m128i *dl_ch128,*rxdataF128,*rxdataF_comp128;
#elif defined(__arm__)
#endif
uint8_t output_shift) {
const uint16_t nb_re=symbol == 2 ? 72 : 180;
AssertFatal((symbol > 0 && symbol < 4),
"symbol %d is illegal for PBCH DM-RS\n",
symbol);
if (symbol == 2) nb_re = 72;
// printf("comp: symbol %d : nb_re %d\n",symbol,nb_re);
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
for (int aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
#if defined(__x86_64__) || defined(__i386__)
dl_ch128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*20*12];
rxdataF128 = (__m128i *)&rxdataF_ext[aarx][symbol*20*12];
rxdataF_comp128 = (__m128i *)&rxdataF_comp[aarx][symbol*20*12];
vect128 *dl_ch128 = (vect128 *)&dl_ch_estimates_ext[aarx][symbol*20*12];
vect128 *rxdataF128 = (vect128 *)&rxdataF_ext[aarx][symbol*20*12];
vect128 *rxdataF_comp128 = (vect128 *)&rxdataF_comp[aarx][symbol*20*12];
/*
printf("ch compensation dl_ch ext addr %p \n", &dl_ch_estimates_ext[aarx][symbol*20*12]);
printf("rxdataf ext addr %p symbol %d\n", &rxdataF_ext[aarx][symbol*20*12], symbol);
printf("rxdataf_comp addr %p\n",&rxdataF_comp[aarx][symbol*20*12]);
*/
#elif defined(__arm__)
// to be filled in
#endif
for (int re=0; re<nb_re; re+=12) {
// printf("******re %d\n",re);
#if defined(__x86_64__) || defined(__i386__)
// multiply by conjugated channel
mmtmpP0 = _mm_madd_epi16(dl_ch128[0],rxdataF128[0]);
// print_ints("re",&mmtmpP0);
// mmtmpP0 contains real part of 4 consecutive outputs (32-bit)
mmtmpP1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_shufflehi_epi16(mmtmpP1,_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_sign_epi16(mmtmpP1,*(__m128i*)&conjugate[0]);
// print_ints("im",&mmtmpP1);
mmtmpP1 = _mm_madd_epi16(mmtmpP1,rxdataF128[0]);
// mmtmpP1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpP0 = _mm_srai_epi32(mmtmpP0,output_shift);
// print_ints("re(shift)",&mmtmpP0);
mmtmpP1 = _mm_srai_epi32(mmtmpP1,output_shift);
// print_ints("im(shift)",&mmtmpP1);
mmtmpP2 = _mm_unpacklo_epi32(mmtmpP0,mmtmpP1);
mmtmpP3 = _mm_unpackhi_epi32(mmtmpP0,mmtmpP1);
// print_ints("c0",&mmtmpP2);
// print_ints("c1",&mmtmpP3);
rxdataF_comp128[0] = _mm_packs_epi32(mmtmpP2,mmtmpP3);
/*
print_shorts("rx:",rxdataF128);
print_shorts("ch:",dl_ch128);
print_shorts("pack:",rxdataF_comp128);
*/
// multiply by conjugated channel
mmtmpP0 = _mm_madd_epi16(dl_ch128[1],rxdataF128[1]);
// mmtmpP0 contains real part of 4 consecutive outputs (32-bit)
mmtmpP1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_shufflehi_epi16(mmtmpP1,_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_sign_epi16(mmtmpP1,*(__m128i*)&conjugate[0]);
mmtmpP1 = _mm_madd_epi16(mmtmpP1,rxdataF128[1]);
// mmtmpP1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpP0 = _mm_srai_epi32(mmtmpP0,output_shift);
mmtmpP1 = _mm_srai_epi32(mmtmpP1,output_shift);
mmtmpP2 = _mm_unpacklo_epi32(mmtmpP0,mmtmpP1);
mmtmpP3 = _mm_unpackhi_epi32(mmtmpP0,mmtmpP1);
rxdataF_comp128[1] = _mm_packs_epi32(mmtmpP2,mmtmpP3);
// print_shorts("rx:",rxdataF128+1);
// print_shorts("ch:",dl_ch128+1);
// print_shorts("pack:",rxdataF_comp128+1);
// multiply by conjugated channel
mmtmpP0 = _mm_madd_epi16(dl_ch128[2],rxdataF128[2]);
// mmtmpP0 contains real part of 4 consecutive outputs (32-bit)
mmtmpP1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_shufflehi_epi16(mmtmpP1,_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_sign_epi16(mmtmpP1,*(__m128i*)&conjugate[0]);
mmtmpP1 = _mm_madd_epi16(mmtmpP1,rxdataF128[2]);
// mmtmpP1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpP0 = _mm_srai_epi32(mmtmpP0,output_shift);
mmtmpP1 = _mm_srai_epi32(mmtmpP1,output_shift);
mmtmpP2 = _mm_unpacklo_epi32(mmtmpP0,mmtmpP1);
mmtmpP3 = _mm_unpackhi_epi32(mmtmpP0,mmtmpP1);
rxdataF_comp128[2] = _mm_packs_epi32(mmtmpP2,mmtmpP3);
// print_shorts("rx:",rxdataF128+2);
// print_shorts("ch:",dl_ch128+2);
// print_shorts("pack:",rxdataF_comp128+2);
dl_ch128+=3;
rxdataF128+=3;
rxdataF_comp128+=3;
#elif defined(__arm__)
// to be filled in
#endif
*rxdataF_comp128++ = mulByConjugate128(rxdataF128++, dl_ch128++, output_shift);
*rxdataF_comp128++ = mulByConjugate128(rxdataF128++, dl_ch128++, output_shift);
*rxdataF_comp128++ = mulByConjugate128(rxdataF128++, dl_ch128++, output_shift);
}
}
#if defined(__x86_64__) || defined(__i386__)
_mm_empty();
_m_empty();
#endif
}
void nr_pbch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
......
This diff is collapsed.
......@@ -78,8 +78,8 @@
#include "defs_RU.h"
#define RX_NB_TH_MAX 1
#define RX_NB_TH 1
#define RX_NB_TH_MAX 2
#define RX_NB_TH 2
#define LTE_SLOTS_PER_SUBFRAME 2
......
......@@ -26,11 +26,11 @@
* The host CPU needs to have support for SSE2 at least. SSE3 and SSE4.1 functions are emulated if the CPU lacks support for them.
* This will slow down the softmodem, but may be valuable if only offline signal processing is required.
*
* \author S. Held
* \email sebastian.held@imst.de
* \company IMST GmbH
* \date 2015
* \version 0.1
* \author S. Held, Laurent THOMAS
* \email sebastian.held@imst.de, laurent.thomas@open-cells.com
* \company IMST GmbH, Open Cells Project
* \date 2019
* \version 0.2
*/
#ifndef SSE_INTRIN_H
......@@ -40,23 +40,23 @@
#if defined(__x86_64) || defined(__i386__)
#ifndef __SSE2__
# error SSE2 processor intrinsics disabled
# error SSE2 processor intrinsics disabled
#endif
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef __SSE3__
# include <pmmintrin.h>
# include <tmmintrin.h>
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#ifdef __SSE4_1__
# include <smmintrin.h>
#include <smmintrin.h>
#endif
#ifdef __AVX2__
# include <immintrin.h>
#include <immintrin.h>
#endif
// ------------------------------------------------
......@@ -108,8 +108,7 @@ typedef union {
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_comge_epi8_SSE2(__m128i a, __m128i b)
{
static inline __m128i ssp_comge_epi8_SSE2(__m128i a, __m128i b) {
__m128i c;
c = _mm_cmpgt_epi8( a, b );
a = _mm_cmpeq_epi8( a, b );
......@@ -126,13 +125,11 @@ static inline __m128i ssp_comge_epi8_SSE2(__m128i a, __m128i b)
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask)
{
static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask) {
ssp_m128 A,B, MASK, maskZero;
A.i = a;
maskZero.i = ssp_comge_epi8_SSE2( mask, _mm_setzero_si128() );
MASK.i = _mm_and_si128 ( mask, _mm_set1_epi8( (char)0x0F) );
B.s8[ 0] = A.s8[ (MASK.s8[ 0]) ];
B.s8[ 1] = A.s8[ (MASK.s8[ 1]) ];
B.s8[ 2] = A.s8[ (MASK.s8[ 2]) ];
......@@ -149,7 +146,6 @@ static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask)
B.s8[13] = A.s8[ (MASK.s8[13]) ];
B.s8[14] = A.s8[ (MASK.s8[14]) ];
B.s8[15] = A.s8[ (MASK.s8[15]) ];
B.i = _mm_and_si128( B.i, maskZero.i );
return B.i;
}
......@@ -182,8 +178,7 @@ static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask)
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx )
{
static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx ) {
ssp_m128 Ahi, Alo;
b = b & 0xFF; /* Convert to 8-bit integer */
Ahi.i = _mm_unpackhi_epi8( a, _mm_setzero_si128() ); /* Ahi = a_8[8:15] Simulate 8bit integers as 16-bit integers */
......@@ -191,72 +186,71 @@ static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx )
/* Insert b as a 16-bit integer to upper or lower half of a */
switch( ndx & 0xF ) {
case 0:
Alo.i = _mm_insert_epi16( Alo.i, b, 0 );
break;
case 0:
Alo.i = _mm_insert_epi16( Alo.i, b, 0 );
break;
case 1:
Alo.i = _mm_insert_epi16( Alo.i, b, 1 );
break;
case 1:
Alo.i = _mm_insert_epi16( Alo.i, b, 1 );
break;
case 2:
Alo.i = _mm_insert_epi16( Alo.i, b, 2 );
break;
case 2:
Alo.i = _mm_insert_epi16( Alo.i, b, 2 );
break;
case 3:
Alo.i = _mm_insert_epi16( Alo.i, b, 3 );
break;
case 3:
Alo.i = _mm_insert_epi16( Alo.i, b, 3 );
break;
case 4:
Alo.i = _mm_insert_epi16( Alo.i, b, 4 );
break;
case 4:
Alo.i = _mm_insert_epi16( Alo.i, b, 4 );
break;
case 5:
Alo.i = _mm_insert_epi16( Alo.i, b, 5 );
break;
case 5:
Alo.i = _mm_insert_epi16( Alo.i, b, 5 );
break;
case 6:
Alo.i = _mm_insert_epi16( Alo.i, b, 6 );
break;
case 6:
Alo.i = _mm_insert_epi16( Alo.i, b, 6 );
break;
case 7:
Alo.i = _mm_insert_epi16( Alo.i, b, 7 );
break;
case 7:
Alo.i = _mm_insert_epi16( Alo.i, b, 7 );
break;
case 8:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 0 );
break;
case 8:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 0 );
break;
case 9:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 1 );
break;
case 9:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 1 );
break;
case 10:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 2 );
break;
case 10:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 2 );
break;
case 11:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 3 );
break;
case 11:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 3 );
break;
case 12:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 4 );
break;
case 12:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 4 );
break;
case 13:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 5 );
break;
case 13:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 5 );
break;
case 14:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 6 );
break;
case 14:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 6 );
break;
default:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 7 );
default:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 7 );
}
return _mm_packus_epi16( Alo.i, Ahi.i ); // Pack the 16-bit integers to 8bit again.
///* Another implementation, but slower: */
//ssp_m128 A, B, mask;
//mask.i = _mm_setzero_si128();
......@@ -277,16 +271,13 @@ static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx )
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_cvtepi8_epi16_SSE2 ( __m128i a)
{
/*
 * SSE2-only sign extension of the eight low signed bytes of `a` into eight
 * 16-bit lanes. The upper eight bytes of `a` are ignored.
 */
static inline __m128i ssp_cvtepi8_epi16_SSE2 ( __m128i a) {
  const __m128i signBitMask = _mm_set1_epi16(128);
  const __m128i fillFactor  = _mm_set1_epi16(0x1FE);
  /* Interleave with zero: every 16-bit lane now holds its byte zero-extended */
  const __m128i widened = _mm_unpacklo_epi8(a, _mm_setzero_si128());
  /* 128 in the lanes whose source byte had its sign bit set, 0 elsewhere */
  __m128i highFill = _mm_and_si128(signBitMask, widened);
  /* 128 * 0x1FE truncated to 16 bits is 0xFF00: the missing upper sign byte */
  highFill = _mm_mullo_epi16(highFill, fillFactor);
  return _mm_add_epi16(widened, highFill);
}
......@@ -299,8 +290,7 @@ static inline __m128i ssp_cvtepi8_epi16_SSE2 ( __m128i a)
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m128i mask ) // Bitwise (mask ? a : b)
{
static inline __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m128i mask ) { // Bitwise (mask ? a : b)
a = _mm_and_si128 ( a, mask ); // clear a where mask = 0
b = _mm_andnot_si128( mask, b ); // clear b where mask = 1
a = _mm_or_si128 ( a, b ); // a = a OR b
......@@ -316,8 +306,7 @@ static inline __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_max_epi8_SSE2( __m128i a, __m128i b )
{
static inline __m128i ssp_max_epi8_SSE2( __m128i a, __m128i b ) {
__m128i mask = _mm_cmpgt_epi8( a, b ); // FFFFFFFF where a > b
a = ssp_logical_bitwise_select_SSE2( a, b, mask );
return a;
......@@ -332,15 +321,12 @@ static inline __m128i ssp_max_epi8_SSE2( __m128i a, __m128i b )
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_cvtepi16_epi32_SSE2 ( __m128i a)
{
/*
 * SSE2-only sign extension of the four low signed 16-bit lanes of `a` into
 * four 32-bit lanes. The upper four 16-bit lanes of `a` are ignored.
 */
static inline __m128i ssp_cvtepi16_epi32_SSE2 ( __m128i a) {
  const __m128i allOnes   = _mm_set1_epi32(-1);
  const __m128i signBit16 = _mm_set1_epi32(0x8000);
  /* Each 32-bit lane becomes 0xFFFF:a_i, i.e. already correct for negative a_i */
  const __m128i widened = _mm_unpacklo_epi16(a, allOnes);
  /* (~widened) & 0x8000: 0x8000 where a_i is non-negative, 0 where it is negative */
  const __m128i nonNegative = _mm_andnot_si128(widened, signBit16);
  /* 0x10000 for non-negative lanes; adding it wraps the 0xFFFF upper half to 0 */
  const __m128i carry = _mm_slli_epi32(nonNegative, 1);
  return _mm_add_epi32(widened, carry);
}
#endif // __SSE4_1__
......@@ -350,5 +336,46 @@ static inline __m128i ssp_cvtepi16_epi32_SSE2 ( __m128i a)
#endif // x86_64 || i386
#endif // SSE_INTRIN_H
// vect128: architecture-neutral name for a 128-bit SIMD register type.
// It maps to __m128i on x86 and to int16x8_t on ARM; it is left undefined on
// any other architecture.
#if defined(__x86_64__) || defined(__i386__)
#define vect128 __m128i
#elif defined(__arm__)
#define vect128 int16x8_t
#endif
// Per-lane sign pattern (-1,+1 repeated) used by mulByConjugate128 as the second
// operand of _mm_sign_epi16: lanes 0,2,4,6 are negated, lanes 1,3,5,7 are kept.
// 16-byte aligned so it can be read directly as a vect128.
static const short minusConjug128[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1};
/*
 * Multiplies the four 16-bit complex samples in *a by the conjugate of the
 * four samples in *b. Each sample occupies two consecutive int16 lanes; the
 * first lane is treated as the real part and the second as the imaginary part.
 *
 * a            : left operand (read only)
 * b            : operand that is conjugated (read only)
 * output_shift : arithmetic right shift applied to the 32-bit products before
 *                they are packed back to 16 bits
 *
 * Returns four complex results, re/im interleaved, packed to int16 with signed
 * saturation. Only the x86 path is implemented; the ARM path aborts through
 * AssertFatal.
 */
static inline vect128 mulByConjugate128(vect128 *a, vect128 *b, int8_t output_shift) {
#if defined(__x86_64__) || defined(__i386__)
  const vect128 lhs = *a;
  const vect128 rhs = *b;
  // Swap the two int16 lanes of every sample of rhs: (re,im) -> (im,re)
  vect128 crossed = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rhs,_MM_SHUFFLE(2,3,0,1)),
                                        _MM_SHUFFLE(2,3,0,1));
  // Negate the first lane of every pair: (im,re) -> (-im,re)
  crossed = _mm_sign_epi16(crossed,*(const vect128 *)minusConjug128);
  // a.re*b.re + a.im*b.im, one 32-bit value per sample
  const vect128 re32 = _mm_srai_epi32(_mm_madd_epi16(lhs,rhs),output_shift);
  // a.im*b.re - a.re*b.im, one 32-bit value per sample
  const vect128 im32 = _mm_srai_epi32(_mm_madd_epi16(crossed,lhs),output_shift);
  // Re-interleave re/im and narrow to 16 bits with saturation
  return _mm_packs_epi32(_mm_unpacklo_epi32(re32,im32),
                         _mm_unpackhi_epi32(re32,im32));
#elif defined(__arm__)
  // NOTE(review): no value is returned on this path; this relies on AssertFatal
  // not returning - confirm against its definition.
  AssertFatal(false, "not developped\n");
#endif
}
/*
 * displaySamples128(vect): debug helper that prints the eight int16 lanes of a
 * 128-bit vector to stdout as four "(a,b)" pairs, prefixed with the source
 * text of the argument.
 *
 * The argument is evaluated exactly once. It is copied into a temporary whose
 * name is unlikely to clash with caller variables, so displaySamples128(x)
 * works even when the caller's vector is itself called x.
 * The expansion is a brace block, as before, so existing call sites with or
 * without a trailing semicolon keep compiling.
 */
#if defined(__x86_64__) || defined(__i386__)
#define displaySamples128(vect) {\
    __m128i displaySamples128_tmp_=(vect); \
    printf("vector: %s = (%hd,%hd) (%hd,%hd) (%hd,%hd) (%hd,%hd)\n", #vect, \
           _mm_extract_epi16(displaySamples128_tmp_,0), \
           _mm_extract_epi16(displaySamples128_tmp_,1),\
           _mm_extract_epi16(displaySamples128_tmp_,2),\
           _mm_extract_epi16(displaySamples128_tmp_,3),\
           _mm_extract_epi16(displaySamples128_tmp_,4),\
           _mm_extract_epi16(displaySamples128_tmp_,5),\
           _mm_extract_epi16(displaySamples128_tmp_,6),\
           _mm_extract_epi16(displaySamples128_tmp_,7));\
  }
#elif defined(__arm__)
// TBD: no ARM implementation yet. The macro expands to an empty block so that
// call sites still compile.
#define displaySamples128(vect) {}
#endif
#endif // SSE_INTRIN_H
......@@ -4,7 +4,9 @@ It replaces a actual RF board driver.
As much as possible, it works like an RF board, but not in real time: it can run faster than real time if there is enough CPU, or slower (it is CPU-bound instead of being bound by real-time RF sampling).
#build
#build
## From build_oai
You can build it the same way, and together with actual RF driver
Example:
......@@ -18,10 +20,19 @@ It is also possible to build actual RF and use choose on each run:
Will build both the eNB (lte-softmodem) and the UE (lte-uesoftmodem)
We recommend to use the option --ue-nas-use-tun that is much simpler to use than the OAI kernel driver.
#usage
## Add the rfsimulator after initial build
After any regular build, you can compile the driver
```bash
cd <the_compilation_dir_from_build_oai_script>/build
make rfsimulator
```
Then, you can use it freely
# Usage
Setting the env variable RFSIMULATOR enables the RF board simulator
It should be set to "enb" in the eNB
## 4G case
For the UE, it should be set to the IP address of the eNB
example:
```bash
......@@ -33,6 +44,23 @@ If you reach 'RA not active' on UE, be careful to generate a valid SIM
```bash
$OPENAIR_DIR/targets/bin/conf2uedata -c $OPENAIR_DIR/openair3/NAS/TOOLS/ue_eurecom_test_sfr.conf -o .
```
## 5G case
After regular build, add the simulation driver
```bash
cd ran_build/build
make rfsimulator
```
### Launch gNB in one window
```bash
sudo RFSIMULATOR=enb ./nr-softmodem -O ../../../targets/PROJECTS/GENERIC-LTE-EPC/CONF/gnb.band78.tm1.106PRB.usrpn300.conf --parallel-config PARALLEL_SINGLE_THREAD
```
### Launch UE in another window
```bash
sudo RFSIMULATOR=127.0.0.1 ./nr-uesoftmodem --numerology 1 -r 106 -C 3510000000
```
Of course, set the gNB machine IP address if the UE and the gNB are not on the same machine
In UE, you can add "-d" to get the softscope
# Caveats
There are still issues with power control: txgain and rxgain are not used
......
......@@ -997,8 +997,9 @@ void *UE_thread(void *arg) {
UE->rx_offset < 10*UE->frame_parms.samples_per_subframe )
UE->rx_offset_diff = 1;
LOG_D(PHY,"AbsSubframe %d.%d TTI SET rx_off_diff to %d rx_offset %d \n",proc->frame_rx,subframe_nr,UE->rx_offset_diff,UE->rx_offset);
LOG_E(PHY,"AbsSubframe %d.%d TTI SET rx_off_diff to %d rx_offset %d \n",proc->frame_rx,subframe_nr,UE->rx_offset_diff,UE->rx_offset);
if ( getenv("RFSIMULATOR") != 0)
UE->rx_offset_diff=0;
readBlockSize=UE->frame_parms.samples_per_subframe -
UE->frame_parms.ofdm_symbol_size -
UE->frame_parms.nb_prefix_samples0 -
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment