Commit 0b985591 authored by Thomas Schlichter

small optimization in usrp_lib.cpp

parent 286d9a0f
...@@ -483,20 +483,21 @@ static int trx_usrp_write(openair0_device *device, ...@@ -483,20 +483,21 @@ static int trx_usrp_write(openair0_device *device,
// bring RX data into 12 LSBs for softmodem RX // bring RX data into 12 LSBs for softmodem RX
for (int i=0; i<cc; i++) { for (int i=0; i<cc; i++) {
for (int j=0; j<nsamps2; j++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
if ((((uintptr_t) buff[i])&0x1F)==0) { if ((((uintptr_t) buff[i])&0x1F)==0) {
for (int j=0; j<nsamps2; j++) {
buff_tx[i][j] = simde_mm256_slli_epi16(((__m256i *)buff[i])[j],4); buff_tx[i][j] = simde_mm256_slli_epi16(((__m256i *)buff[i])[j],4);
} }
else } else {
{ for (int j=0; j<nsamps2; j++) {
__m256i tmp = simde_mm256_loadu_si256(((__m256i *)buff[i])+j); __m256i tmp = simde_mm256_loadu_si256(((__m256i *)buff[i])+j);
buff_tx[i][j] = simde_mm256_slli_epi16(tmp,4); buff_tx[i][j] = simde_mm256_slli_epi16(tmp,4);
} }
}
#elif defined(__arm__) || defined(__aarch64__) #elif defined(__arm__) || defined(__aarch64__)
for (int j=0; j<nsamps2; j++)
buff_tx[i][j] = vshlq_n_s16(((int16x8_t *)buff[i])[j],4); buff_tx[i][j] = vshlq_n_s16(((int16x8_t *)buff[i])[j],4);
#endif #endif
}
} }
s->tx_md.has_time_spec = true; s->tx_md.has_time_spec = true;
...@@ -621,20 +622,21 @@ void *trx_usrp_write_thread(void * arg){ ...@@ -621,20 +622,21 @@ void *trx_usrp_write_thread(void * arg){
// bring RX data into 12 LSBs for softmodem RX // bring RX data into 12 LSBs for softmodem RX
for (int i=0; i<cc; i++) { for (int i=0; i<cc; i++) {
for (int j=0; j<nsamps2; j++) { #if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__) if ((((uintptr_t) buff[i])&0x1F)==0) {
if ((((uintptr_t) buff[i])&0x1F)==0) { for (int j=0; j<nsamps2; j++) {
buff_tx[i][j] = simde_mm256_slli_epi16(((__m256i *)buff[i])[j],4); buff_tx[i][j] = simde_mm256_slli_epi16(((__m256i *)buff[i])[j],4);
} }
else } else {
{ for (int j=0; j<nsamps2; j++) {
__m256i tmp = simde_mm256_loadu_si256(((__m256i *)buff[i])+j); __m256i tmp = simde_mm256_loadu_si256(((__m256i *)buff[i])+j);
buff_tx[i][j] = simde_mm256_slli_epi16(tmp,4); buff_tx[i][j] = simde_mm256_slli_epi16(tmp,4);
} }
#elif defined(__arm__) || defined(__aarch64__)
buff_tx[i][j] = vshlq_n_s16(((int16x8_t *)buff[i])[j],4);
#endif
} }
#elif defined(__arm__) || defined(__aarch64__)
for (int j=0; j<nsamps2; j++)
buff_tx[i][j] = vshlq_n_s16(((int16x8_t *)buff[i])[j],4);
#endif
} }
...@@ -772,20 +774,21 @@ static int trx_usrp_read(openair0_device *device, openair0_timestamp *ptimestamp ...@@ -772,20 +774,21 @@ static int trx_usrp_read(openair0_device *device, openair0_timestamp *ptimestamp
// bring RX data into 12 LSBs for softmodem RX // bring RX data into 12 LSBs for softmodem RX
for (int i=0; i<cc; i++) { for (int i=0; i<cc; i++) {
for (int j=0; j<nsamps2; j++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
// FK: in some cases the buffer might not be 32 byte aligned, so we cannot use avx2 // FK: in some cases the buffer might not be 32 byte aligned, so we cannot use avx2
if ((((uintptr_t) buff[i])&0x1F)==0) {
if ((((uintptr_t) buff[i])&0x1F)==0) { for (int j=0; j<nsamps2; j++) {
((__m256i *)buff[i])[j] = simde_mm256_srai_epi16(buff_tmp[i][j],rxshift); ((__m256i *)buff[i])[j] = simde_mm256_srai_epi16(buff_tmp[i][j],rxshift);
} else { }
} else {
for (int j=0; j<nsamps2; j++) {
__m256i tmp = simde_mm256_srai_epi16(buff_tmp[i][j],rxshift); __m256i tmp = simde_mm256_srai_epi16(buff_tmp[i][j],rxshift);
simde_mm256_storeu_si256(((__m256i *)buff[i])+j, tmp); simde_mm256_storeu_si256(((__m256i *)buff[i])+j, tmp);
} }
} }
#elif defined(__arm__) || defined(__aarch64__) #elif defined(__arm__) || defined(__aarch64__)
for (int j=0; j<nsamps2; j++) for (int j=0; j<nsamps2; j++)
((int16x8_t *)buff[i])[j] = vshrq_n_s16(buff_tmp[i][j],rxshift); ((int16x8_t *)buff[i])[j] = vshrq_n_s16(buff_tmp[i][j],rxshift);
#endif #endif
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment