Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
canghaiwuhen
OpenXG-RAN
Commits
dd8cb984
Commit
dd8cb984
authored
May 19, 2020
by
Raymond Knopp
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added support in gNB and UE for long PRACH sequences. Tested format 0 with unitary simulation only.
parent
b5b78125
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
893 additions
and
48 deletions
+893
-48
openair1/PHY/INIT/nr_init_ru.c
openair1/PHY/INIT/nr_init_ru.c
+2
-2
openair1/PHY/NR_TRANSPORT/nr_prach.c
openair1/PHY/NR_TRANSPORT/nr_prach.c
+6
-5
openair1/PHY/NR_UE_TRANSPORT/nr_prach.c
openair1/PHY/NR_UE_TRANSPORT/nr_prach.c
+4
-3
openair1/PHY/TOOLS/Makefile
openair1/PHY/TOOLS/Makefile
+10
-10
openair1/PHY/TOOLS/oai_dfts.c
openair1/PHY/TOOLS/oai_dfts.c
+840
-10
openair1/SIMULATION/NR_PHY/prachsim.c
openair1/SIMULATION/NR_PHY/prachsim.c
+28
-17
openair1/SIMULATION/TOOLS/taus.c
openair1/SIMULATION/TOOLS/taus.c
+2
-1
openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
+1
-0
No files found.
openair1/PHY/INIT/nr_init_ru.c
View file @
dd8cb984
...
...
@@ -113,8 +113,8 @@ int nr_phy_init_RU(RU_t *ru) {
ru
->
prach_rxsigF
=
(
int16_t
**
)
malloc
(
ru
->
nb_rx
*
sizeof
(
int16_t
*
));
for
(
i
=
0
;
i
<
ru
->
nb_rx
;
i
++
)
{
//
for preamble format 1 and 2, more memory should be allocated
ru
->
prach_rxsigF
[
i
]
=
(
int16_t
*
)
malloc16_clear
(
fp
->
ofdm_symbol_size
*
12
*
(
1
<<
mu
)
*
2
*
sizeof
(
int16_t
)
);
//
largest size for PRACH FFT is 4x98304 (16*24576)
ru
->
prach_rxsigF
[
i
]
=
(
int16_t
*
)
malloc16_clear
(
4
*
98304
*
2
*
sizeof
(
int16_t
)
);
LOG_D
(
PHY
,
"[INIT] prach_vars->rxsigF[%d] = %p
\n
"
,
i
,
ru
->
prach_rxsigF
[
i
]);
}
...
...
openair1/PHY/NR_TRANSPORT/nr_prach.c
View file @
dd8cb984
...
...
@@ -283,6 +283,7 @@ void rx_nr_prach_ru(RU_t *ru,
if
(
prachFormat
==
0
||
prachFormat
==
1
||
prachFormat
==
2
)
{
dftlen
=
49152
;
dft
(
DFT_49152
,
prach2
,
rxsigF
[
aa
],
1
);
LOG_M
(
"prach_rxsigF.m"
,
"prach_rxF0"
,
rxsigF
[
aa
],
49152
,
1
,
1
);
}
if
(
prachFormat
==
1
||
prachFormat
==
2
)
{
dft
(
DFT_49152
,
prach2
+
98304
,
rxsigF
[
aa
]
+
98304
,
1
);
...
...
@@ -503,6 +504,7 @@ void rx_nr_prach_ru(RU_t *ru,
}
//Coherent combining of PRACH repetitions (assumes channel does not change, to be revisited for "long" PRACH)
LOG_D
(
PHY
,
"Doing PRACH combining of %d reptitions N_ZC %d
\n
"
,
reps
,
N_ZC
);
int16_t
rxsigF_tmp
[
N_ZC
<<
1
];
// if (k+N_ZC > dftlen) { // PRACH signal is split around DC
int16_t
*
rxsigF2
=
rxsigF
[
aa
];
...
...
@@ -583,8 +585,6 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
restricted_set
=
cfg
->
restricted_set_config
.
value
;
AssertFatal
(
prach_sequence_length
==
1
,
"no support yet for long prachSequenceLength
\n
"
);
uint8_t
prach_fmt
=
prach_pdu
->
prach_format
;
uint16_t
N_ZC
=
(
prach_sequence_length
==
0
)
?
839
:
139
;
...
...
@@ -624,7 +624,7 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
if
(
LOG_DEBUGFLAG
(
PRACH
)){
int
en
=
dB_fixed
(
signal_energy
((
int32_t
*
)
&
rxsigF
[
0
][
0
],(
N_ZC
==
839
)
?
840
:
140
));
if
(
en
>
60
)
LOG_
I
(
PHY
,
"frame %d, subframe %d : Trying preamble %d
\n
"
,
frame
,
subframe
,
preamble_index
);
if
(
en
>
60
)
LOG_
D
(
PHY
,
"frame %d, subframe %d : Trying preamble %d
\n
"
,
frame
,
subframe
,
preamble_index
);
}
if
(
restricted_set
==
0
)
{
// This is the relative offset in the root sequence table (5.7.2-4 from 36.211) for the given preamble index
...
...
@@ -750,10 +750,10 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
prach_ifft
[
i
]
+=
((
int32_t
)
prach_ifft_tmp
[
i
<<
1
]
*
(
int32_t
)
prach_ifft_tmp
[(
i
<<
1
)]
+
(
int32_t
)
prach_ifft_tmp
[
1
+
(
i
<<
1
)]
*
(
int32_t
)
prach_ifft_tmp
[
1
+
(
i
<<
1
)])
>>
10
;
}
if
(
LOG_DUMPFLAG
(
PRACH
))
{
//
if (LOG_DUMPFLAG(PRACH)) {
if
(
aa
==
0
)
LOG_M
(
"prach_rxF_comp0.m"
,
"prach_rxF_comp0"
,
prachF
,
1024
,
1
,
1
);
if
(
aa
==
1
)
LOG_M
(
"prach_rxF_comp1.m"
,
"prach_rxF_comp1"
,
prachF
,
1024
,
1
,
1
);
}
//
}
}
// antennas_rx
}
// new dft
...
...
@@ -766,6 +766,7 @@ void rx_nr_prach(PHY_VARS_gNB *gNB,
lev
=
(
int32_t
)
prach_ifft
[(
preamble_shift2
+
i
)];
levdB
=
dB_fixed_times10
(
lev
);
if
(
levdB
>*
max_preamble_energy
)
{
LOG_D
(
PHY
,
"preamble_index %d, delay %d en %d dB > %d dB
\n
"
,
preamble_index
,
i
,
levdB
,
*
max_preamble_energy
);
*
max_preamble_energy
=
levdB
;
*
max_preamble_delay
=
i
;
// Note: This has to be normalized to the 30.72 Ms/s sampling rate
*
max_preamble
=
preamble_index
;
...
...
openair1/PHY/NR_UE_TRANSPORT/nr_prach.c
View file @
dd8cb984
...
...
@@ -60,6 +60,7 @@ extern int64_t table_6_3_3_2_4_prachConfig_Index [256][10];
extern
uint16_t
nr_du
[
838
];
extern
int16_t
nr_ru
[
2
*
839
];
extern
const
char
*
prachfmt
[
9
];
extern
const
char
*
prachfmt03
[
4
];
// Note:
// - prach_fmt_id is an ID used to map to the corresponding PRACH format value in prachfmt
...
...
@@ -212,7 +213,7 @@ int32_t generate_nr_prach(PHY_VARS_NR_UE *ue, uint8_t gNB_id, uint8_t slot){
LOG_I
(
PHY
,
"PRACH [UE %d] generate PRACH for RootSeqIndex %d, Preamble Index %d, PRACH Format %s, NCS %d (N_ZC %d): Preamble_offset %d, Preamble_shift %d
\n
"
,
Mod_id
,
rootSequenceIndex
,
preamble_index
,
prachfmt
[
prach_fmt_id
],
prach_sequence_length
==
0
?
prachfmt03
[
prach_fmt_id
]
:
prachfmt
[
prach_fmt_id
],
NCS
,
N_ZC
,
preamble_offset
,
...
...
openair1/PHY/TOOLS/Makefile
View file @
dd8cb984
lte_dfts_sse4
:
lte
_dfts.c
gcc-7
-O3
-std
=
gnu99
-msse4
.1
-o
lte_dfts_sse4 lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c
-I
$$
OPENAIR_HOME
-I
$$
OPENAIR1_DIR
-I
$$
OPENAIR_TARGETS
-I
$$
OPENAIR2_DIR
-I
$$
OPENAIR2_DIR/COMMON
-I
$$
OPENAIR_HOME/common/utils
-I
$$
OPENAIR_HOME/common/utils/T
-DMR_MAIN
-DNB_ANTENNAS_RX
=
1
-lm
-lpthread
# -DD256STATS #-DD64STATS
oai_dfts_sse4
:
oai
_dfts.c
gcc-7
-O3
-std
=
gnu99
-msse4
.1
-o
oai_dfts_sse4 oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c
-I
$$
OPENAIR_HOME
-I
$$
OPENAIR1_DIR
-I
$$
OPENAIR_TARGETS
-I
$$
OPENAIR_TARGETS/COMMON
-I
$$
OPENAIR_TARGETS/ARCH/COMMON
-I
$$
OPENAIR2_DIR
-I
$$
OPENAIR2_DIR/COMMON
-I
$$
OPENAIR_HOME/common/utils
-I
$$
OPENAIR_HOME/common/utils/T
-I
$$
OPENAIR_HOME/common/utils/msc
-I
$$
OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc
-DMR_MAIN
-DNB_ANTENNAS_RX
=
1
-lm
-lpthread
# -DD256STATS #-DD64STATS
lte_dfts_avx2
:
lte
_dfts.c
gcc
-O2
-std
=
gnu99
-mavx2
-g
-ggdb
-o
lte_dfts_avx2 lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c
$$
OPENAIR_HOME/common/utils/backtrace.c
-I
$$
OPENAIR_HOME
-I
$$
OPENAIR1_DIR
-I
$$
OPENAIR_TARGETS
-I
$$
OPENAIR2_DIR
-I
$$
OPENAIR2_DIR/COMMON
-I
$$
OPENAIR_HOME/common/utils
-I
$$
OPENAIR_HOME/common/utils/T
-DMR_MAIN
-DNB_ANTENNAS_RX
=
1
-lm
-lpthread
# -DD256STATS #-DD64STATS
oai_dfts_avx2
:
oai
_dfts.c
gcc
-O2
-std
=
gnu99
-mavx2
-g
-ggdb
-o
oai_dfts_avx2 oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c
$$
OPENAIR_HOME/common/utils/backtrace.c
-I
$$
OPENAIR_HOME
-I
$$
OPENAIR1_DIR
-I
$$
OPENAIR_TARGETS
-I
$$
OPENAIR_TARGETS/COMMON
-I
$$
OPENAIR_TARGETS/ARCH/COMMON
-I
$$
OPENAIR2_DIR
-I
$$
OPENAIR2_DIR/COMMON
-I
$$
OPENAIR_HOME/common/utils
-I
$$
OPENAIR_HOME/common/utils/T
-I
$$
OPENAIR_HOME/common/utils/msc
-I
$$
OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc
-DMR_MAIN
-DNB_ANTENNAS_RX
=
1
-lm
-lpthread
# -DD256STATS #-DD64STATS
lte_dfts_avx2.s
:
lte
_dfts.c
gcc
-O2
-std
=
gnu99
-mavx2
-S
lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c
$$
OPENAIR_HOME/common/utils/backtrace.c
-I
$$
OPENAIR_HOME
-I
$$
OPENAIR1_DIR
-I
$$
OPENAIR_TARGETS
-I
$$
OPENAIR2_DIR
-I
$$
OPENAIR2_DIR/COMMON
-I
$$
OPENAIR_HOME/common/utils
-I
$$
OPENAIR_HOME/common/utils/T
-DMR_MAIN
-DNB_ANTENNAS_RX
=
1
-lm
-lpthread
# -DD256STATS #-DD64STATS
oai_dfts_avx2.s
:
oai
_dfts.c
gcc
-O2
-std
=
gnu99
-mavx2
-S
oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c
$$
OPENAIR_HOME/common/utils/backtrace.c
-I
$$
OPENAIR_HOME
-I
$$
OPENAIR1_DIR
-I
$$
OPENAIR_TARGETS
-I
$$
OPENAIR_TARGETS/COMMON
-I
$$
OPENAIR_TARGETS/ARCH/COMMON
-I
$$
OPENAIR2_DIR
-I
$$
OPENAIR2_DIR/COMMON
-I
$$
OPENAIR_HOME/common/utils
-I
$$
OPENAIR_HOME/common/utils/T
-I
$$
OPENAIR_HOME/common/utils/msc
-I
$$
OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc
-DMR_MAIN
-DNB_ANTENNAS_RX
=
1
-lm
-lpthread
# -DD256STATS #-DD64STATS
lte_dfts_sse4.s
:
lte
_dfts.c
gcc
-O2
-std
=
gnu99
-msse4
.1
-S
lte_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c
$$
OPENAIR_HOME/common/utils/backtrace.c
-I
$$
OPENAIR_HOME
-I
$$
OPENAIR1_DIR
-I
$$
OPENAIR_TARGETS
-I
$$
OPENAIR2_DIR
-I
$$
OPENAIR2_DIR/COMMON
-I
$$
OPENAIR_HOME/common/utils
-I
$$
OPENAIR_HOME/common/utils/T
-DMR_MAIN
-DNB_ANTENNAS_RX
=
1
-lm
-lpthread
# -DD256STATS #-DD64STATS
oai_dfts_sse4.s
:
oai
_dfts.c
gcc
-O2
-std
=
gnu99
-msse4
.1
-S
oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c
$$
OPENAIR_HOME/common/utils/backtrace.c
-I
$$
OPENAIR_HOME
-I
$$
OPENAIR1_DIR
-I
$$
OPENAIR_TARGETS
-I
$$
OPENAIR_TARGETS/COMMON
-I
$$
OPENAIR_TARGETS/ARCH/COMMON
-I
$$
OPENAIR2_DIR
-I
$$
OPENAIR2_DIR/COMMON
-I
$$
OPENAIR_HOME/common/utils
-I
$$
OPENAIR_HOME/common/utils/T
-I
$$
OPENAIR_HOME/common/utils/msc
-I
$$
OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc
-DMR_MAIN
-DNB_ANTENNAS_RX
=
1
-lm
-lpthread
# -DD256STATS #-DD64STATS
dft_cycles_avx2
:
lte
_dfts_avx2
./
lte
_dfts_avx2 | egrep cycles
dft_cycles_avx2
:
oai
_dfts_avx2
./
oai
_dfts_avx2 | egrep cycles
openair1/PHY/TOOLS/oai_dfts.c
View file @
dd8cb984
...
...
@@ -5353,6 +5353,607 @@ void idft8192(int16_t *x,int16_t *y,unsigned char scale)
#endif
// Twiddle-factor storage for the radix-4 stage of dft16384()/idft16384().
// The routines read it as three consecutive tables of 2*4096 int16_t each,
// through 128-bit or 256-bit SIMD vector pointers, so the array is explicitly
// 32-byte aligned (matching tw32768 and twb49152) rather than relying on the
// compiler's default placement.
int16_t tw16384[3*2*4096] __attribute__((aligned(32)));
#ifndef __AVX2__
void
dft16384
(
int16_t
*
x
,
int16_t
*
y
,
unsigned
char
scale
)
{
simd_q15_t
xtmp
[
4096
],
ytmp
[
4096
],
*
tw16384_128p
=
(
simd_q15_t
*
)
tw16384
,
*
x128
=
(
simd_q15_t
*
)
x
,
*
y128
=
(
simd_q15_t
*
)
y
,
*
y128p
=
(
simd_q15_t
*
)
y
;
simd_q15_t
*
ytmpp
=
&
ytmp
[
0
];
int
i
,
j
;
for
(
i
=
0
,
j
=
0
;
i
<
4096
;
i
+=
4
,
j
++
)
{
transpose16_ooff
(
x128
+
i
,
xtmp
+
j
,
1024
);
}
dft4096
((
int16_t
*
)(
xtmp
),(
int16_t
*
)(
ytmp
),
1
);
dft4096
((
int16_t
*
)(
xtmp
+
1024
),(
int16_t
*
)(
ytmp
+
1024
),
1
);
dft4096
((
int16_t
*
)(
xtmp
+
2048
),(
int16_t
*
)(
ytmp
+
2048
),
1
);
dft4096
((
int16_t
*
)(
xtmp
+
3072
),(
int16_t
*
)(
ytmp
+
3072
),
1
);
for
(
i
=
0
;
i
<
1024
;
i
++
)
{
bfly4
(
ytmpp
,
ytmpp
+
1024
,
ytmpp
+
2048
,
ytmpp
+
3072
,
y128p
,
y128p
+
1024
,
y128p
+
2048
,
y128p
+
3072
,
tw16384_128p
,
tw16384_128p
+
1024
,
tw16384_128p
+
2048
);
tw16384_128p
++
;
y128p
++
;
ytmpp
++
;
}
if
(
scale
>
0
)
{
for
(
i
=
0
;
i
<
256
;
i
++
)
{
y128
[
0
]
=
shiftright_int16
(
y128
[
0
],
1
);
y128
[
1
]
=
shiftright_int16
(
y128
[
1
],
1
);
y128
[
2
]
=
shiftright_int16
(
y128
[
2
],
1
);
y128
[
3
]
=
shiftright_int16
(
y128
[
3
],
1
);
y128
[
4
]
=
shiftright_int16
(
y128
[
4
],
1
);
y128
[
5
]
=
shiftright_int16
(
y128
[
5
],
1
);
y128
[
6
]
=
shiftright_int16
(
y128
[
6
],
1
);
y128
[
7
]
=
shiftright_int16
(
y128
[
7
],
1
);
y128
[
8
]
=
shiftright_int16
(
y128
[
8
],
1
);
y128
[
9
]
=
shiftright_int16
(
y128
[
9
],
1
);
y128
[
10
]
=
shiftright_int16
(
y128
[
10
],
1
);
y128
[
11
]
=
shiftright_int16
(
y128
[
11
],
1
);
y128
[
12
]
=
shiftright_int16
(
y128
[
12
],
1
);
y128
[
13
]
=
shiftright_int16
(
y128
[
13
],
1
);
y128
[
14
]
=
shiftright_int16
(
y128
[
14
],
1
);
y128
[
15
]
=
shiftright_int16
(
y128
[
15
],
1
);
y128
+=
16
;
}
}
_mm_empty
();
_m_empty
();
}
void
idft16384
(
int16_t
*
x
,
int16_t
*
y
,
unsigned
char
scale
)
{
simd_q15_t
xtmp
[
4096
],
ytmp
[
4096
],
*
tw16384_128p
=
(
simd_q15_t
*
)
tw16384
,
*
x128
=
(
simd_q15_t
*
)
x
,
*
y128
=
(
simd_q15_t
*
)
y
,
*
y128p
=
(
simd_q15_t
*
)
y
;
simd_q15_t
*
ytmpp
=
&
ytmp
[
0
];
int
i
,
j
;
for
(
i
=
0
,
j
=
0
;
i
<
4096
;
i
+=
4
,
j
++
)
{
transpose16_ooff
(
x128
+
i
,
xtmp
+
j
,
1024
);
}
idft4096
((
int16_t
*
)(
xtmp
),(
int16_t
*
)(
ytmp
),
1
);
idft4096
((
int16_t
*
)(
xtmp
+
1024
),(
int16_t
*
)(
ytmp
+
1024
),
1
);
idft4096
((
int16_t
*
)(
xtmp
+
2048
),(
int16_t
*
)(
ytmp
+
2048
),
1
);
idft4096
((
int16_t
*
)(
xtmp
+
3072
),(
int16_t
*
)(
ytmp
+
3072
),
1
);
for
(
i
=
0
;
i
<
1024
;
i
++
)
{
ibfly4
(
ytmpp
,
ytmpp
+
1024
,
ytmpp
+
2048
,
ytmpp
+
3072
,
y128p
,
y128p
+
1024
,
y128p
+
2048
,
y128p
+
3072
,
tw16384_128p
,
tw16384_128p
+
1024
,
tw16384_128p
+
2048
);
tw16384_128p
++
;
y128p
++
;
ytmpp
++
;
}
if
(
scale
>
0
)
{
for
(
i
=
0
;
i
<
256
;
i
++
)
{
y128
[
0
]
=
shiftright_int16
(
y128
[
0
],
scale
);
y128
[
1
]
=
shiftright_int16
(
y128
[
1
],
scale
);
y128
[
2
]
=
shiftright_int16
(
y128
[
2
],
scale
);
y128
[
3
]
=
shiftright_int16
(
y128
[
3
],
scale
);
y128
[
4
]
=
shiftright_int16
(
y128
[
4
],
scale
);
y128
[
5
]
=
shiftright_int16
(
y128
[
5
],
scale
);
y128
[
6
]
=
shiftright_int16
(
y128
[
6
],
scale
);
y128
[
7
]
=
shiftright_int16
(
y128
[
7
],
scale
);
y128
[
8
]
=
shiftright_int16
(
y128
[
8
],
scale
);
y128
[
9
]
=
shiftright_int16
(
y128
[
9
],
scale
);
y128
[
10
]
=
shiftright_int16
(
y128
[
10
],
scale
);
y128
[
11
]
=
shiftright_int16
(
y128
[
11
],
scale
);
y128
[
12
]
=
shiftright_int16
(
y128
[
12
],
scale
);
y128
[
13
]
=
shiftright_int16
(
y128
[
13
],
scale
);
y128
[
14
]
=
shiftright_int16
(
y128
[
14
],
scale
);
y128
[
15
]
=
shiftright_int16
(
y128
[
15
],
scale
);
y128
+=
16
;
}
}
_mm_empty
();
_m_empty
();
}
#else //__AVX2__
void
dft16384
(
int16_t
*
x
,
int16_t
*
y
,
unsigned
char
scale
)
{
simd256_q15_t
xtmp
[
2048
],
ytmp
[
2048
],
*
tw16384_256p
=
(
simd256_q15_t
*
)
tw16384
,
*
x256
=
(
simd256_q15_t
*
)
x
,
*
y256
=
(
simd256_q15_t
*
)
y
,
*
y256p
=
(
simd256_q15_t
*
)
y
;
simd256_q15_t
*
ytmpp
=
&
ytmp
[
0
];
int
i
,
j
;
for
(
i
=
0
,
j
=
0
;
i
<
2048
;
i
+=
4
,
j
++
)
{
transpose16_ooff_simd256
(
x256
+
i
,
xtmp
+
j
,
512
);
}
dft4096
((
int16_t
*
)(
xtmp
),(
int16_t
*
)(
ytmp
),
1
);
dft4096
((
int16_t
*
)(
xtmp
+
512
),(
int16_t
*
)(
ytmp
+
512
),
1
);
dft4096
((
int16_t
*
)(
xtmp
+
1024
),(
int16_t
*
)(
ytmp
+
1024
),
1
);
dft4096
((
int16_t
*
)(
xtmp
+
1536
),(
int16_t
*
)(
ytmp
+
1536
),
1
);
for
(
i
=
0
;
i
<
512
;
i
++
)
{
bfly4_256
(
ytmpp
,
ytmpp
+
512
,
ytmpp
+
1024
,
ytmpp
+
1536
,
y256p
,
y256p
+
512
,
y256p
+
1024
,
y256p
+
1536
,
tw16384_256p
,
tw16384_256p
+
512
,
tw16384_256p
+
1024
);
tw16384_256p
++
;
y256p
++
;
ytmpp
++
;
}
if
(
scale
>
0
)
{
for
(
i
=
0
;
i
<
128
;
i
++
)
{
y256
[
0
]
=
shiftright_int16_simd256
(
y256
[
0
],
1
);
y256
[
1
]
=
shiftright_int16_simd256
(
y256
[
1
],
1
);
y256
[
2
]
=
shiftright_int16_simd256
(
y256
[
2
],
1
);
y256
[
3
]
=
shiftright_int16_simd256
(
y256
[
3
],
1
);
y256
[
4
]
=
shiftright_int16_simd256
(
y256
[
4
],
1
);
y256
[
5
]
=
shiftright_int16_simd256
(
y256
[
5
],
1
);
y256
[
6
]
=
shiftright_int16_simd256
(
y256
[
6
],
1
);
y256
[
7
]
=
shiftright_int16_simd256
(
y256
[
7
],
1
);
y256
[
8
]
=
shiftright_int16_simd256
(
y256
[
8
],
1
);
y256
[
9
]
=
shiftright_int16_simd256
(
y256
[
9
],
1
);
y256
[
10
]
=
shiftright_int16_simd256
(
y256
[
10
],
1
);
y256
[
11
]
=
shiftright_int16_simd256
(
y256
[
11
],
1
);
y256
[
12
]
=
shiftright_int16_simd256
(
y256
[
12
],
1
);
y256
[
13
]
=
shiftright_int16_simd256
(
y256
[
13
],
1
);
y256
[
14
]
=
shiftright_int16_simd256
(
y256
[
14
],
1
);
y256
[
15
]
=
shiftright_int16_simd256
(
y256
[
15
],
1
);
y256
+=
16
;
}
}
_mm_empty
();
_m_empty
();
}
void
idft16384
(
int16_t
*
x
,
int16_t
*
y
,
unsigned
char
scale
)
{
simd256_q15_t
xtmp
[
2048
],
ytmp
[
2048
],
*
tw16384_256p
=
(
simd256_q15_t
*
)
tw16384
,
*
x256
=
(
simd256_q15_t
*
)
x
,
*
y256
=
(
simd256_q15_t
*
)
y
,
*
y256p
=
(
simd256_q15_t
*
)
y
;
simd256_q15_t
*
ytmpp
=
&
ytmp
[
0
];
int
i
,
j
;
for
(
i
=
0
,
j
=
0
;
i
<
2048
;
i
+=
4
,
j
++
)
{
transpose16_ooff_simd256
(
x256
+
i
,
xtmp
+
j
,
512
);
}
idft4096
((
int16_t
*
)(
xtmp
),(
int16_t
*
)(
ytmp
),
1
);
idft4096
((
int16_t
*
)(
xtmp
+
512
),(
int16_t
*
)(
ytmp
+
512
),
1
);
idft4096
((
int16_t
*
)(
xtmp
+
1024
),(
int16_t
*
)(
ytmp
+
1024
),
1
);
idft4096
((
int16_t
*
)(
xtmp
+
1536
),(
int16_t
*
)(
ytmp
+
1536
),
1
);
for
(
i
=
0
;
i
<
512
;
i
++
)
{
ibfly4_256
(
ytmpp
,
ytmpp
+
512
,
ytmpp
+
1024
,
ytmpp
+
1536
,
y256p
,
y256p
+
512
,
y256p
+
1024
,
y256p
+
1536
,
tw16384_256p
,
tw16384_256p
+
512
,
tw16384_256p
+
1024
);
tw16384_256p
++
;
y256p
++
;
ytmpp
++
;
}
if
(
scale
>
0
)
{
for
(
i
=
0
;
i
<
128
;
i
++
)
{
y256
[
0
]
=
shiftright_int16_simd256
(
y256
[
0
],
1
);
y256
[
1
]
=
shiftright_int16_simd256
(
y256
[
1
],
1
);
y256
[
2
]
=
shiftright_int16_simd256
(
y256
[
2
],
1
);
y256
[
3
]
=
shiftright_int16_simd256
(
y256
[
3
],
1
);
y256
[
4
]
=
shiftright_int16_simd256
(
y256
[
4
],
1
);
y256
[
5
]
=
shiftright_int16_simd256
(
y256
[
5
],
1
);
y256
[
6
]
=
shiftright_int16_simd256
(
y256
[
6
],
1
);
y256
[
7
]
=
shiftright_int16_simd256
(
y256
[
7
],
1
);
y256
[
8
]
=
shiftright_int16_simd256
(
y256
[
8
],
1
);
y256
[
9
]
=
shiftright_int16_simd256
(
y256
[
9
],
1
);
y256
[
10
]
=
shiftright_int16_simd256
(
y256
[
10
],
1
);
y256
[
11
]
=
shiftright_int16_simd256
(
y256
[
11
],
1
);
y256
[
12
]
=
shiftright_int16_simd256
(
y256
[
12
],
1
);
y256
[
13
]
=
shiftright_int16_simd256
(
y256
[
13
],
1
);
y256
[
14
]
=
shiftright_int16_simd256
(
y256
[
14
],
1
);
y256
[
15
]
=
shiftright_int16_simd256
(
y256
[
15
],
1
);
y256
+=
16
;
}
}
_mm_empty
();
_m_empty
();
}
#endif //__AVX2__
// Twiddle-factor table for the final bfly2/ibfly2 stage of dft32768()/idft32768().
// Those routines read it through SIMD vector pointers, hence the 32-byte alignment.
// NOTE(review): the code that fills this table is not visible here -- presumably an
// init routine elsewhere in the file; confirm it runs before the first transform.
int16_t tw32768[2*16384] __attribute__((aligned(32)));
#ifndef __AVX2__
void
dft32768
(
int16_t
*
x
,
int16_t
*
y
,
unsigned
char
scale
)
{
simdshort_q15_t
xtmp
[
16384
],
*
xtmpp
,
*
x64
=
(
simdshort_q15_t
*
)
x
;
simd_q15_t
ytmp
[
8192
],
*
tw32768_128p
=
(
simd_q15_t
*
)
tw32768
,
*
y128
=
(
simd_q15_t
*
)
y
,
*
y128p
=
(
simd_q15_t
*
)
y
;
simd_q15_t
*
ytmpp
=
&
ytmp
[
0
];
int
i
;
simd_q15_t
ONE_OVER_SQRT2_Q15_128
=
set1_int16
(
ONE_OVER_SQRT2_Q15
);
xtmpp
=
xtmp
;
for
(
i
=
0
;
i
<
256
;
i
++
)
{
transpose4_ooff
(
x64
,
xtmpp
,
8192
);
transpose4_ooff
(
x64
+
2
,
xtmpp
+
1
,
8192
);
transpose4_ooff
(
x64
+
4
,
xtmpp
+
2
,
8192
);
transpose4_ooff
(
x64
+
6
,
xtmpp
+
3
,
8192
);
transpose4_ooff
(
x64
+
8
,
xtmpp
+
4
,
8192
);
transpose4_ooff
(
x64
+
10
,
xtmpp
+
5
,
8192
);
transpose4_ooff
(
x64
+
12
,
xtmpp
+
6
,
8192
);
transpose4_ooff
(
x64
+
14
,
xtmpp
+
7
,
8192
);
transpose4_ooff
(
x64
+
16
,
xtmpp
+
8
,
8192
);
transpose4_ooff
(
x64
+
18
,
xtmpp
+
9
,
8192
);
transpose4_ooff
(
x64
+
20
,
xtmpp
+
10
,
8192
);
transpose4_ooff
(
x64
+
22
,
xtmpp
+
11
,
8192
);
transpose4_ooff
(
x64
+
24
,
xtmpp
+
12
,
8192
);
transpose4_ooff
(
x64
+
26
,
xtmpp
+
13
,
8192
);
transpose4_ooff
(
x64
+
28
,
xtmpp
+
14
,
8192
);
transpose4_ooff
(
x64
+
30
,
xtmpp
+
15
,
8192
);
transpose4_ooff
(
x64
+
32
,
xtmpp
+
16
,
8192
);
transpose4_ooff
(
x64
+
34
,
xtmpp
+
17
,
8192
);
transpose4_ooff
(
x64
+
36
,
xtmpp
+
18
,
8192
);
transpose4_ooff
(
x64
+
38
,
xtmpp
+
19
,
8192
);
transpose4_ooff
(
x64
+
40
,
xtmpp
+
20
,
8192
);
transpose4_ooff
(
x64
+
42
,
xtmpp
+
21
,
8192
);
transpose4_ooff
(
x64
+
44
,
xtmpp
+
22
,
8192
);
transpose4_ooff
(
x64
+
46
,
xtmpp
+
23
,
8192
);
transpose4_ooff
(
x64
+
48
,
xtmpp
+
24
,
8192
);
transpose4_ooff
(
x64
+
50
,
xtmpp
+
25
,
8192
);
transpose4_ooff
(
x64
+
52
,
xtmpp
+
26
,
8192
);
transpose4_ooff
(
x64
+
54
,
xtmpp
+
27
,
8192
);
transpose4_ooff
(
x64
+
56
,
xtmpp
+
28
,
8192
);
transpose4_ooff
(
x64
+
58
,
xtmpp
+
29
,
8192
);
transpose4_ooff
(
x64
+
60
,
xtmpp
+
30
,
8192
);
transpose4_ooff
(
x64
+
62
,
xtmpp
+
31
,
8192
);
x64
+=
64
;
xtmpp
+=
32
;
}
dft16384
((
int16_t
*
)(
xtmp
),(
int16_t
*
)
ytmp
,
1
);
dft16384
((
int16_t
*
)(
xtmp
+
8192
),(
int16_t
*
)(
ytmp
+
4096
),
1
);
for
(
i
=
0
;
i
<
4096
;
i
++
)
{
bfly2
(
ytmpp
,
ytmpp
+
4096
,
y128p
,
y128p
+
4096
,
tw32768_128p
);
tw32768_128p
++
;
y128p
++
;
ytmpp
++
;
}
if
(
scale
>
0
)
{
y128p
=
y128
;
for
(
i
=
0
;
i
<
512
;
i
++
)
{
y128p
[
0
]
=
mulhi_int16
(
y128p
[
0
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
1
]
=
mulhi_int16
(
y128p
[
1
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
2
]
=
mulhi_int16
(
y128p
[
2
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
3
]
=
mulhi_int16
(
y128p
[
3
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
4
]
=
mulhi_int16
(
y128p
[
4
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
5
]
=
mulhi_int16
(
y128p
[
5
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
6
]
=
mulhi_int16
(
y128p
[
6
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
7
]
=
mulhi_int16
(
y128p
[
7
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
8
]
=
mulhi_int16
(
y128p
[
8
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
9
]
=
mulhi_int16
(
y128p
[
9
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
10
]
=
mulhi_int16
(
y128p
[
10
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
11
]
=
mulhi_int16
(
y128p
[
11
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
12
]
=
mulhi_int16
(
y128p
[
12
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
13
]
=
mulhi_int16
(
y128p
[
13
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
14
]
=
mulhi_int16
(
y128p
[
14
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
15
]
=
mulhi_int16
(
y128p
[
15
],
ONE_OVER_SQRT2_Q15_128
);
y128p
+=
16
;
}
}
_mm_empty
();
_m_empty
();
}
void
idft32768
(
int16_t
*
x
,
int16_t
*
y
,
unsigned
char
scale
)
{
simdshort_q15_t
xtmp
[
16384
],
*
xtmpp
,
*
x64
=
(
simdshort_q15_t
*
)
x
;
simd_q15_t
ytmp
[
8192
],
*
tw32768_128p
=
(
simd_q15_t
*
)
tw32768
,
*
y128
=
(
simd_q15_t
*
)
y
,
*
y128p
=
(
simd_q15_t
*
)
y
;
simd_q15_t
*
ytmpp
=
&
ytmp
[
0
];
int
i
;
simd_q15_t
ONE_OVER_SQRT2_Q15_128
=
set1_int16
(
ONE_OVER_SQRT2_Q15
);
xtmpp
=
xtmp
;
for
(
i
=
0
;
i
<
256
;
i
++
)
{
transpose4_ooff
(
x64
,
xtmpp
,
8192
);
transpose4_ooff
(
x64
+
2
,
xtmpp
+
1
,
8192
);
transpose4_ooff
(
x64
+
4
,
xtmpp
+
2
,
8192
);
transpose4_ooff
(
x64
+
6
,
xtmpp
+
3
,
8192
);
transpose4_ooff
(
x64
+
8
,
xtmpp
+
4
,
8192
);
transpose4_ooff
(
x64
+
10
,
xtmpp
+
5
,
8192
);
transpose4_ooff
(
x64
+
12
,
xtmpp
+
6
,
8192
);
transpose4_ooff
(
x64
+
14
,
xtmpp
+
7
,
8192
);
transpose4_ooff
(
x64
+
16
,
xtmpp
+
8
,
8192
);
transpose4_ooff
(
x64
+
18
,
xtmpp
+
9
,
8192
);
transpose4_ooff
(
x64
+
20
,
xtmpp
+
10
,
8192
);
transpose4_ooff
(
x64
+
22
,
xtmpp
+
11
,
8192
);
transpose4_ooff
(
x64
+
24
,
xtmpp
+
12
,
8192
);
transpose4_ooff
(
x64
+
26
,
xtmpp
+
13
,
8192
);
transpose4_ooff
(
x64
+
28
,
xtmpp
+
14
,
8192
);
transpose4_ooff
(
x64
+
30
,
xtmpp
+
15
,
8192
);
transpose4_ooff
(
x64
+
32
,
xtmpp
+
16
,
8192
);
transpose4_ooff
(
x64
+
34
,
xtmpp
+
17
,
8192
);
transpose4_ooff
(
x64
+
36
,
xtmpp
+
18
,
8192
);
transpose4_ooff
(
x64
+
38
,
xtmpp
+
19
,
8192
);
transpose4_ooff
(
x64
+
40
,
xtmpp
+
20
,
8192
);
transpose4_ooff
(
x64
+
42
,
xtmpp
+
21
,
8192
);
transpose4_ooff
(
x64
+
44
,
xtmpp
+
22
,
8192
);
transpose4_ooff
(
x64
+
46
,
xtmpp
+
23
,
8192
);
transpose4_ooff
(
x64
+
48
,
xtmpp
+
24
,
8192
);
transpose4_ooff
(
x64
+
50
,
xtmpp
+
25
,
8192
);
transpose4_ooff
(
x64
+
52
,
xtmpp
+
26
,
8192
);
transpose4_ooff
(
x64
+
54
,
xtmpp
+
27
,
8192
);
transpose4_ooff
(
x64
+
56
,
xtmpp
+
28
,
8192
);
transpose4_ooff
(
x64
+
58
,
xtmpp
+
29
,
8192
);
transpose4_ooff
(
x64
+
60
,
xtmpp
+
30
,
8192
);
transpose4_ooff
(
x64
+
62
,
xtmpp
+
31
,
8192
);
x64
+=
64
;
xtmpp
+=
32
;
}
idft16384
((
int16_t
*
)(
xtmp
),(
int16_t
*
)
ytmp
,
1
);
idft16384
((
int16_t
*
)(
xtmp
+
8192
),(
int16_t
*
)(
ytmp
+
4096
),
1
);
for
(
i
=
0
;
i
<
4096
;
i
++
)
{
ibfly2
(
ytmpp
,
ytmpp
+
4096
,
y128p
,
y128p
+
4096
,
tw32768_128p
);
tw32768_128p
++
;
y128p
++
;
ytmpp
++
;
}
if
(
scale
>
0
)
{
y128p
=
y128
;
for
(
i
=
0
;
i
<
512
;
i
++
)
{
y128p
[
0
]
=
mulhi_int16
(
y128p
[
0
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
1
]
=
mulhi_int16
(
y128p
[
1
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
2
]
=
mulhi_int16
(
y128p
[
2
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
3
]
=
mulhi_int16
(
y128p
[
3
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
4
]
=
mulhi_int16
(
y128p
[
4
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
5
]
=
mulhi_int16
(
y128p
[
5
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
6
]
=
mulhi_int16
(
y128p
[
6
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
7
]
=
mulhi_int16
(
y128p
[
7
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
8
]
=
mulhi_int16
(
y128p
[
8
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
9
]
=
mulhi_int16
(
y128p
[
9
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
10
]
=
mulhi_int16
(
y128p
[
10
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
11
]
=
mulhi_int16
(
y128p
[
11
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
12
]
=
mulhi_int16
(
y128p
[
12
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
13
]
=
mulhi_int16
(
y128p
[
13
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
14
]
=
mulhi_int16
(
y128p
[
14
],
ONE_OVER_SQRT2_Q15_128
);
y128p
[
15
]
=
mulhi_int16
(
y128p
[
15
],
ONE_OVER_SQRT2_Q15_128
);
y128p
+=
16
;
}
}
_mm_empty
();
_m_empty
();
}
#else // __AVX2__
/*
 * 32768-point forward DFT, AVX2 build.  Built from two 16384-point DFTs plus
 * one bfly2_256() stage.
 *
 *   x      input samples, read as 4096 simd256_q15_t vectors
 *   y      output samples, written as 4096 simd256_q15_t vectors
 *   scale  when non-zero, every output vector is multiplied
 *          (mulhi_int16_simd256) by ONE_OVER_SQRT2_Q15
 *
 * Fixes relative to the previous version, which had the two trip counts
 * swapped with respect to idft32768():
 *   - the de-interleave loop ran 256 outer iterations (advancing the input by
 *     64 vectors and xtmp by 32 vectors each time), i.e. 16384 input vectors
 *     and 8192 xtmp slots, overrunning both the 4096-vector input and the
 *     4096-entry xtmp array; 64 iterations are required;
 *   - the scaling loop ran 64 x 16 = 1024 vectors, leaving three quarters of
 *     the 4096 output vectors unscaled; 256 x 16 are required.
 */
void dft32768(int16_t *x,int16_t *y,unsigned char scale)
{
  simd256_q15_t xtmp[4096],*x256 = (simd256_q15_t *)x;
  simd256_q15_t ytmp[4096],*tw32768_256p=(simd256_q15_t *)tw32768,*y256=(simd256_q15_t *)y;
  simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
  int i;

  /* Split the input into the two halves of xtmp (2048 vectors apart).  Each
     call consumes two input vectors, so 2048 calls (= 64 x 32 in the unrolled
     form) cover all 4096 input vectors exactly once. */
  for (i=0; i<2048; i++) {
    transpose4_ooff_simd256(x256+(i<<1),xtmp+i,2048);
  }

  /* One 16384-point DFT per half. */
  dft16384((int16_t *)(xtmp),(int16_t *)ytmp,1);
  dft16384((int16_t *)(xtmp+2048),(int16_t *)(ytmp+2048),1);

  /* Radix-2 recombination of the two half-size results. */
  for (i=0; i<2048; i++) {
    bfly2_256(ytmp+i,ytmp+i+2048,y256+i,y256+i+2048,tw32768_256p+i);
  }

  if (scale>0) {
    /* Scale all 4096 output vectors (= 256 x 16 in the unrolled form). */
    for (i=0; i<4096; i++) {
      y256[i] = mulhi_int16_simd256(y256[i],ONE_OVER_SQRT2_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
void
idft32768
(
int16_t
*
x
,
int16_t
*
y
,
unsigned
char
scale
)
{
simd256_q15_t
xtmp
[
4096
],
*
xtmpp
,
*
x256
=
(
simd256_q15_t
*
)
x
;
simd256_q15_t
ytmp
[
4096
],
*
tw32768_256p
=
(
simd256_q15_t
*
)
tw32768
,
*
y256
=
(
simd256_q15_t
*
)
y
,
*
y256p
=
(
simd256_q15_t
*
)
y
;
simd256_q15_t
*
ytmpp
=
&
ytmp
[
0
];
int
i
;
simd256_q15_t
ONE_OVER_SQRT2_Q15_128
=
set1_int16_simd256
(
ONE_OVER_SQRT2_Q15
);
xtmpp
=
xtmp
;
for
(
i
=
0
;
i
<
64
;
i
++
)
{
transpose4_ooff_simd256
(
x256
,
xtmpp
,
2048
);
transpose4_ooff_simd256
(
x256
+
2
,
xtmpp
+
1
,
2048
);
transpose4_ooff_simd256
(
x256
+
4
,
xtmpp
+
2
,
2048
);
transpose4_ooff_simd256
(
x256
+
6
,
xtmpp
+
3
,
2048
);
transpose4_ooff_simd256
(
x256
+
8
,
xtmpp
+
4
,
2048
);
transpose4_ooff_simd256
(
x256
+
10
,
xtmpp
+
5
,
2048
);
transpose4_ooff_simd256
(
x256
+
12
,
xtmpp
+
6
,
2048
);
transpose4_ooff_simd256
(
x256
+
14
,
xtmpp
+
7
,
2048
);
transpose4_ooff_simd256
(
x256
+
16
,
xtmpp
+
8
,
2048
);
transpose4_ooff_simd256
(
x256
+
18
,
xtmpp
+
9
,
2048
);
transpose4_ooff_simd256
(
x256
+
20
,
xtmpp
+
10
,
2048
);
transpose4_ooff_simd256
(
x256
+
22
,
xtmpp
+
11
,
2048
);
transpose4_ooff_simd256
(
x256
+
24
,
xtmpp
+
12
,
2048
);
transpose4_ooff_simd256
(
x256
+
26
,
xtmpp
+
13
,
2048
);
transpose4_ooff_simd256
(
x256
+
28
,
xtmpp
+
14
,
2048
);
transpose4_ooff_simd256
(
x256
+
30
,
xtmpp
+
15
,
2048
);
transpose4_ooff_simd256
(
x256
+
32
,
xtmpp
+
16
,
2048
);
transpose4_ooff_simd256
(
x256
+
34
,
xtmpp
+
17
,
2048
);
transpose4_ooff_simd256
(
x256
+
36
,
xtmpp
+
18
,
2048
);
transpose4_ooff_simd256
(
x256
+
38
,
xtmpp
+
19
,
2048
);
transpose4_ooff_simd256
(
x256
+
40
,
xtmpp
+
20
,
2048
);
transpose4_ooff_simd256
(
x256
+
42
,
xtmpp
+
21
,
2048
);
transpose4_ooff_simd256
(
x256
+
44
,
xtmpp
+
22
,
2048
);
transpose4_ooff_simd256
(
x256
+
46
,
xtmpp
+
23
,
2048
);
transpose4_ooff_simd256
(
x256
+
48
,
xtmpp
+
24
,
2048
);
transpose4_ooff_simd256
(
x256
+
50
,
xtmpp
+
25
,
2048
);
transpose4_ooff_simd256
(
x256
+
52
,
xtmpp
+
26
,
2048
);
transpose4_ooff_simd256
(
x256
+
54
,
xtmpp
+
27
,
2048
);
transpose4_ooff_simd256
(
x256
+
56
,
xtmpp
+
28
,
2048
);
transpose4_ooff_simd256
(
x256
+
58
,
xtmpp
+
29
,
2048
);
transpose4_ooff_simd256
(
x256
+
60
,
xtmpp
+
30
,
2048
);
transpose4_ooff_simd256
(
x256
+
62
,
xtmpp
+
31
,
2048
);
x256
+=
64
;
xtmpp
+=
32
;
}
idft16384
((
int16_t
*
)(
xtmp
),(
int16_t
*
)
ytmp
,
1
);
idft16384
((
int16_t
*
)(
xtmp
+
2048
),(
int16_t
*
)(
ytmp
+
2048
),
1
);
for
(
i
=
0
;
i
<
2048
;
i
++
)
{
ibfly2_256
(
ytmpp
,
ytmpp
+
2048
,
y256p
,
y256p
+
2048
,
tw32768_256p
);
tw32768_256p
++
;
y256p
++
;
ytmpp
++
;
}
if
(
scale
>
0
)
{
y256p
=
y256
;
for
(
i
=
0
;
i
<
256
;
i
++
)
{
y256p
[
0
]
=
mulhi_int16_simd256
(
y256p
[
0
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
1
]
=
mulhi_int16_simd256
(
y256p
[
1
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
2
]
=
mulhi_int16_simd256
(
y256p
[
2
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
3
]
=
mulhi_int16_simd256
(
y256p
[
3
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
4
]
=
mulhi_int16_simd256
(
y256p
[
4
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
5
]
=
mulhi_int16_simd256
(
y256p
[
5
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
6
]
=
mulhi_int16_simd256
(
y256p
[
6
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
7
]
=
mulhi_int16_simd256
(
y256p
[
7
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
8
]
=
mulhi_int16_simd256
(
y256p
[
8
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
9
]
=
mulhi_int16_simd256
(
y256p
[
9
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
10
]
=
mulhi_int16_simd256
(
y256p
[
10
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
11
]
=
mulhi_int16_simd256
(
y256p
[
11
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
12
]
=
mulhi_int16_simd256
(
y256p
[
12
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
13
]
=
mulhi_int16_simd256
(
y256p
[
13
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
14
]
=
mulhi_int16_simd256
(
y256p
[
14
],
ONE_OVER_SQRT2_Q15_128
);
y256p
[
15
]
=
mulhi_int16_simd256
(
y256p
[
15
],
ONE_OVER_SQRT2_Q15_128
);
y256p
+=
16
;
}
}
_mm_empty
();
_m_empty
();
}
#endif
int16_t
twa1536
[
1024
],
twb1536
[
1024
];
// 512 x 3
...
...
@@ -6176,12 +6777,99 @@ int16_t twb49152[32768] __attribute__((aligned(32)));
// 16384 x 3
/*
 * dft49152 - 49152-point forward DFT computed as 3 x 16384.
 *
 * input  : 49152 samples, each read as one 32-bit word (presumably a packed
 *          16-bit re/im pair -- confirm against callers).
 * output : 49152 samples written in the same 32-bit-per-sample layout,
 *          i.e. 98304 int16_t values.
 * scale  : when equal to 1 the whole output is multiplied by
 *          ONE_OVER_SQRT3_Q15 through mulhi_int16(); any other value leaves
 *          the butterfly output untouched.
 *
 * Steps:
 *   1. split the input into three streams (samples 3k, 3k+1, 3k+2),
 *   2. run dft16384() on each stream (always with its scale argument = 1),
 *   3. merge the three results with bfly3() and the twa49152/twb49152
 *      twiddle tables,
 *   4. optionally apply the 1/sqrt(3) scaling.
 *
 * NOTE: tmp and tmpo are automatic arrays totalling 2 * 3 * 16384 * 4 bytes
 * (384 KiB); the calling thread needs a stack large enough to hold them.
 */
void dft49152(int16_t *input, int16_t *output, uint8_t scale)
{
  int i, i2, j;
  uint32_t tmp[3][16384] __attribute__((aligned(32)));
  uint32_t tmpo[3][16384] __attribute__((aligned(32)));
  simd_q15_t *y128p = (simd_q15_t *)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  /* Decimate by 3: one 32-bit word is moved per sample. */
  for (i = 0, j = 0; i < 16384; i++) {
    tmp[0][i] = ((uint32_t *)input)[j++];
    tmp[1][i] = ((uint32_t *)input)[j++];
    tmp[2][i] = ((uint32_t *)input)[j++];
  }

  dft16384((int16_t *)(tmp[0]), (int16_t *)(tmpo[0]), 1);
  dft16384((int16_t *)(tmp[1]), (int16_t *)(tmpo[1]), 1);
  dft16384((int16_t *)(tmp[2]), (int16_t *)(tmpo[2]), 1);

  /*
   * i counts int16_t elements (8 per call), i2 counts 32-bit samples
   * (4 per call).  The three output thirds start 32768 int16_t apart.
   */
  for (i = 0, i2 = 0; i < 32768; i += 8, i2 += 4) {
    bfly3((simd_q15_t *)(&tmpo[0][i2]), (simd_q15_t *)(&tmpo[1][i2]), ((simd_q15_t *)&tmpo[2][i2]),
          (simd_q15_t *)(output + i), (simd_q15_t *)(output + 32768 + i), (simd_q15_t *)(output + 65536 + i),
          (simd_q15_t *)(twa49152 + i), (simd_q15_t *)(twb49152 + i));
  }

  if (scale == 1) {
    /* 768 * 16 vectors: same element count as the former 16-way unrolled loop. */
    for (i = 0; i < 768 * 16; i++) {
      y128p[i] = mulhi_int16(y128p[i], ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
/*
 * idft49152 - 49152-point inverse DFT computed as 3 x 16384.
 *
 * input  : 49152 samples, each read as one 32-bit word (presumably a packed
 *          16-bit re/im pair -- confirm against callers).
 * output : 49152 samples written in the same 32-bit-per-sample layout,
 *          i.e. 98304 int16_t values.
 * scale  : when equal to 1 the whole output is multiplied by
 *          ONE_OVER_SQRT3_Q15 through mulhi_int16(); any other value leaves
 *          the butterfly output untouched.
 *
 * Steps:
 *   1. split the input into three streams (samples 3k, 3k+1, 3k+2),
 *   2. run idft16384() on each stream (always with its scale argument = 1),
 *   3. merge the three results with ibfly3() and the twa49152/twb49152
 *      twiddle tables,
 *   4. optionally apply the 1/sqrt(3) scaling.
 *
 * NOTE: tmp and tmpo are automatic arrays totalling 2 * 3 * 16384 * 4 bytes
 * (384 KiB); the calling thread needs a stack large enough to hold them.
 */
void idft49152(int16_t *input, int16_t *output, uint8_t scale)
{
  int i, i2, j;
  uint32_t tmp[3][16384] __attribute__((aligned(32)));
  uint32_t tmpo[3][16384] __attribute__((aligned(32)));
  simd_q15_t *y128p = (simd_q15_t *)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  /* Decimate by 3: one 32-bit word is moved per sample. */
  for (i = 0, j = 0; i < 16384; i++) {
    tmp[0][i] = ((uint32_t *)input)[j++];
    tmp[1][i] = ((uint32_t *)input)[j++];
    tmp[2][i] = ((uint32_t *)input)[j++];
  }

  idft16384((int16_t *)(tmp[0]), (int16_t *)(tmpo[0]), 1);
  idft16384((int16_t *)(tmp[1]), (int16_t *)(tmpo[1]), 1);
  idft16384((int16_t *)(tmp[2]), (int16_t *)(tmpo[2]), 1);

  /*
   * i counts int16_t elements (8 per call), i2 counts 32-bit samples
   * (4 per call).  The three output thirds start 32768 int16_t apart.
   */
  for (i = 0, i2 = 0; i < 32768; i += 8, i2 += 4) {
    ibfly3((simd_q15_t *)(&tmpo[0][i2]), (simd_q15_t *)(&tmpo[1][i2]), ((simd_q15_t *)&tmpo[2][i2]),
           (simd_q15_t *)(output + i), (simd_q15_t *)(output + 32768 + i), (simd_q15_t *)(output + 65536 + i),
           (simd_q15_t *)(twa49152 + i), (simd_q15_t *)(twb49152 + i));
  }

  if (scale == 1) {
    /* 768 * 16 vectors: same element count as the former 16-way unrolled loop. */
    for (i = 0; i < 768 * 16; i++) {
      y128p[i] = mulhi_int16(y128p[i], ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
int16_t
twa73728
[
49152
]
__attribute__
((
aligned
(
32
)));
...
...
@@ -6198,17 +6886,104 @@ void idft73728(int16_t *input, int16_t *output,uint8_t scale) {
}
int16_t
twa98304
[
49152
]
__attribute__
((
aligned
(
32
)));
int16_t
twb98304
[
49152
]
__attribute__
((
aligned
(
32
)));
int16_t
twa98304
[
65536
]
__attribute__
((
aligned
(
32
)));
int16_t
twb98304
[
65536
]
__attribute__
((
aligned
(
32
)));
// 32768 x 3
/*
 * dft98304 - 98304-point forward DFT computed as 3 x 32768.
 *
 * input  : 98304 samples, each read as one 32-bit word (presumably a packed
 *          16-bit re/im pair -- confirm against callers).
 * output : 98304 samples written in the same 32-bit-per-sample layout,
 *          i.e. 196608 int16_t values.
 * scale  : when equal to 1 the whole output is multiplied by
 *          ONE_OVER_SQRT3_Q15 through mulhi_int16(); any other value leaves
 *          the butterfly output untouched.
 *
 * Steps:
 *   1. split the input into three streams (samples 3k, 3k+1, 3k+2),
 *   2. run dft32768() on each stream (always with its scale argument = 1),
 *   3. merge the three results with bfly3() and the twa98304/twb98304
 *      twiddle tables (indexed up to 65536 int16_t entries each),
 *   4. optionally apply the 1/sqrt(3) scaling.
 *
 * NOTE: tmp and tmpo are automatic arrays totalling 2 * 3 * 32768 * 4 bytes
 * (768 KiB); the calling thread needs a stack large enough to hold them.
 */
void dft98304(int16_t *input, int16_t *output, uint8_t scale)
{
  int i, i2, j;
  uint32_t tmp[3][32768] __attribute__((aligned(32)));
  uint32_t tmpo[3][32768] __attribute__((aligned(32)));
  simd_q15_t *y128p = (simd_q15_t *)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  /* Decimate by 3: one 32-bit word is moved per sample. */
  for (i = 0, j = 0; i < 32768; i++) {
    tmp[0][i] = ((uint32_t *)input)[j++];
    tmp[1][i] = ((uint32_t *)input)[j++];
    tmp[2][i] = ((uint32_t *)input)[j++];
  }

  dft32768((int16_t *)(tmp[0]), (int16_t *)(tmpo[0]), 1);
  dft32768((int16_t *)(tmp[1]), (int16_t *)(tmpo[1]), 1);
  dft32768((int16_t *)(tmp[2]), (int16_t *)(tmpo[2]), 1);

  /*
   * i counts int16_t elements (8 per call), i2 counts 32-bit samples
   * (4 per call).  The three output thirds start 65536 int16_t apart.
   */
  for (i = 0, i2 = 0; i < 65536; i += 8, i2 += 4) {
    bfly3((simd_q15_t *)(&tmpo[0][i2]), (simd_q15_t *)(&tmpo[1][i2]), ((simd_q15_t *)&tmpo[2][i2]),
          (simd_q15_t *)(output + i), (simd_q15_t *)(output + 65536 + i), (simd_q15_t *)(output + 131072 + i),
          (simd_q15_t *)(twa98304 + i), (simd_q15_t *)(twb98304 + i));
  }

  if (scale == 1) {
    /* 1536 * 16 vectors: same element count as the former 16-way unrolled loop. */
    for (i = 0; i < 1536 * 16; i++) {
      y128p[i] = mulhi_int16(y128p[i], ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
/*
 * idft98304 - 98304-point inverse DFT computed as 3 x 32768.
 *
 * input  : 98304 samples, each read as one 32-bit word (presumably a packed
 *          16-bit re/im pair -- confirm against callers).
 * output : 98304 samples written in the same 32-bit-per-sample layout,
 *          i.e. 196608 int16_t values.
 * scale  : when equal to 1 the whole output is multiplied by
 *          ONE_OVER_SQRT3_Q15 through mulhi_int16(); any other value leaves
 *          the butterfly output untouched.
 *
 * Steps:
 *   1. split the input into three streams (samples 3k, 3k+1, 3k+2),
 *   2. run idft32768() on each stream (always with its scale argument = 1),
 *   3. merge the three results with ibfly3() and the twa98304/twb98304
 *      twiddle tables (indexed up to 65536 int16_t entries each),
 *   4. optionally apply the 1/sqrt(3) scaling.
 *
 * NOTE: tmp and tmpo are automatic arrays totalling 2 * 3 * 32768 * 4 bytes
 * (768 KiB); the calling thread needs a stack large enough to hold them.
 */
void idft98304(int16_t *input, int16_t *output, uint8_t scale)
{
  int i, i2, j;
  uint32_t tmp[3][32768] __attribute__((aligned(32)));
  uint32_t tmpo[3][32768] __attribute__((aligned(32)));
  simd_q15_t *y128p = (simd_q15_t *)output;
  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);

  /* Decimate by 3: one 32-bit word is moved per sample. */
  for (i = 0, j = 0; i < 32768; i++) {
    tmp[0][i] = ((uint32_t *)input)[j++];
    tmp[1][i] = ((uint32_t *)input)[j++];
    tmp[2][i] = ((uint32_t *)input)[j++];
  }

  idft32768((int16_t *)(tmp[0]), (int16_t *)(tmpo[0]), 1);
  idft32768((int16_t *)(tmp[1]), (int16_t *)(tmpo[1]), 1);
  idft32768((int16_t *)(tmp[2]), (int16_t *)(tmpo[2]), 1);

  /*
   * i counts int16_t elements (8 per call), i2 counts 32-bit samples
   * (4 per call).  The three output thirds start 65536 int16_t apart.
   */
  for (i = 0, i2 = 0; i < 65536; i += 8, i2 += 4) {
    ibfly3((simd_q15_t *)(&tmpo[0][i2]), (simd_q15_t *)(&tmpo[1][i2]), ((simd_q15_t *)&tmpo[2][i2]),
           (simd_q15_t *)(output + i), (simd_q15_t *)(output + 65536 + i), (simd_q15_t *)(output + 131072 + i),
           (simd_q15_t *)(twa98304 + i), (simd_q15_t *)(twb98304 + i));
  }

  if (scale == 1) {
    /* 1536 * 16 vectors: same element count as the former 16-way unrolled loop. */
    for (i = 0; i < 1536 * 16; i++) {
      y128p[i] = mulhi_int16(y128p[i], ONE_OVER_SQRT3_Q15_128);
    }
  }

  _mm_empty();
  _m_empty();
}
...
...
@@ -8823,6 +9598,8 @@ int dfts_autoinit(void)
init_rad2
(
2048
,
tw2048
);
init_rad4
(
4096
,
tw4096
);
init_rad2
(
8192
,
tw8192
);
init_rad4
(
16384
,
tw16384
);
init_rad2
(
32768
,
tw32768
);
init_rad3
(
1536
,
twa1536
,
twb1536
);
init_rad3
(
3072
,
twa3072
,
twb3072
);
...
...
@@ -8830,6 +9607,9 @@ int dfts_autoinit(void)
init_rad3
(
12288
,
twa12288
,
twb12288
);
init_rad3
(
18432
,
twa18432
,
twb18432
);
init_rad3
(
24576
,
twa24576
,
twb24576
);
init_rad3
(
49152
,
twa49152
,
twb49152
);
init_rad3
(
98304
,
twa98304
,
twb98304
);
init_rad2_rep
(
24
,
tw24
);
init_rad3_rep
(
36
,
twa36
,
twb36
);
...
...
@@ -8869,6 +9649,7 @@ int dfts_autoinit(void)
#ifndef MR_MAIN
void
dft
(
uint8_t
sizeidx
,
int16_t
*
sigF
,
int16_t
*
sig
,
unsigned
char
scale_flag
){
AssertFatal
((
sizeidx
>=
0
&&
sizeidx
<
(
int
)
DFT_SIZE_IDXTABLESIZE
),
"Invalid dft size index %i
\n
"
,
sizeidx
);
...
...
@@ -8879,6 +9660,7 @@ void idft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){
AssertFatal
((
sizeidx
>=
0
&&
sizeidx
<
(
int
)
IDFT_SIZE_IDXTABLESIZE
),
"Invalid idft size index %i
\n
"
,
sizeidx
);
idft_ftab
[
sizeidx
](
sigF
,
sig
,
scale_flag
);
};
#endif
/*---------------------------------------------------------------------------------------*/
...
...
@@ -9049,9 +9831,9 @@ int main(int argc, char**argv)
time_stats_t
ts
;
#ifdef __AVX2__
simd256_q15_t
x
[
4096
],
x2
[
4096
],
y
[
4096
],
tw0
,
tw1
,
tw2
,
tw3
;
simd256_q15_t
x
[
16384
],
x2
[
16384
],
y
[
16384
],
tw0
,
tw1
,
tw2
,
tw3
;
#else
simd_q15_t
x
[
8192
],
y
[
8192
],
tw0
,
tw1
,
tw2
,
tw3
;
simd_q15_t
x
[
32768
],
y
[
32768
],
tw0
,
tw1
,
tw2
,
tw3
;
#endif
int
i
;
simd_q15_t
*
x128
=
(
simd_q15_t
*
)
x
,
*
y128
=
(
simd_q15_t
*
)
y
;
...
...
@@ -9613,10 +10395,34 @@ int main(int argc, char**argv)
stop_meas
(
&
ts
);
}
printf
(
"
\n\n
1536
-point(%f cycles)
\n
"
,(
double
)
ts
.
diff
/
(
double
)
ts
.
trials
);
printf
(
"
\n\n
8192
-point(%f cycles)
\n
"
,(
double
)
ts
.
diff
/
(
double
)
ts
.
trials
);
LOG_M
(
"y8192.m"
,
"y8192"
,
y
,
8192
,
1
,
1
);
LOG_M
(
"x8192.m"
,
"x8192"
,
x
,
8192
,
1
,
1
);
memset
((
void
*
)
x
,
0
,
16384
*
sizeof
(
int32_t
));
for
(
i
=
2
;
i
<
9602
;
i
++
)
{
if
((
taus
()
&
1
)
==
0
)
((
int16_t
*
)
x
)[
i
]
=
364
;
else
((
int16_t
*
)
x
)[
i
]
=
-
364
;
}
for
(
i
=
2
*
(
16384
-
4800
);
i
<
32768
;
i
++
)
{
if
((
taus
()
&
1
)
==
0
)
((
int16_t
*
)
x
)[
i
]
=
364
;
else
((
int16_t
*
)
x
)[
i
]
=
-
364
;
}
reset_meas
(
&
ts
);
for
(
i
=
0
;
i
<
10000
;
i
++
)
{
start_meas
(
&
ts
);
dft16384
((
int16_t
*
)
x
,(
int16_t
*
)
y
,
1
);
stop_meas
(
&
ts
);
}
printf
(
"
\n\n
16384-point(%f cycles)
\n
"
,(
double
)
ts
.
diff
/
(
double
)
ts
.
trials
);
LOG_M
(
"y16384.m"
,
"y16384"
,
y
,
16384
,
1
,
1
);
LOG_M
(
"x16384.m"
,
"x16384"
,
x
,
16384
,
1
,
1
);
memset
((
void
*
)
x
,
0
,
1536
*
sizeof
(
int32_t
));
for
(
i
=
2
;
i
<
1202
;
i
++
)
{
if
((
taus
()
&
1
)
==
0
)
...
...
@@ -9765,6 +10571,30 @@ int main(int argc, char**argv)
LOG_M
(
"y24576.m"
,
"y24576"
,
y
,
24576
,
1
,
1
);
LOG_M
(
"x24576.m"
,
"x24576"
,
x
,
24576
,
1
,
1
);
memset
((
void
*
)
x
,
0
,
49152
*
sizeof
(
int32_t
));
for
(
i
=
2
;
i
<
28402
;
i
++
)
{
if
((
taus
()
&
1
)
==
0
)
((
int16_t
*
)
x
)[
i
]
=
364
;
else
((
int16_t
*
)
x
)[
i
]
=
-
364
;
}
for
(
i
=
2
*
(
49152
-
14400
);
i
<
98304
;
i
++
)
{
if
((
taus
()
&
1
)
==
0
)
((
int16_t
*
)
x
)[
i
]
=
364
;
else
((
int16_t
*
)
x
)[
i
]
=
-
364
;
}
reset_meas
(
&
ts
);
for
(
i
=
0
;
i
<
10000
;
i
++
)
{
start_meas
(
&
ts
);
idft49152
((
int16_t
*
)
x
,(
int16_t
*
)
y
,
1
);
stop_meas
(
&
ts
);
}
printf
(
"
\n\n
49152-point(%f cycles)
\n
"
,(
double
)
ts
.
diff
/
(
double
)
ts
.
trials
);
LOG_M
(
"y49152.m"
,
"y49152"
,
y
,
49152
,
1
,
1
);
LOG_M
(
"x49152.m"
,
"x49152"
,
x
,
49152
,
1
,
1
);
/*
int dftsizes[33]={24,36,48,60,72,96,108,120,144,180,192,216,240,288,300,324,360,384,432,480,540,576,600,648,720,768,864,900,960,972,1080,1152,1200};
void (*dft)(int16_t *x,int16_t *y,uint8_t scale)[33] = {dft24,dft36,dft48,dft60,dft72,dft96,dft108,dft120,dft144,dft180,dft192,dft216,dft240,dft288,dft300,dft324,dft360,dft384,dft432,dft480,dft540,dft576,dft600,dft648,dft720,dft768,dft864,dft900,dft960,dft972,dft1080,dft1152,dft1200};
for (int n=0;n<33;n++) {
...
...
@@ -9797,7 +10627,7 @@ int main(int argc, char**argv)
LOG_M(ystr,ystr2,y,dftsizes[n]*4,1,1);
LOG_M(xstr,xstr2,x,dftsizes[n]*4,1,1);
}
*/
return
(
0
);
}
...
...
openair1/SIMULATION/NR_PHY/prachsim.c
View file @
dd8cb984
...
...
@@ -89,7 +89,7 @@ int main(int argc, char **argv){
int
i
,
aa
,
aarx
,
**
txdata
,
trial
,
n_frames
=
1
,
prach_start
,
rx_prach_start
;
//, ntrials=1;
int
N_RB_UL
=
106
,
delay
=
0
,
NCS_config
=
13
,
rootSequenceIndex
=
1
,
threequarter_fs
=
0
,
mu
=
1
,
fd_occasion
=
0
,
loglvl
=
OAILOG_INFO
,
numRA
=
0
,
prachStartSymbol
=
0
;
uint8_t
snr1set
=
0
,
ue_speed1set
=
0
,
transmission_mode
=
1
,
n_tx
=
1
,
n_rx
=
1
,
awgn_flag
=
0
,
msg1_frequencystart
=
0
,
num_prach_fd_occasions
=
1
,
prach_format
;
uint8_t
frame
=
1
,
subframe
=
19
,
config_index
=
98
,
prach_sequence_length
=
1
,
num_root_sequences
=
16
,
restrictedSetConfig
=
0
,
N_dur
,
N_t_slot
,
start_symbol
;
uint8_t
frame
=
1
,
subframe
=
9
,
slot
=
19
,
config_index
=
98
,
prach_sequence_length
=
1
,
num_root_sequences
=
16
,
restrictedSetConfig
=
0
,
N_dur
,
N_t_slot
,
start_symbol
;
uint16_t
Nid_cell
=
0
,
preamble_tx
=
0
,
preamble_delay
,
format
,
format0
,
format1
;
uint32_t
tx_lev
=
10000
,
prach_errors
=
0
,
samp_count
;
//,tx_lev_dB;
uint64_t
SSB_positions
=
0x01
,
absoluteFrequencyPointA
=
640000
;
...
...
@@ -118,7 +118,7 @@ int main(int argc, char **argv){
randominit
(
0
);
while
((
c
=
getopt
(
argc
,
argv
,
"hHaA:Cr:p:g:n:s:S:t:x:y:v:V:z:N:F:d:Z:L:R:E"
))
!=
-
1
)
{
while
((
c
=
getopt
(
argc
,
argv
,
"hHaA:C
c:
r:p:g:n:s:S:t:x:y:v:V:z:N:F:d:Z:L:R:E"
))
!=
-
1
)
{
switch
(
c
)
{
case
'a'
:
printf
(
"Running AWGN simulation
\n
"
);
...
...
@@ -127,6 +127,10 @@ int main(int argc, char **argv){
//ntrials=1;
break
;
case
'c'
:
config_index
=
atoi
(
optarg
);
break
;
case
'd'
:
delay
=
atoi
(
optarg
);
break
;
...
...
@@ -313,6 +317,11 @@ int main(int argc, char **argv){
}
}
if
(
config_index
<
67
)
{
prach_sequence_length
=
0
;
slot
=
subframe
*
2
;
}
printf
(
"Config_index %d, prach_sequence_length %d
\n
"
,
config_index
,
prach_sequence_length
);
// Configure log
logInit
();
set_glog
(
loglvl
);
...
...
@@ -373,11 +382,11 @@ int main(int argc, char **argv){
gNB
->
gNB_config
.
prach_config
.
num_prach_fd_occasions
.
value
=
num_prach_fd_occasions
;
gNB
->
gNB_config
.
prach_config
.
num_prach_fd_occasions_list
=
(
nfapi_nr_num_prach_fd_occasions_t
*
)
malloc
(
num_prach_fd_occasions
*
sizeof
(
nfapi_nr_num_prach_fd_occasions_t
));
gNB
->
proc
.
slot_rx
=
s
ubframe
;
gNB
->
proc
.
slot_rx
=
s
lot
;
get_nr_prach_info_from_index
(
config_index
,
(
int
)
frame
,
(
int
)
s
ubframe
,
(
int
)
s
lot
,
absoluteFrequencyPointA
,
mu
,
frame_parms
->
frame_type
,
...
...
@@ -562,7 +571,7 @@ int main(int argc, char **argv){
UE_nr_rxtx_proc_t
proc
=
{
0
};
proc
.
frame_tx
=
frame
;
proc
.
nr_tti_tx
=
s
ubframe
;
proc
.
nr_tti_tx
=
s
lot
;
nr_ue_prach_procedures
(
UE
,
&
proc
,
0
,
0
);
/* tx_lev_dB not used later, no need to set */
...
...
@@ -571,19 +580,19 @@ int main(int argc, char **argv){
if
(
mu
==
0
)
samp_count
=
frame_parms
->
samples_per_subframe
;
else
samp_count
=
(
subframe
%
(
frame_parms
->
slots_per_subframe
/
2
))
?
frame_parms
->
samples_per_slotN0
:
frame_parms
->
samples_per_slot0
;
samp_count
=
(
(
slot
)
%
(
frame_parms
->
slots_per_subframe
/
2
))
?
frame_parms
->
samples_per_slotN0
:
frame_parms
->
samples_per_slot0
;
prach_start
=
s
ubframe
*
samp_count
-
UE
->
N_TA_offset
;
prach_start
=
s
lot
*
samp_count
-
UE
->
N_TA_offset
;
#ifdef NR_PRACH_DEBUG
LOG_M
(
"txsig0.m"
,
"txs0"
,
&
txdata
[
0
][
prach_start
],
samp_count
,
1
,
1
);
LOG_M
(
"txsig0.m"
,
"txs0"
,
&
txdata
[
0
][
prach_start
],
frame_parms
->
samples_per_subframe
,
1
,
1
);
//LOG_M("txsig1.m","txs1", txdata[1],FRAME_LENGTH_COMPLEX_SAMPLES,1,1);
#endif
// multipath channel
// dump_nr_prach_config(&gNB->frame_parms,subframe);
for
(
i
=
0
;
i
<
samp_count
<<
1
;
i
++
)
{
for
(
i
=
0
;
i
<
frame_parms
->
samples_per_subframe
<<
1
;
i
++
)
{
for
(
aa
=
0
;
aa
<
1
;
aa
++
)
{
if
(
awgn_flag
==
0
)
{
s_re
[
aa
][
i
]
=
((
double
)(((
short
*
)
&
txdata
[
aa
][
prach_start
]))[(
i
<<
1
)]);
...
...
@@ -618,7 +627,10 @@ int main(int argc, char **argv){
ue_speed1
=
ue_speed0
+
50
;
}
rx_prach_start
=
subframe
*
frame_parms
->
get_samples_per_slot
(
subframe
,
frame_parms
);
rx_prach_start
=
slot
*
frame_parms
->
get_samples_per_slot
(
slot
,
frame_parms
);
if
(
n_frames
==
1
)
printf
(
"slot %d, rx_prach_start %d
\n
"
,
slot
,
rx_prach_start
);
uint16_t
preamble_rx
,
preamble_energy
,
N_ZC
;
N_ZC
=
prach_sequence_length
==
0
?
839
:
139
;
for
(
SNR
=
snr0
;
SNR
<
snr1
;
SNR
+=
.
1
)
{
for
(
ue_speed
=
ue_speed0
;
ue_speed
<
ue_speed1
;
ue_speed
+=
10
)
{
...
...
@@ -630,9 +642,8 @@ int main(int argc, char **argv){
for
(
trial
=
0
;
trial
<
n_frames
;
trial
++
)
{
uint16_t
preamble_rx
,
preamble_energy
,
N_ZC
;
sigma2_dB
=
10
*
log10
((
double
)
tx_lev
)
-
SNR
;
sigma2_dB
=
10
*
log10
((
double
)
tx_lev
)
-
SNR
-
10
*
log10
(
N_RB_UL
*
12
/
N_ZC
)
;
if
(
n_frames
==
1
)
printf
(
"sigma2_dB %f (SNR %f dB) tx_lev_dB %f
\n
"
,
sigma2_dB
,
SNR
,
10
*
log10
((
double
)
tx_lev
));
...
...
@@ -651,20 +662,20 @@ int main(int argc, char **argv){
10
*
log10
(
tx_lev
));
}
for
(
i
=
0
;
i
<
frame_parms
->
get_samples_per_slot
(
subframe
,
frame_parms
)
;
i
++
)
{
for
(
i
=
0
;
i
<
frame_parms
->
samples_per_subframe
;
i
++
)
{
for
(
aa
=
0
;
aa
<
frame_parms
->
nb_antennas_rx
;
aa
++
)
{
((
short
*
)
&
gNB
->
common_vars
.
rxdata
[
aa
][
rx_prach_start
])[
2
*
i
]
=
(
short
)
(.
167
*
(
r_re
[
aa
][
i
]
+
sqrt
(
sigma2
/
2
)
*
gaussdouble
(
0
.
0
,
1
.
0
)));
((
short
*
)
&
gNB
->
common_vars
.
rxdata
[
aa
][
rx_prach_start
])[
2
*
i
+
1
]
=
(
short
)
(.
167
*
(
r_im
[
aa
][
i
]
+
(
iqim
*
r_re
[
aa
][
i
])
+
sqrt
(
sigma2
/
2
)
*
gaussdouble
(
0
.
0
,
1
.
0
)));
((
short
*
)
&
ru
->
common
.
rxdata
[
aa
][
rx_prach_start
])[
2
*
i
]
=
(
short
)
(.
167
*
(
r_re
[
aa
][
i
]
+
sqrt
(
sigma2
/
2
)
*
gaussdouble
(
0
.
0
,
1
.
0
)));
((
short
*
)
&
ru
->
common
.
rxdata
[
aa
][
rx_prach_start
])[
2
*
i
+
1
]
=
(
short
)
(.
167
*
(
r_im
[
aa
][
i
]
+
(
iqim
*
r_re
[
aa
][
i
])
+
sqrt
(
sigma2
/
2
)
*
gaussdouble
(
0
.
0
,
1
.
0
)));
}
}
rx_nr_prach_ru
(
ru
,
prach_format
,
numRA
,
prachStartSymbol
,
frame
,
s
ubframe
);
rx_nr_prach_ru
(
ru
,
prach_format
,
numRA
,
prachStartSymbol
,
frame
,
s
lot
);
gNB
->
prach_vars
.
rxsigF
=
ru
->
prach_rxsigF
;
rx_nr_prach
(
gNB
,
prach_pdu
,
frame
,
subframe
,
&
preamble_rx
,
&
preamble_energy
,
&
preamble_delay
);
printf
(
" preamble_energy %d preamble_rx %d preamble_tx %d
\n
"
,
preamble_energy
,
preamble_rx
,
preamble_tx
);
//
printf(" preamble_energy %d preamble_rx %d preamble_tx %d \n", preamble_energy, preamble_rx, preamble_tx);
if
(
preamble_rx
!=
preamble_tx
)
prach_errors
++
;
...
...
openair1/SIMULATION/TOOLS/taus.c
View file @
dd8cb984
...
...
@@ -21,7 +21,8 @@
#include <time.h>
#include <stdlib.h>
#include "SIMULATION/TOOLS/sim.h"
//#include "SIMULATION/TOOLS/sim.h"
unsigned
int
s0
,
s1
,
s2
,
b
;
...
...
openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
View file @
dd8cb984
...
...
@@ -46,6 +46,7 @@ uint16_t NCS_restricted_TypeB_delta_f_RA_5[14] = {36,57,60,63,65,68,71,77,81,8
uint16_t
NCS_unrestricted_delta_f_RA_15
[
16
]
=
{
0
,
2
,
4
,
6
,
8
,
10
,
12
,
13
,
15
,
17
,
19
,
23
,
27
,
34
,
46
,
69
};
const
char
*
prachfmt
[]
=
{
"A1"
,
"A2"
,
"A3"
,
"B1"
,
"B2"
,
"B3"
,
"B4"
,
"C0"
,
"C2"
};
const
char
*
prachfmt03
[]
=
{
"0"
,
"1"
,
"2"
,
"3"
};
uint16_t
get_NCS
(
uint8_t
index
,
uint16_t
format0
,
uint8_t
restricted_set_config
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment