Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wangjie
OpenXG-RAN
Commits
a5e6690d
Commit
a5e6690d
authored
Jan 18, 2017
by
Florian Kaltenberger
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
adding multadd_cpx_vector
parent
ab57b0dd
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
98 additions
and
0 deletions
+98
-0
openair1/PHY/TOOLS/cmult_vv.c
openair1/PHY/TOOLS/cmult_vv.c
+79
-0
openair1/PHY/TOOLS/defs.h
openair1/PHY/TOOLS/defs.h
+19
-0
No files found.
openair1/PHY/TOOLS/cmult_vv.c
View file @
a5e6690d
...
...
@@ -27,6 +27,7 @@
#if defined(__x86_64__) || defined(__i386__)
/* 16-byte aligned table of eight int16 sign factors: -1 in the even
 * (real-part) positions and +1 in the odd (imaginary-part) positions of
 * an interleaved |Re Im| vector.
 * NOTE(review): its users are outside this view; presumably applied with
 * _mm_sign_epi16 the same way conjug2 is -- confirm against callers. */
int16_t conjug[8] __attribute__((aligned(16))) = {-1, 1, -1, 1, -1, 1, -1, 1};
/* 16-byte aligned table of eight int16 sign factors: +1 in the even
 * (real-part) positions and -1 in the odd (imaginary-part) positions of
 * an interleaved |Re Im| vector.
 * multadd_cpx_vector() loads it as a 128-bit vector and passes it to
 * _mm_sign_epi16 to negate the imaginary lanes of x1 before the
 * multiply-add that produces the real part of the product. */
int16_t conjug2[8] __attribute__((aligned(16))) = {1, -1, 1, -1, 1, -1, 1, -1};
#define simd_q15_t __m128i
#define simdshort_q15_t __m64
#elif defined(__arm__)
...
...
@@ -134,3 +135,81 @@ int mult_cpx_conj_vector(int16_t *x1,
return
(
0
);
}
int
multadd_cpx_vector
(
int16_t
*
x1
,
int16_t
*
x2
,
int16_t
*
y
,
uint8_t
zero_flag
,
uint32_t
N
,
int
output_shift
)
{
// Multiply elementwise the complex conjugate of x1 with x2.
// x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
// We assume x1 with a dinamic of 15 bit maximum
//
// x2 - input 2 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
// We assume x2 with a dinamic of 14 bit maximum
///
// y - output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
//
// zero_flag - Set output (y) to zero prior to disable accumulation
//
// N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4;
//
// output_shift - shift to be applied to generate output
uint32_t
i
;
// loop counter
simd_q15_t
*
x1_128
;
simd_q15_t
*
x2_128
;
simd_q15_t
*
y_128
;
#if defined(__x86_64__) || defined(__i386__)
simd_q15_t
tmp_re
,
tmp_im
;
simd_q15_t
tmpy0
,
tmpy1
;
#elif defined(__arm__)
int32x4_t
tmp_re
,
tmp_im
;
int32x4_t
tmp_re1
,
tmp_im1
;
int16x4x2_t
tmpy
;
int32x4_t
shift
=
vdupq_n_s32
(
-
output_shift
);
#endif
x1_128
=
(
simd_q15_t
*
)
&
x1
[
0
];
x2_128
=
(
simd_q15_t
*
)
&
x2
[
0
];
y_128
=
(
simd_q15_t
*
)
&
y
[
0
];
// we compute 4 cpx multiply for each loop
for
(
i
=
0
;
i
<
(
N
>>
2
);
i
++
)
{
#if defined(__x86_64__) || defined(__i386__)
tmp_re
=
_mm_sign_epi16
(
*
x1_128
,
*
(
__m128i
*
)
&
conjug2
[
0
]);
tmp_re
=
_mm_madd_epi16
(
tmp_re
,
*
x2_128
);
tmp_im
=
_mm_shufflelo_epi16
(
*
x1_128
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
tmp_im
=
_mm_shufflehi_epi16
(
tmp_im
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
tmp_im
=
_mm_madd_epi16
(
tmp_im
,
*
x2_128
);
tmp_re
=
_mm_srai_epi32
(
tmp_re
,
output_shift
);
tmp_im
=
_mm_srai_epi32
(
tmp_im
,
output_shift
);
tmpy0
=
_mm_unpacklo_epi32
(
tmp_re
,
tmp_im
);
//print_ints("unpack lo:",&tmpy0[i]);
tmpy1
=
_mm_unpackhi_epi32
(
tmp_re
,
tmp_im
);
//print_ints("unpack hi:",&tmpy1[i]);
if
(
zero_flag
==
1
)
*
y_128
=
_mm_packs_epi32
(
tmpy0
,
tmpy1
);
else
*
y_128
=
_mm_adds_epi16
(
*
y_128
,
_mm_packs_epi32
(
tmpy0
,
tmpy1
));
//print_shorts("*y_128:",&y_128[i]);
#elif defined(__arm__)
msg
(
"mult_cpx_vector not implemented for __arm__"
);
#endif
x1_128
++
;
x2_128
++
;
y_128
++
;
}
_mm_empty
();
_m_empty
();
return
(
0
);
}
openair1/PHY/TOOLS/defs.h
View file @
a5e6690d
...
...
@@ -126,6 +126,25 @@ int mult_cpx_conj_vector(int16_t *x1,
int
output_shift
,
int
madd
);
/*!
Element-wise multiplication and accumulation of two complex vectors x1 and x2.
@param x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
We assume x1 has a dynamic range of at most 15 bits
@param x2 - input 2 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
We assume x2 has a dynamic range of at most 14 bits
@param y - output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
@param zero_flag - if 1, y is overwritten with the product (as if set to zero prior to accumulation); otherwise the product is accumulated into y
@param N - the size of the vectors (this function does N cpx mpy). WARNING: N>=4
@param output_shift - shift to be applied to generate output
*/
int
multadd_cpx_vector
(
int16_t
*
x1
,
int16_t
*
x2
,
int16_t
*
y
,
uint8_t
zero_flag
,
uint32_t
N
,
int
output_shift
);
// lte_dfts.c
void
init_fft
(
uint16_t
size
,
uint8_t
logsize
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment