Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG UE
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Michael Black
OpenXG UE
Commits
ba0e786e
Commit
ba0e786e
authored
Jan 18, 2017
by
Florian Kaltenberger
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
adding multadd_cpx_vector
parent
27a5ccf4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
98 additions
and
0 deletions
+98
-0
openair1/PHY/TOOLS/cmult_vv.c
openair1/PHY/TOOLS/cmult_vv.c
+79
-0
openair1/PHY/TOOLS/defs.h
openair1/PHY/TOOLS/defs.h
+19
-0
No files found.
openair1/PHY/TOOLS/cmult_vv.c
View file @
ba0e786e
...
...
@@ -27,6 +27,7 @@
#if defined(__x86_64__) || defined(__i386__)
// Sign pattern {-1, 1, -1, 1, ...} over eight 16-bit lanes, 16-byte aligned so
// that it can be read as a single 128-bit vector. With the interleaved
// |Re Im Re Im ...| layout used in this file, the -1 entries line up with the
// real parts and the +1 entries with the imaginary parts.
// NOTE(review): its users are not visible in this excerpt; presumably it is fed
// to _mm_sign_epi16 the same way conjug2 is -- confirm against the callers.
int16_t
conjug
[
8
]
__attribute__
((
aligned
(
16
)))
=
{
-
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
1
}
;
// Sign pattern {1, -1, 1, -1, ...} over eight 16-bit lanes, 16-byte aligned so
// that it can be read as a single 128-bit vector. With the interleaved
// |Re Im Re Im ...| layout, the +1 entries line up with the real parts and the
// -1 entries with the imaginary parts.
// multadd_cpx_vector passes it to _mm_sign_epi16 to negate the imaginary lanes
// of x1, so that the following _mm_madd_epi16 with x2 yields
// Re(x1)*Re(x2) - Im(x1)*Im(x2) in each 32-bit lane.
int16_t
conjug2
[
8
]
__attribute__
((
aligned
(
16
)))
=
{
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
}
;
#define simd_q15_t __m128i
#define simdshort_q15_t __m64
#elif defined(__arm__)
...
...
@@ -134,3 +135,81 @@ int mult_cpx_conj_vector(int16_t *x1,
return
(
0
);
}
int
multadd_cpx_vector
(
int16_t
*
x1
,
int16_t
*
x2
,
int16_t
*
y
,
uint8_t
zero_flag
,
uint32_t
N
,
int
output_shift
)
{
// Multiply elementwise the complex conjugate of x1 with x2.
// x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
// We assume x1 with a dinamic of 15 bit maximum
//
// x2 - input 2 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
// We assume x2 with a dinamic of 14 bit maximum
///
// y - output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
//
// zero_flag - Set output (y) to zero prior to disable accumulation
//
// N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4;
//
// output_shift - shift to be applied to generate output
uint32_t
i
;
// loop counter
simd_q15_t
*
x1_128
;
simd_q15_t
*
x2_128
;
simd_q15_t
*
y_128
;
#if defined(__x86_64__) || defined(__i386__)
simd_q15_t
tmp_re
,
tmp_im
;
simd_q15_t
tmpy0
,
tmpy1
;
#elif defined(__arm__)
int32x4_t
tmp_re
,
tmp_im
;
int32x4_t
tmp_re1
,
tmp_im1
;
int16x4x2_t
tmpy
;
int32x4_t
shift
=
vdupq_n_s32
(
-
output_shift
);
#endif
x1_128
=
(
simd_q15_t
*
)
&
x1
[
0
];
x2_128
=
(
simd_q15_t
*
)
&
x2
[
0
];
y_128
=
(
simd_q15_t
*
)
&
y
[
0
];
// we compute 4 cpx multiply for each loop
for
(
i
=
0
;
i
<
(
N
>>
2
);
i
++
)
{
#if defined(__x86_64__) || defined(__i386__)
tmp_re
=
_mm_sign_epi16
(
*
x1_128
,
*
(
__m128i
*
)
&
conjug2
[
0
]);
tmp_re
=
_mm_madd_epi16
(
tmp_re
,
*
x2_128
);
tmp_im
=
_mm_shufflelo_epi16
(
*
x1_128
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
tmp_im
=
_mm_shufflehi_epi16
(
tmp_im
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
tmp_im
=
_mm_madd_epi16
(
tmp_im
,
*
x2_128
);
tmp_re
=
_mm_srai_epi32
(
tmp_re
,
output_shift
);
tmp_im
=
_mm_srai_epi32
(
tmp_im
,
output_shift
);
tmpy0
=
_mm_unpacklo_epi32
(
tmp_re
,
tmp_im
);
//print_ints("unpack lo:",&tmpy0[i]);
tmpy1
=
_mm_unpackhi_epi32
(
tmp_re
,
tmp_im
);
//print_ints("unpack hi:",&tmpy1[i]);
if
(
zero_flag
==
1
)
*
y_128
=
_mm_packs_epi32
(
tmpy0
,
tmpy1
);
else
*
y_128
=
_mm_adds_epi16
(
*
y_128
,
_mm_packs_epi32
(
tmpy0
,
tmpy1
));
//print_shorts("*y_128:",&y_128[i]);
#elif defined(__arm__)
msg
(
"mult_cpx_vector not implemented for __arm__"
);
#endif
x1_128
++
;
x2_128
++
;
y_128
++
;
}
_mm_empty
();
_m_empty
();
return
(
0
);
}
openair1/PHY/TOOLS/defs.h
View file @
ba0e786e
...
...
@@ -126,6 +126,25 @@ int mult_cpx_conj_vector(int16_t *x1,
int
output_shift
,
int
madd
);
/*!
  Element-wise multiplication and (optional) accumulation of two complex vectors x1 and x2.
  Each output sample is the plain complex product x1*x2, shifted right by output_shift.
  @param x1       - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
                    We assume x1 with a dynamic range of 15 bit maximum
  @param x2       - input 2 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
                    We assume x2 with a dynamic range of 14 bit maximum
  @param y        - output in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
  @param zero_flag - when 1, y is overwritten with the product (equivalent to zeroing y
                    before accumulating); otherwise the product is accumulated into y
  @param N        - the size of the vectors (this function does N cpx mpy). WARNING: N>=4;
                    samples are processed in groups of four
  @param output_shift - shift to be applied to generate output
  @return 0
*/
int
multadd_cpx_vector
(
int16_t
*
x1
,
int16_t
*
x2
,
int16_t
*
y
,
uint8_t
zero_flag
,
uint32_t
N
,
int
output_shift
);
// lte_dfts.c
void
init_fft
(
uint16_t
size
,
uint8_t
logsize
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment