Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wangjie
OpenXG-RAN
Commits
d4fbc3a1
Commit
d4fbc3a1
authored
6 years ago
by
Martino
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Final version of the kernel function; the intrinsics now save 320 us
parent
4626a85d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
33 additions
and
37 deletions
+33
-37
openair1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
...air1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
+33
-37
No files found.
openair1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
View file @
d4fbc3a1
...
...
@@ -10,70 +10,66 @@ void nr_polar_kernal_operation(uint8_t *u, uint8_t *d, uint16_t N)
// Martino's algorithm to avoid multiplication for the generating matrix of polar codes
uint32_t
i
,
j
;
for
(
i
=
0
;
i
<
N
;
i
++
)
// Create the elements of d=u*G_N ...
{
d
[
i
]
=
0
;
for
(
j
=
0
;
j
<
N
;
j
++
)
// ... looking at all the elements of u
{
d
[
i
]
=
d
[
i
]
^
(
!
(
(
j
-
i
)
&
i
))
*
u
[
j
];
}
}
#ifdef __AVX2__
/*
* It works, but there are too many moves from memory and it's slow. With AVX-512 it could be done faster
*
__m256i A,B,C,E,U, OUT;
__m256i
A
,
B
,
C
,
D
,
E
,
U
,
zerosOnly
,
OUT
;
__m256i
inc
;
uint32_t
dTest
[
8
];
uint32_t
uArray
[
8
];
uint32_t
k
;
uint32_t toCheck[8];
uint32_t
incArray
[
8
];
//initialisation
for
(
k
=
0
;
k
<
8
;
k
++
)
incArray[k]=k; //0,1, ... 7
inc=_mm256_loadu_si256((__m256i const*)incArray);
incArray
[
k
]
=
k
;
inc
=
_mm256_loadu_si256
((
__m256i
const
*
)
incArray
);
// 0, 1, ..., 7 to increase
zerosOnly
=
_mm256_setzero_si256
();
// for comparison
for
(
i
=
0
;
i
<
N
;
i
+=
8
)
{
B=_mm256_set1_epi32((int)i); // i, ... i
B=_mm256_add_epi32(B, inc); //i, i+1, ... i+7
B
=
_mm256_set1_epi32
((
int
)
i
);
// i, ..., i
B
=
_mm256_add_epi32
(
B
,
inc
);
// i, i+1, ..., i+7
OUT=_mm256_setzero_si256();
OUT
=
_mm256_setzero_si256
();
// it will contain the result of all the XORs for the d(i)s
for
(
j
=
0
;
j
<
N
;
j
++
)
{
//initialisation
A=_mm256_set1_epi32((int)(j)); //j ...
A
=
_mm256_set1_epi32
((
int
)(
j
));
//j, j, ..., j
A
=
_mm256_sub_epi32
(
A
,
B
);
//(j-i), (j-(i+1)), ... (j-(i+7))
U
=
_mm256_set1_epi32
((
int
)
u
[
j
]);
_mm256_storeu_si256
((
__m256i
*
)
uArray
,
U
);
//u(j) ... u(j) for the maskload
C=_mm256_and_si256(A, B); //mask: if zero, then add
_mm256_storeu_si256((__m256i*)toCheck, C);
for(k=0; k<8; k++)
{
toCheck[k]=!toCheck[k] << 31;
}
C=_mm256_loadu_si256((__m256i const*)toCheck); //mask: if 1, add
E=_mm256_maskload_epi32((int const*)uArray, C);
C
=
_mm256_and_si256
(
A
,
B
);
//(j-i)&i -> If zero, then XOR with the u(j)
D
=
_mm256_cmpeq_epi32
(
C
,
zerosOnly
);
// compare with zero and use the result as mask
E
=
_mm256_maskload_epi32
((
int
const
*
)
uArray
,
D
);
// load only some u(j)s for the XOR
OUT
=
_mm256_xor_si256
(
OUT
,
E
);
//32 bit x 8
}
_mm256_storeu_si256
((
__m256i
*
)
dTest
,
OUT
);
for(k=0; k<8; k++)
for
(
k
=
0
;
k
<
8
;
k
++
)
// Conversion from 32 bits to 8 bits
{
d[i+k]=(uint8_t)dTest[k]; //
Conv from 32 to 8
d
[
i
+
k
]
=
(
uint8_t
)
dTest
[
k
];
//
With AVX512 there is an intrinsic to do it
}
}
*/
#else
for
(
i
=
0
;
i
<
N
;
i
++
)
// Create the elements of d=u*G_N ...
{
d
[
i
]
=
0
;
for
(
j
=
0
;
j
<
N
;
j
++
)
// ... looking at all the elements of u
{
d
[
i
]
=
d
[
i
]
^
(
!
(
(
j
-
i
)
&
i
))
*
u
[
j
];
// it's like ((j-i)&i)==0
}
}
#endif
}
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment