Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wangwenhui
OpenXG-RAN
Commits
d4fbc3a1
Commit
d4fbc3a1
authored
Jun 18, 2018
by
Martino
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Final version of kernal function, now we save 320 us with the intrinsics
parent
4626a85d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
33 additions
and
37 deletions
+33
-37
openair1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
...air1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
+33
-37
No files found.
openair1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
View file @
d4fbc3a1
...
@@ -11,69 +11,65 @@ void nr_polar_kernal_operation(uint8_t *u, uint8_t *d, uint16_t N)
...
@@ -11,69 +11,65 @@ void nr_polar_kernal_operation(uint8_t *u, uint8_t *d, uint16_t N)
uint32_t
i
,
j
;
uint32_t
i
,
j
;
for
(
i
=
0
;
i
<
N
;
i
++
)
// Create the elements of d=u*G_N ...
#ifdef __AVX2__
{
d
[
i
]
=
0
;
for
(
j
=
0
;
j
<
N
;
j
++
)
// ... looking at all the elements of u
{
d
[
i
]
=
d
[
i
]
^
(
!
(
(
j
-
i
)
&
i
))
*
u
[
j
];
}
}
__m256i
A
,
B
,
C
,
D
,
E
,
U
,
zerosOnly
,
OUT
;
/*
* It works, but there are too many moves from memory and it's slow. With AVX-512 it could be done faster
*
__m256i A,B,C,E,U, OUT;
__m256i
inc
;
__m256i
inc
;
uint32_t
dTest
[
8
];
uint32_t
dTest
[
8
];
uint32_t
uArray
[
8
];
uint32_t
uArray
[
8
];
uint32_t
k
;
uint32_t
k
;
uint32_t toCheck[8];
uint32_t
incArray
[
8
];
uint32_t
incArray
[
8
];
//initialisation
for
(
k
=
0
;
k
<
8
;
k
++
)
for
(
k
=
0
;
k
<
8
;
k
++
)
incArray[k]=k; //0,1, ... 7
incArray
[
k
]
=
k
;
inc
=
_mm256_loadu_si256
((
__m256i
const
*
)
incArray
);
// 0, 1, ..., 7 to increase
inc=_mm256_loadu_si256((__m256i const*)incArray);
zerosOnly
=
_mm256_setzero_si256
();
// for comparison
for
(
i
=
0
;
i
<
N
;
i
+=
8
)
for
(
i
=
0
;
i
<
N
;
i
+=
8
)
{
{
B
=
_mm256_set1_epi32
((
int
)
i
);
// i, ..., i
B
=
_mm256_add_epi32
(
B
,
inc
);
// i, i+1, ..., i+7
B=_mm256_set1_epi32((int)i); // i, ... i
OUT
=
_mm256_setzero_si256
();
// it will contain the result of all the XORs for the d(i)s
B=_mm256_add_epi32(B, inc); //i, i+1, ... i+7
OUT=_mm256_setzero_si256();
for
(
j
=
0
;
j
<
N
;
j
++
)
for
(
j
=
0
;
j
<
N
;
j
++
)
{
{
//initialisation
A
=
_mm256_set1_epi32
((
int
)(
j
));
//j, j, ..., j
A=_mm256_set1_epi32((int)(j)); //j ...
A
=
_mm256_sub_epi32
(
A
,
B
);
//(j-i), (j-(i+1)), ... (j-(i+7))
A
=
_mm256_sub_epi32
(
A
,
B
);
//(j-i), (j-(i+1)), ... (j-(i+7))
U
=
_mm256_set1_epi32
((
int
)
u
[
j
]);
U
=
_mm256_set1_epi32
((
int
)
u
[
j
]);
_mm256_storeu_si256
((
__m256i
*
)
uArray
,
U
);
//u(j) ... u(j) for the maskload
_mm256_storeu_si256
((
__m256i
*
)
uArray
,
U
);
//u(j) ... u(j) for the maskload
C=_mm256_and_si256(A, B); //mask: if zero, then add
C
=
_mm256_and_si256
(
A
,
B
);
//(j-i)&i -> If zero, then XOR with the u(j)
D
=
_mm256_cmpeq_epi32
(
C
,
zerosOnly
);
// compare with zero and use the result as mask
_mm256_storeu_si256((__m256i*)toCheck, C);
E
=
_mm256_maskload_epi32
((
int
const
*
)
uArray
,
D
);
// load only some u(j)s for the XOR
for(k=0; k<8; k++)
{
toCheck[k]=!toCheck[k] << 31;
}
C=_mm256_loadu_si256((__m256i const*)toCheck); //mask: if 1, add
E=_mm256_maskload_epi32((int const*)uArray, C);
OUT
=
_mm256_xor_si256
(
OUT
,
E
);
//32 bit x 8
OUT
=
_mm256_xor_si256
(
OUT
,
E
);
//32 bit x 8
}
}
_mm256_storeu_si256
((
__m256i
*
)
dTest
,
OUT
);
_mm256_storeu_si256
((
__m256i
*
)
dTest
,
OUT
);
for(k=0; k<8; k++)
for
(
k
=
0
;
k
<
8
;
k
++
)
// Conversion from 32 bits to 8 bits
{
{
d[i+k]=(uint8_t)dTest[k]; //
Conv from 32 to 8
d
[
i
+
k
]
=
(
uint8_t
)
dTest
[
k
];
//
With AVX512 there is an intrinsic to do it
}
}
}
}
*/
#else
for
(
i
=
0
;
i
<
N
;
i
++
)
// Create the elements of d=u*G_N ...
{
d
[
i
]
=
0
;
for
(
j
=
0
;
j
<
N
;
j
++
)
// ... looking at all the elements of u
{
d
[
i
]
=
d
[
i
]
^
(
!
(
(
j
-
i
)
&
i
))
*
u
[
j
];
// it's like ((j-i)&i)==0
}
}
#endif
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment