Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG UE
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Michael Black
OpenXG UE
Commits
ef2873a9
Commit
ef2873a9
authored
Jan 04, 2022
by
frtabu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix a warning. fix openCL regression after rebase
parent
5b2e4110
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
295 additions
and
5 deletions
+295
-5
cmake_targets/CMakeLists.txt
cmake_targets/CMakeLists.txt
+2
-2
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.c
...ir1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.c
+292
-0
openair1/PHY/LTE_UE_TRANSPORT/pcfich_ue.c
openair1/PHY/LTE_UE_TRANSPORT/pcfich_ue.c
+1
-2
openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
+0
-1
No files found.
cmake_targets/CMakeLists.txt
View file @
ef2873a9
...
...
@@ -1532,8 +1532,8 @@ set(PHY_LDPC_CL_SRC
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
)
add_custom_target
(
nrLDPC_decoder_kernels_CL
COMMAND gcc
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_
kernels_
CL.c -dD -DNRLDPC_KERNEL_SOURCE -E -o
${
CMAKE_CURRENT_BINARY_DIR
}
/nrLDPC_decoder_kernels_CL.clc
SOURCES
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_
kernels_
CL.c
COMMAND gcc
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c -dD -DNRLDPC_KERNEL_SOURCE -E -o
${
CMAKE_CURRENT_BINARY_DIR
}
/nrLDPC_decoder_kernels_CL.clc
SOURCES
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c
)
set
(
PHY_NR_CODINGIF
...
...
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.c
0 → 100644
View file @
ef2873a9
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.0 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file PHY/CODING/nrLDPC_decoder_kernels_CL.cl
* \brief kernel functions for ldpc decoder accelerated via openCL
* \author Francois TABURET
* \date 2021
* \version 1.0
* \company Nokia BellLabs France
* \email: francois.taburet@nokia-bell-labs.com
* \note initial implem - translation of cuda version
* \warning
*/
//__global char dev_dt [46*68*384];
//__local char *dev_t;
//__global char dev_llr[68*384];
//__global unsigned char dev_tmp[68*384];
#define INT32_MAX 2147483647
//__constant h_element dev_h_compact1[46*19] = {}; // used in kernel 1
//__constant h_element dev_h_compact2[68*30] = {}; // used in kernel 2
// __device__ __constantant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constantant__ h_element dev_h_compact2[68*30]; // used in kernel 2
// row and col element count
__constant
char
h_ele_row_bg1_count
[
46
]
=
{
19
,
19
,
19
,
19
,
3
,
8
,
9
,
7
,
10
,
9
,
7
,
8
,
7
,
6
,
7
,
7
,
6
,
6
,
6
,
6
,
6
,
6
,
5
,
5
,
6
,
5
,
5
,
4
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
4
,
5
,
5
,
4
,
5
,
4
,
5
,
5
,
4
};
__constant
char
h_ele_col_bg1_count
[
68
]
=
{
30
,
28
,
7
,
11
,
9
,
4
,
8
,
12
,
8
,
7
,
12
,
10
,
12
,
11
,
10
,
7
,
10
,
10
,
13
,
7
,
8
,
11
,
12
,
5
,
6
,
6
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
__constant
char
h_ele_row_bg2_count
[
42
]
=
{
8
,
10
,
8
,
10
,
4
,
6
,
6
,
6
,
4
,
5
,
5
,
5
,
4
,
5
,
5
,
4
,
5
,
5
,
4
,
4
,
4
,
4
,
3
,
4
,
4
,
3
,
5
,
3
,
4
,
3
,
5
,
3
,
4
,
4
,
4
,
4
,
4
,
3
,
4
,
4
,
4
,
4
};
__constant
char
h_ele_col_bg2_count
[
52
]
=
{
22
,
23
,
10
,
5
,
5
,
14
,
7
,
13
,
6
,
8
,
9
,
16
,
9
,
12
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
// Kernel 1
__kernel
void
ldpc_cnp_kernel_1st_iter
(
__global
char
*
dev_llr
,
__global
char
*
dev_dt
,
__local
h_element
*
dev_h_compact1
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
{
// int iMCW = blockIdx.y; // codeword id
// int iBlkRow = blockIdx.x; // block row in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
int
iMCW
=
get_group_id
(
1
);
// codeword id
int
iBlkRow
=
get_group_id
(
0
);
// block row in h_base
int
iBlkCol
;
// block col in h_base
int
iSubRow
=
get_local_id
(
0
);;
// row index in sub_block of h_base
int
iCol
;
// overall col index in h_base
int
offsetR
;
int
shift_t
;
// For 2-min algorithm.
int
Q_sign
=
0
;
int
sq
;
int
Q
,
Q_abs
;
int
R_temp
;
int
sign
=
1
;
int
rmin1
=
INT32_MAX
;
int
rmin2
=
INT32_MAX
;
char
idx_min
=
0
;
h_element
h_element_t
;
int
s
=
(
BG
==
1
)
?
h_ele_row_bg1_count
[
iBlkRow
]
:
h_ele_row_bg2_count
[
iBlkRow
];
offsetR
=
(
iMCW
*
row
*
col
*
Zc
)
+
iBlkRow
*
Zc
+
iSubRow
;
// row*col*Zc = size of dev_dt
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("s: %d, offset %d\n", s, offsetR);
// The 1st recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
)
// loop through all the ZxZ sub-blocks in a row
{
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
// compact_col == row
iBlkCol
=
h_element_t
.
y
;
shift_t
=
h_element_t
.
value
;
shift_t
=
(
iSubRow
+
shift_t
)
%
Zc
;
iCol
=
(
iMCW
*
col
*
Zc
)
+
iBlkCol
*
Zc
+
shift_t
;
// col*Zc = size of llr
Q
=
dev_llr
[
iCol
];
Q_abs
=
(
Q
>
0
)
?
Q
:
-
Q
;
sq
=
Q
<
0
;
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("i %d, icol %d, Q: %d\n", i, iCol, Q);
// quick version
sign
=
sign
*
(
1
-
sq
*
2
);
Q_sign
|=
sq
<<
i
;
if
(
Q_abs
<
rmin1
){
rmin2
=
rmin1
;
rmin1
=
Q_abs
;
idx_min
=
i
;
}
else
if
(
Q_abs
<
rmin2
){
rmin2
=
Q_abs
;
}
}
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("min1 %d, min2 %d, min1_idx %d\n", rmin1, rmin2, idx_min);
// The 2nd recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
){
// v0: Best performance so far. 0.75f is the value of alpha.
sq
=
1
-
2
*
((
Q_sign
>>
i
)
&
0x01
);
R_temp
=
0
.
75
f
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
// write results to global memory
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
int
addr_temp
=
offsetR
+
h_element_t
.
y
*
row
*
Zc
;
dev_dt
[
addr_temp
]
=
R_temp
;
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("R_temp %d, temp_addr %d\n", R_temp, addr_temp);
}
}
// Kernel_1
__kernel
void
ldpc_cnp_kernel
(
__global
char
*
dev_llr
,
__global
char
*
dev_dt
,
__local
h_element
*
dev_h_compact1
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
// int iMCW = blockIdx.y;
// int iBlkRow = blockIdx.x; // block row in h_base // block col in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
int
iMCW
=
get_group_id
(
1
);
int
iBlkRow
=
get_group_id
(
0
);
// block row in h_base
int
iBlkCol
;
// block col in h_base
int
iSubRow
=
get_local_id
(
0
);;
// row index in sub_block of h_base
int
iCol
;
// overall col index in h_base
int
offsetR
;
int
shift_t
;
// For 2-min algorithm.
int
Q_sign
=
0
;
int
sq
;
int
Q
,
Q_abs
;
int
R_temp
;
int
sign
=
1
;
int
rmin1
=
INT32_MAX
;
int
rmin2
=
INT32_MAX
;
char
idx_min
=
0
;
h_element
h_element_t
;
int
s
=
(
BG
==
1
)
?
h_ele_row_bg1_count
[
iBlkRow
]
:
h_ele_row_bg2_count
[
iBlkRow
];
offsetR
=
(
iMCW
*
row
*
col
*
Zc
)
+
iBlkRow
*
Zc
+
iSubRow
;
// row * col * Zc = size of dev_dt
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("s: %d, offset %d\n", s, offsetR);
// The 1st recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
)
// loop through all the ZxZ sub-blocks in a row
{
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
iBlkCol
=
h_element_t
.
y
;
shift_t
=
h_element_t
.
value
;
shift_t
=
(
iSubRow
+
shift_t
)
%
Zc
;
iCol
=
iBlkCol
*
Zc
+
shift_t
;
R_temp
=
dev_dt
[
offsetR
+
iBlkCol
*
row
*
Zc
];
Q
=
dev_llr
[
iMCW
*
(
col
*
Zc
)
+
iCol
]
-
R_temp
;
Q_abs
=
(
Q
>
0
)
?
Q
:
-
Q
;
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("i %d, icol %d, Q: %d\n", i, iCol, Q);
sq
=
Q
<
0
;
sign
=
sign
*
(
1
-
sq
*
2
);
Q_sign
|=
sq
<<
i
;
if
(
Q_abs
<
rmin1
){
rmin2
=
rmin1
;
rmin1
=
Q_abs
;
idx_min
=
i
;
}
else
if
(
Q_abs
<
rmin2
){
rmin2
=
Q_abs
;
}
}
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("min1 %d, min2 %d, min1_idx %d\n", rmin1, rmin2, idx_min);
// The 2nd recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
){
sq
=
1
-
2
*
((
Q_sign
>>
i
)
&
0x01
);
R_temp
=
0
.
75
f
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
// write results to global memory
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
int
addr_temp
=
h_element_t
.
y
*
row
*
Zc
+
offsetR
;
dev_dt
[
addr_temp
]
=
R_temp
;
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("R_temp %d, temp_addr %d\n", R_temp, addr_temp);
}
}
// Kernel 2: VNP processing
__kernel
void
ldpc_vnp_kernel_normal
(
__global
char
*
dev_llr
,
__global
char
*
dev_dt
,
__global
char
*
dev_const_llr
,
__local
h_element
*
dev_h_compact2
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
{
// int iMCW = blockIdx.y;
// int iBlkCol = blockIdx.x;
// int iSubCol = threadIdx.x;
int
iMCW
=
get_group_id
(
1
);
int
iBlkCol
=
get_group_id
(
0
);
int
iBlkRow
;
int
iSubCol
=
get_local_id
(
0
);
int
iRow
;
int
iCol
;
int
shift_t
,
sf
;
int
APP
;
h_element
h_element_t
;
// update all the llr values
iCol
=
iBlkCol
*
Zc
+
iSubCol
;
APP
=
dev_const_llr
[
iMCW
*
col
*
Zc
+
iCol
];
int
offsetDt
=
iMCW
*
row
*
col
*
Zc
+
iBlkCol
*
row
*
Zc
;
int
s
=
(
BG
==
1
)
?
h_ele_col_bg1_count
[
iBlkCol
]
:
h_ele_col_bg2_count
[
iBlkCol
];
for
(
int
i
=
0
;
i
<
s
;
i
++
)
{
h_element_t
=
dev_h_compact2
[
i
*
col
+
iBlkCol
];
shift_t
=
h_element_t
.
value
%
Zc
;
iBlkRow
=
h_element_t
.
x
;
sf
=
iSubCol
-
shift_t
;
sf
=
(
sf
+
Zc
)
%
Zc
;
iRow
=
iBlkRow
*
Zc
+
sf
;
APP
=
APP
+
dev_dt
[
offsetDt
+
iRow
];
}
if
(
APP
>
SCHAR_MAX
)
APP
=
SCHAR_MAX
;
if
(
APP
<
SCHAR_MIN
)
APP
=
SCHAR_MIN
;
// write back to device global memory
dev_llr
[
iMCW
*
col
*
Zc
+
iCol
]
=
APP
;
}
__kernel
void
pack_decoded_bit
(
__global
unsigned
char
*
dev_llr
,
__global
unsigned
char
*
dev_tmp
,
int
col
,
int
Zc
)
{
// int iMCW = blockIdx.y;
// int btid = threadIdx.x;
unsigned
char
tmp
[
128
];
int
iMCW
=
get_group_id
(
1
);
int
btid
=
get_local_id
(
0
);
int
tid
=
iMCW
*
col
*
Zc
+
get_group_id
(
0
)
*
128
+
btid
;
tmp
[
btid
]
=
0
;
if
(
dev_llr
[
tid
]
<
0
){
tmp
[
btid
]
=
1
<<
(
7
-
(
btid
&
7
));
}
// __syncthreads();
if
(
btid
<
16
){
dev_tmp
[
iMCW
*
col
*
Zc
+
get_group_id
(
0
)
*
16
+
btid
]
=
0
;
for
(
int
i
=
0
;
i
<
8
;
i
++
){
dev_tmp
[
iMCW
*
col
*
Zc
+
get_group_id
(
0
)
*
16
+
btid
]
+=
tmp
[
btid
*
8
+
i
];
}
}
}
openair1/PHY/LTE_UE_TRANSPORT/pcfich_ue.c
View file @
ef2873a9
...
...
@@ -46,10 +46,9 @@ void pcfich_unscrambling(LTE_DL_FRAME_PARMS *frame_parms,
uint32_t
i
;
uint8_t
reset
;
uint32_t
x1
,
x2
,
s
=
0
;
uint32_t
x1
=
0
,
x2
,
s
=
0
;
// x1 is set in lte_gold_generic
reset
=
1
;
// x1 is set in lte_gold_generic
x2
=
((((
2
*
frame_parms
->
Nid_cell
)
+
1
)
*
(
1
+
subframe
))
<<
9
)
+
frame_parms
->
Nid_cell
;
//this is c_init in 36.211 Sec 6.7.1
for
(
i
=
0
;
i
<
32
;
i
++
)
{
...
...
openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
View file @
ef2873a9
...
...
@@ -1085,7 +1085,6 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
LOG_D
(
PHY
,
"AbsSubframe %d.%d --> ldpc Decoding for CW1 %5.3f
\n
"
,
frame_rx
%
1024
,
nr_slot_rx
,(
ue
->
dlsch_decoding_stats
[
proc
->
thread_id
].
p_time
)
/
(
cpuf
*
1000
.
0
));
}
}
LOG_D
(
PHY
,
"harq_pid: %d, TBS expected dlsch1: %d
\n
"
,
harq_pid
,
dlsch1
->
harq_processes
[
harq_pid
]
->
TBS
);
}
// send to mac
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment