Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Michael Black
OpenXG-RAN
Commits
a7b6c4bc
Commit
a7b6c4bc
authored
Jan 04, 2022
by
frtabu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix a warning. fix openCL regression after rebase
parent
1dc47e5d
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
295 additions
and
5 deletions
+295
-5
cmake_targets/CMakeLists.txt
cmake_targets/CMakeLists.txt
+2
-2
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.c
...ir1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.c
+292
-0
openair1/PHY/LTE_UE_TRANSPORT/pcfich_ue.c
openair1/PHY/LTE_UE_TRANSPORT/pcfich_ue.c
+1
-2
openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
+0
-1
No files found.
cmake_targets/CMakeLists.txt
View file @
a7b6c4bc
...
...
@@ -1532,8 +1532,8 @@ set(PHY_LDPC_CL_SRC
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
)
add_custom_target
(
nrLDPC_decoder_kernels_CL
COMMAND gcc
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_
kernels_
CL.c -dD -DNRLDPC_KERNEL_SOURCE -E -o
${
CMAKE_CURRENT_BINARY_DIR
}
/nrLDPC_decoder_kernels_CL.clc
SOURCES
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_
kernels_
CL.c
COMMAND gcc
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c -dD -DNRLDPC_KERNEL_SOURCE -E -o
${
CMAKE_CURRENT_BINARY_DIR
}
/nrLDPC_decoder_kernels_CL.clc
SOURCES
${
OPENAIR1_DIR
}
/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c
)
set
(
PHY_NR_CODINGIF
...
...
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.c
0 → 100644
View file @
a7b6c4bc
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.0 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file PHY/CODING/nrLDPC_decoder_kernels_CL.cl
* \brief kernel functions for ldpc decoder accelerated via openCL
* \author Francois TABURET
* \date 2021
* \version 1.0
* \company Nokia BellLabs France
* \email: francois.taburet@nokia-bell-labs.com
* \note initial implem - translation of cuda version
* \warning
*/
//__global char dev_dt [46*68*384];
//__local char *dev_t;
//__global char dev_llr[68*384];
//__global unsigned char dev_tmp[68*384];
#define INT32_MAX 2147483647
//__constant h_element dev_h_compact1[46*19] = {}; // used in kernel 1
//__constant h_element dev_h_compact2[68*30] = {}; // used in kernel 2
// __device__ __constantant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constantant__ h_element dev_h_compact2[68*30]; // used in kernel 2
// row and col element count
__constant
char
h_ele_row_bg1_count
[
46
]
=
{
19
,
19
,
19
,
19
,
3
,
8
,
9
,
7
,
10
,
9
,
7
,
8
,
7
,
6
,
7
,
7
,
6
,
6
,
6
,
6
,
6
,
6
,
5
,
5
,
6
,
5
,
5
,
4
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
4
,
5
,
5
,
4
,
5
,
4
,
5
,
5
,
4
};
__constant
char
h_ele_col_bg1_count
[
68
]
=
{
30
,
28
,
7
,
11
,
9
,
4
,
8
,
12
,
8
,
7
,
12
,
10
,
12
,
11
,
10
,
7
,
10
,
10
,
13
,
7
,
8
,
11
,
12
,
5
,
6
,
6
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
__constant
char
h_ele_row_bg2_count
[
42
]
=
{
8
,
10
,
8
,
10
,
4
,
6
,
6
,
6
,
4
,
5
,
5
,
5
,
4
,
5
,
5
,
4
,
5
,
5
,
4
,
4
,
4
,
4
,
3
,
4
,
4
,
3
,
5
,
3
,
4
,
3
,
5
,
3
,
4
,
4
,
4
,
4
,
4
,
3
,
4
,
4
,
4
,
4
};
__constant
char
h_ele_col_bg2_count
[
52
]
=
{
22
,
23
,
10
,
5
,
5
,
14
,
7
,
13
,
6
,
8
,
9
,
16
,
9
,
12
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
};
// Kernel 1
__kernel
void
ldpc_cnp_kernel_1st_iter
(
__global
char
*
dev_llr
,
__global
char
*
dev_dt
,
__local
h_element
*
dev_h_compact1
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
{
// int iMCW = blockIdx.y; // codeword id
// int iBlkRow = blockIdx.x; // block row in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
int
iMCW
=
get_group_id
(
1
);
// codeword id
int
iBlkRow
=
get_group_id
(
0
);
// block row in h_base
int
iBlkCol
;
// block col in h_base
int
iSubRow
=
get_local_id
(
0
);;
// row index in sub_block of h_base
int
iCol
;
// overall col index in h_base
int
offsetR
;
int
shift_t
;
// For 2-min algorithm.
int
Q_sign
=
0
;
int
sq
;
int
Q
,
Q_abs
;
int
R_temp
;
int
sign
=
1
;
int
rmin1
=
INT32_MAX
;
int
rmin2
=
INT32_MAX
;
char
idx_min
=
0
;
h_element
h_element_t
;
int
s
=
(
BG
==
1
)
?
h_ele_row_bg1_count
[
iBlkRow
]
:
h_ele_row_bg2_count
[
iBlkRow
];
offsetR
=
(
iMCW
*
row
*
col
*
Zc
)
+
iBlkRow
*
Zc
+
iSubRow
;
// row*col*Zc = size of dev_dt
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("s: %d, offset %d\n", s, offsetR);
// The 1st recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
)
// loop through all the ZxZ sub-blocks in a row
{
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
// compact_col == row
iBlkCol
=
h_element_t
.
y
;
shift_t
=
h_element_t
.
value
;
shift_t
=
(
iSubRow
+
shift_t
)
%
Zc
;
iCol
=
(
iMCW
*
col
*
Zc
)
+
iBlkCol
*
Zc
+
shift_t
;
// col*Zc = size of llr
Q
=
dev_llr
[
iCol
];
Q_abs
=
(
Q
>
0
)
?
Q
:
-
Q
;
sq
=
Q
<
0
;
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("i %d, icol %d, Q: %d\n", i, iCol, Q);
// quick version
sign
=
sign
*
(
1
-
sq
*
2
);
Q_sign
|=
sq
<<
i
;
if
(
Q_abs
<
rmin1
){
rmin2
=
rmin1
;
rmin1
=
Q_abs
;
idx_min
=
i
;
}
else
if
(
Q_abs
<
rmin2
){
rmin2
=
Q_abs
;
}
}
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("min1 %d, min2 %d, min1_idx %d\n", rmin1, rmin2, idx_min);
// The 2nd recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
){
// v0: Best performance so far. 0.75f is the value of alpha.
sq
=
1
-
2
*
((
Q_sign
>>
i
)
&
0x01
);
R_temp
=
0
.
75
f
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
// write results to global memory
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
int
addr_temp
=
offsetR
+
h_element_t
.
y
*
row
*
Zc
;
dev_dt
[
addr_temp
]
=
R_temp
;
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("R_temp %d, temp_addr %d\n", R_temp, addr_temp);
}
}
// Kernel_1
__kernel
void
ldpc_cnp_kernel
(
__global
char
*
dev_llr
,
__global
char
*
dev_dt
,
__local
h_element
*
dev_h_compact1
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
// int iMCW = blockIdx.y;
// int iBlkRow = blockIdx.x; // block row in h_base // block col in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
int
iMCW
=
get_group_id
(
1
);
int
iBlkRow
=
get_group_id
(
0
);
// block row in h_base
int
iBlkCol
;
// block col in h_base
int
iSubRow
=
get_local_id
(
0
);;
// row index in sub_block of h_base
int
iCol
;
// overall col index in h_base
int
offsetR
;
int
shift_t
;
// For 2-min algorithm.
int
Q_sign
=
0
;
int
sq
;
int
Q
,
Q_abs
;
int
R_temp
;
int
sign
=
1
;
int
rmin1
=
INT32_MAX
;
int
rmin2
=
INT32_MAX
;
char
idx_min
=
0
;
h_element
h_element_t
;
int
s
=
(
BG
==
1
)
?
h_ele_row_bg1_count
[
iBlkRow
]
:
h_ele_row_bg2_count
[
iBlkRow
];
offsetR
=
(
iMCW
*
row
*
col
*
Zc
)
+
iBlkRow
*
Zc
+
iSubRow
;
// row * col * Zc = size of dev_dt
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("s: %d, offset %d\n", s, offsetR);
// The 1st recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
)
// loop through all the ZxZ sub-blocks in a row
{
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
iBlkCol
=
h_element_t
.
y
;
shift_t
=
h_element_t
.
value
;
shift_t
=
(
iSubRow
+
shift_t
)
%
Zc
;
iCol
=
iBlkCol
*
Zc
+
shift_t
;
R_temp
=
dev_dt
[
offsetR
+
iBlkCol
*
row
*
Zc
];
Q
=
dev_llr
[
iMCW
*
(
col
*
Zc
)
+
iCol
]
-
R_temp
;
Q_abs
=
(
Q
>
0
)
?
Q
:
-
Q
;
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("i %d, icol %d, Q: %d\n", i, iCol, Q);
sq
=
Q
<
0
;
sign
=
sign
*
(
1
-
sq
*
2
);
Q_sign
|=
sq
<<
i
;
if
(
Q_abs
<
rmin1
){
rmin2
=
rmin1
;
rmin1
=
Q_abs
;
idx_min
=
i
;
}
else
if
(
Q_abs
<
rmin2
){
rmin2
=
Q_abs
;
}
}
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("min1 %d, min2 %d, min1_idx %d\n", rmin1, rmin2, idx_min);
// The 2nd recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
){
sq
=
1
-
2
*
((
Q_sign
>>
i
)
&
0x01
);
R_temp
=
0
.
75
f
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
// write results to global memory
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
int
addr_temp
=
h_element_t
.
y
*
row
*
Zc
+
offsetR
;
dev_dt
[
addr_temp
]
=
R_temp
;
// if(blockIdx.x == 0 && threadIdx.x == 1)printf("R_temp %d, temp_addr %d\n", R_temp, addr_temp);
}
}
// Kernel 2: VNP processing
__kernel
void
ldpc_vnp_kernel_normal
(
__global
char
*
dev_llr
,
__global
char
*
dev_dt
,
__global
char
*
dev_const_llr
,
__local
h_element
*
dev_h_compact2
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
{
// int iMCW = blockIdx.y;
// int iBlkCol = blockIdx.x;
// int iSubCol = threadIdx.x;
int
iMCW
=
get_group_id
(
1
);
int
iBlkCol
=
get_group_id
(
0
);
int
iBlkRow
;
int
iSubCol
=
get_local_id
(
0
);
int
iRow
;
int
iCol
;
int
shift_t
,
sf
;
int
APP
;
h_element
h_element_t
;
// update all the llr values
iCol
=
iBlkCol
*
Zc
+
iSubCol
;
APP
=
dev_const_llr
[
iMCW
*
col
*
Zc
+
iCol
];
int
offsetDt
=
iMCW
*
row
*
col
*
Zc
+
iBlkCol
*
row
*
Zc
;
int
s
=
(
BG
==
1
)
?
h_ele_col_bg1_count
[
iBlkCol
]
:
h_ele_col_bg2_count
[
iBlkCol
];
for
(
int
i
=
0
;
i
<
s
;
i
++
)
{
h_element_t
=
dev_h_compact2
[
i
*
col
+
iBlkCol
];
shift_t
=
h_element_t
.
value
%
Zc
;
iBlkRow
=
h_element_t
.
x
;
sf
=
iSubCol
-
shift_t
;
sf
=
(
sf
+
Zc
)
%
Zc
;
iRow
=
iBlkRow
*
Zc
+
sf
;
APP
=
APP
+
dev_dt
[
offsetDt
+
iRow
];
}
if
(
APP
>
SCHAR_MAX
)
APP
=
SCHAR_MAX
;
if
(
APP
<
SCHAR_MIN
)
APP
=
SCHAR_MIN
;
// write back to device global memory
dev_llr
[
iMCW
*
col
*
Zc
+
iCol
]
=
APP
;
}
__kernel
void
pack_decoded_bit
(
__global
unsigned
char
*
dev_llr
,
__global
unsigned
char
*
dev_tmp
,
int
col
,
int
Zc
)
{
// int iMCW = blockIdx.y;
// int btid = threadIdx.x;
unsigned
char
tmp
[
128
];
int
iMCW
=
get_group_id
(
1
);
int
btid
=
get_local_id
(
0
);
int
tid
=
iMCW
*
col
*
Zc
+
get_group_id
(
0
)
*
128
+
btid
;
tmp
[
btid
]
=
0
;
if
(
dev_llr
[
tid
]
<
0
){
tmp
[
btid
]
=
1
<<
(
7
-
(
btid
&
7
));
}
// __syncthreads();
if
(
btid
<
16
){
dev_tmp
[
iMCW
*
col
*
Zc
+
get_group_id
(
0
)
*
16
+
btid
]
=
0
;
for
(
int
i
=
0
;
i
<
8
;
i
++
){
dev_tmp
[
iMCW
*
col
*
Zc
+
get_group_id
(
0
)
*
16
+
btid
]
+=
tmp
[
btid
*
8
+
i
];
}
}
}
openair1/PHY/LTE_UE_TRANSPORT/pcfich_ue.c
View file @
a7b6c4bc
...
...
@@ -46,10 +46,9 @@ void pcfich_unscrambling(LTE_DL_FRAME_PARMS *frame_parms,
uint32_t
i
;
uint8_t
reset
;
uint32_t
x1
,
x2
,
s
=
0
;
uint32_t
x1
=
0
,
x2
,
s
=
0
;
// x1 is set in lte_gold_generic
reset
=
1
;
// x1 is set in lte_gold_generic
x2
=
((((
2
*
frame_parms
->
Nid_cell
)
+
1
)
*
(
1
+
subframe
))
<<
9
)
+
frame_parms
->
Nid_cell
;
//this is c_init in 36.211 Sec 6.7.1
for
(
i
=
0
;
i
<
32
;
i
++
)
{
...
...
openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
View file @
a7b6c4bc
...
...
@@ -1085,7 +1085,6 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
LOG_D
(
PHY
,
"AbsSubframe %d.%d --> ldpc Decoding for CW1 %5.3f
\n
"
,
frame_rx
%
1024
,
nr_slot_rx
,(
ue
->
dlsch_decoding_stats
[
proc
->
thread_id
].
p_time
)
/
(
cpuf
*
1000
.
0
));
}
}
LOG_D
(
PHY
,
"harq_pid: %d, TBS expected dlsch1: %d
\n
"
,
harq_pid
,
dlsch1
->
harq_processes
[
harq_pid
]
->
TBS
);
}
// send to mac
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment