Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Michael Black
OpenXG-RAN
Commits
8dd5346e
Commit
8dd5346e
authored
Oct 27, 2021
by
frtabu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixes after rebase
parent
68d415fd
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
149 additions
and
41 deletions
+149
-41
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c
+138
-33
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.cl
...r1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.cl
+11
-8
No files found.
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_CL.c
View file @
8dd5346e
...
...
@@ -51,6 +51,7 @@ typedef struct{
char
y
;
short
value
;
}
h_element
;
#include "../nrLDPC_decoder_LYC/bgs/BG1_compact_in_C.h"
typedef
struct
{
cl_uint
max_CU
;
...
...
@@ -71,6 +72,8 @@ typedef struct{
ocldev_t
ocldev
[
MAX_OCLDEV
];
cl_context
context
;
cl_program
program
;
cl_mem
dev_h_compact1
;
cl_mem
dev_h_compact2
;
cl_mem
dev_const_llr
;
cl_mem
dev_llr
;
cl_mem
dev_dt
;
...
...
@@ -87,12 +90,10 @@ typedef struct{
ocl_t
ocl
;
void
init_LLR_DMA
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
){
uint16_t
Zc
=
p_decParams
->
Z
;
uint8_t
BG
=
p_decParams
->
BG
;
int
block_length
=
p_decParams
->
block_length
;
uint8_t
row
,
col
;
void
set_compact_BG
(
int
Zc
,
short
BG
){
cl_uint
rt
;
int
row
,
col
;
if
(
BG
==
1
){
row
=
46
;
col
=
68
;
...
...
@@ -101,12 +102,81 @@ void init_LLR_DMA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_out
row
=
42
;
col
=
52
;
}
unsigned
char
*
hard_decision
=
(
unsigned
char
*
)
p_out
;
int
memorySize_llr_cuda
=
col
*
Zc
*
sizeof
(
char
)
*
MC
;
// cudaCheck( cudaMemcpyToSymbol(dev_const_llr, p_llr, memorySize_llr_cuda) );
// cudaCheck( cudaMemcpyToSymbol(dev_llr, p_llr, memorySize_llr_cuda) );
// cudaDeviceSynchronize();
int
compact_row
=
30
;
int
compact_col
=
19
;
if
(
BG
==
2
){
compact_row
=
10
,
compact_col
=
23
;}
int
memorySize_h_compact1
=
row
*
compact_col
*
sizeof
(
h_element
);
int
memorySize_h_compact2
=
compact_row
*
col
*
sizeof
(
h_element
);
int
lift_index
=
0
;
short
lift_set
[][
9
]
=
{
{
2
,
4
,
8
,
16
,
32
,
64
,
128
,
256
},
{
3
,
6
,
12
,
24
,
48
,
96
,
192
,
384
},
{
5
,
10
,
20
,
40
,
80
,
160
,
320
},
{
7
,
14
,
28
,
56
,
112
,
224
},
{
9
,
18
,
36
,
72
,
144
,
288
},
{
11
,
22
,
44
,
88
,
176
,
352
},
{
13
,
26
,
52
,
104
,
208
},
{
15
,
30
,
60
,
120
,
240
},
{
0
}
};
for
(
int
i
=
0
;
lift_set
[
i
][
0
]
!=
0
;
i
++
){
for
(
int
j
=
0
;
lift_set
[
i
][
j
]
!=
0
;
j
++
){
if
(
Zc
==
lift_set
[
i
][
j
]){
lift_index
=
i
;
break
;
}
}
}
printf
(
"
\n
Zc = %d BG = %d
\n
"
,
Zc
,
BG
);
ocl
.
runtime
[
0
].
dev_h_compact1
=
clCreateBuffer
(
ocl
.
runtime
[
0
].
context
,
CL_MEM_READ_ONLY
|
CL_MEM_HOST_WRITE_ONLY
,
memorySize_h_compact1
,
NULL
,
(
cl_int
*
)
&
rt
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d creating buffer dev_h_compact1 for platform %i
\n
"
,
(
int
)
rt
,
0
);
ocl
.
runtime
[
0
].
dev_h_compact2
=
clCreateBuffer
(
ocl
.
runtime
[
0
].
context
,
CL_MEM_READ_ONLY
|
CL_MEM_HOST_WRITE_ONLY
,
memorySize_h_compact2
,
NULL
,
(
cl_int
*
)
&
rt
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d creating buffer dev_h_compact2 for platform %i
\n
"
,
(
int
)
rt
,
0
);
h_element
*
h1
;
h_element
*
h2
;
switch
(
lift_index
){
case
0
:
h1
=
host_h_compact1_I0
;
h2
=
host_h_compact2_I0
;
break
;
case
1
:
h1
=
host_h_compact1_I1
;
h2
=
host_h_compact2_I1
;
break
;
case
2
:
h1
=
host_h_compact1_I2
;
h2
=
host_h_compact2_I2
;
break
;
case
3
:
h1
=
host_h_compact1_I3
;
h2
=
host_h_compact2_I3
;
break
;
case
4
:
h1
=
host_h_compact1_I4
;
h2
=
host_h_compact2_I4
;
break
;
case
5
:
h1
=
host_h_compact1_I5
;
h2
=
host_h_compact2_I5
;
break
;
case
6
:
h1
=
host_h_compact1_I6
;
h2
=
host_h_compact2_I6
;
break
;
case
7
:
h1
=
host_h_compact1_I7
;
h2
=
host_h_compact2_I7
;
break
;
default:
AssertFatal
(
0
,
"Invalid lift_index value %i
\n
"
,
lift_index
);
break
;
}
rt
=
clEnqueueWriteBuffer
(
ocl
.
runtime
[
0
].
queue
[
0
],
ocl
.
runtime
[
0
].
dev_h_compact1
,
CL_TRUE
,
0
,
memorySize_h_compact1
,
h1
,
0
,
NULL
,
NULL
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d moving h_compact1 memory to pltf %i dev %i
\n
"
,
(
int
)
rt
,
0
,
0
);
rt
=
clEnqueueWriteBuffer
(
ocl
.
runtime
[
0
].
queue
[
0
],
ocl
.
runtime
[
0
].
dev_h_compact2
,
CL_TRUE
,
0
,
memorySize_h_compact2
,
h2
,
0
,
NULL
,
NULL
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d moving h_compact2 memory to pltf %i dev %i
\n
"
,
(
int
)
rt
,
0
,
0
);
// return 0;
}
void
cl_error_callback
(
const
char
*
errinfo
,
const
void
*
private_info
,
size_t
cb
,
void
*
user_data
)
{
oclruntime_t
*
runtime
=
(
oclruntime_t
*
)
user_data
;
...
...
@@ -244,7 +314,8 @@ int ldpc_autoinit(void) { // called by the library loader
void
nrLDPC_initcall
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
)
{
set_compact_BG
(
p_decParams
->
Z
,
p_decParams
->
BG
);
// init_LLR_DMA(p_decParams, p_llr, p_out);
}
...
...
@@ -252,10 +323,9 @@ int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_
{
uint16_t
Zc
=
p_decParams
->
Z
;
uint8_t
BG
=
p_decParams
->
BG
;
uint8_t
numMaxIter
=
p_decParams
->
numMaxIter
;
//
uint8_t numMaxIter = p_decParams->numMaxIter;
int
block_length
=
p_decParams
->
block_length
;
e_nrLDPC_outMode
outMode
=
p_decParams
->
outMode
;
cudaError_t
cudaStatus
;
// e_nrLDPC_outMode outMode = p_decParams->outMode;
uint8_t
row
,
col
;
if
(
BG
==
1
){
row
=
46
;
...
...
@@ -267,7 +337,7 @@ int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_
}
// alloc memory
unsigned
char
*
hard_decision
=
(
unsigned
char
*
)
p_out
;
//
unsigned char *hard_decision = (unsigned char*)p_out;
// gpu
int
memorySize_llr
=
col
*
Zc
*
sizeof
(
char
)
*
MC
;
// cudaCheck( cudaMemcpyToSymbol(dev_const_llr, p_llr, memorySize_llr_cuda) );
...
...
@@ -279,7 +349,7 @@ int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_
memorySize_llr
,
p_llr
,
0
,
NULL
,
NULL
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d moving p_llr data to read-write memory in pltf %i dev %i
\n
"
,
(
int
)
rt
,
0
,
0
);
// Define CUDA kernel dimension
int
blockSizeX
=
Zc
;
//
int blockSizeX = Zc;
// dim3 dimGridKernel1(row, MC, 1); // dim of the thread blocks
// dim3 dimBlockKernel1(blockSizeX, 1, 1);
...
...
@@ -288,25 +358,59 @@ int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_
// cudaDeviceSynchronize();
// lauch kernel
/*
size_t
global_item_sizek0
[
2
]
=
{
row
*
Zc
,
MC
};
// Process the entire lists
size_t
global_item_sizek1
[
2
]
=
{
col
*
Zc
,
MC
};
// Process the entire lists
size_t
local_item_sizek
[
2
]
=
{
128
,
1
};
// Divide work items into groups of 128
for
(
int
ii
=
0
;
ii
<
MAX_ITERATION
;
ii
++
){
// first kernel
if
(
ii
==
0
){
ldpc_cnp_kernel_1st_iter
<<<dimGridKernel1, dimBlockKernel1>>>
( BG, row, col, Zc);
// ldpc_cnp_kernel_1st_iter
// <<<dimGridKernel1, dimBlockKernel1>>>
// ( BG, row, col, Zc);
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel_1st
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_llr
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel_1st
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_dt
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel_1st
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_h_compact1
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel_1st
,
3
,
sizeof
(
int
),
(
void
*
)
&
(
BG
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel_1st
,
4
,
sizeof
(
int
),
(
void
*
)
&
(
row
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel_1st
,
5
,
sizeof
(
int
),
(
void
*
)
&
(
col
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel_1st
,
6
,
sizeof
(
int
),
(
void
*
)
&
(
Zc
));
rt
=
clEnqueueNDRangeKernel
(
ocl
.
runtime
[
0
].
queue
[
0
],
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel_1st
,
2
,
NULL
,
global_item_sizek0
,
local_item_sizek
,
0
,
NULL
,
NULL
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d enqueing cnp_kernel_1st
\n
"
,
(
int
)
rt
);
}
else
{
ldpc_cnp_kernel
<<<dimGridKernel1, dimBlockKernel1>>>
( BG, row, col, Zc);
// ldpc_cnp_kernel
// <<<dimGridKernel1, dimBlockKernel1>>>
// ( BG, row, col, Zc);
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_llr
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_dt
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_h_compact1
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel
,
3
,
sizeof
(
int
),
(
void
*
)
&
(
BG
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel
,
4
,
sizeof
(
int
),
(
void
*
)
&
(
row
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel
,
5
,
sizeof
(
int
),
(
void
*
)
&
(
col
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel
,
6
,
sizeof
(
int
),
(
void
*
)
&
(
Zc
));
rt
=
clEnqueueNDRangeKernel
(
ocl
.
runtime
[
0
].
queue
[
0
],
ocl
.
runtime
[
0
].
kernels
[
0
].
cnp_kernel
,
2
,
NULL
,
global_item_sizek0
,
local_item_sizek
,
0
,
NULL
,
NULL
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d enqueing cnp_kernel
\n
"
,
(
int
)
rt
);
}
// second kernel
ldpc_vnp_kernel_normal
<<<dimGridKernel2, dimBlockKernel2>>>
// (dev_llr, dev_const_llr,BG, row, col, Zc);
(BG, row, col, Zc);
// ldpc_vnp_kernel_normal
// <<<dimGridKernel2, dimBlockKernel2>>>
// // (dev_llr, dev_const_llr,BG, row, col, Zc);
// (BG, row, col, Zc);
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_llr
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_dt
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_const_llr
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
3
,
sizeof
(
cl_mem
),
(
void
*
)
&
(
ocl
.
runtime
[
0
].
dev_h_compact2
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
4
,
sizeof
(
int
),
(
void
*
)
&
(
BG
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
5
,
sizeof
(
int
),
(
void
*
)
&
(
row
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
6
,
sizeof
(
int
),
(
void
*
)
&
(
col
));
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
7
,
sizeof
(
int
),
(
void
*
)
&
(
Zc
));
rt
=
clEnqueueNDRangeKernel
(
ocl
.
runtime
[
0
].
queue
[
0
],
ocl
.
runtime
[
0
].
kernels
[
0
].
vnp_kernel_normal
,
2
,
NULL
,
global_item_sizek1
,
local_item_sizek
,
0
,
NULL
,
NULL
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d enqueing vnp_kernel_normal
\n
"
,
(
int
)
rt
);
}
*/
// int pack = (block_length/128)+1;
// dim3 pack_block(pack, MC, 1);
// pack_decoded_bit<<<pack_block,128>>>( col, Zc);
...
...
@@ -316,10 +420,11 @@ int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_
CLSETKERNELARG
(
ocl
.
runtime
[
0
].
kernels
[
0
].
pack_decoded_bit
,
3
,
sizeof
(
int
),
(
void
*
)
&
(
Zc
));
// Execute the OpenCL kernel on the list
size_t
global_item_size
=
block_length
;
// Process the entire lists
size_t
local_item_size
=
128
;
// Divide work items into groups of 128
size_t
global_item_size
[
2
]
=
{
block_length
,
MC
}
;
// Process the entire lists
size_t
local_item_size
[
2
]
=
{
128
,
1
}
;
// Divide work items into groups of 128
rt
=
clEnqueueNDRangeKernel
(
ocl
.
runtime
[
0
].
queue
[
0
],
ocl
.
runtime
[
0
].
kernels
[
0
].
pack_decoded_bit
,
2
,
NULL
,
&
global_item_size
,
&
local_item_size
,
0
,
NULL
,
NULL
);
global_item_size
,
local_item_size
,
0
,
NULL
,
NULL
);
AssertFatal
(
rt
==
CL_SUCCESS
,
"Error %d enqueing pack_decoded_bit
\n
"
,
(
int
)
rt
);
// cudaCheck( cudaMemcpyFromSymbol((void*)hard_decision, (const void*)dev_tmp, (block_length/8)*sizeof(unsigned char)) );
// cudaDeviceSynchronize();
...
...
openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder_kernels_CL.cl
View file @
8dd5346e
...
...
@@ -45,11 +45,11 @@ typedef struct{
//__global
char
dev_llr[68*384]
;
//__global
unsigned
char
dev_tmp[68*384]
;
__constant
h_element
h_compact1
[46*19]
=
{}
;
__constant
h_element
h_compact2
[68*30]
=
{}
;
__constant
h_element
dev_h_compact1[46*19]
=
{}
; // used in kernel 1
__constant
h_element
dev_h_compact2[68*30]
=
{}
; // used in kernel 2
//__constant
h_element
dev_h_compact1[46*19]
=
{}
; // used in kernel 1
//__constant
h_element
dev_h_compact2[68*30]
=
{}
; // used in kernel 2
//
__device__
__constantant__
h_element
dev_h_compact1[46*19]
; // used in kernel 1
//
__device__
__constantant__
h_element
dev_h_compact2[68*30]
; // used in kernel 2
...
...
@@ -86,7 +86,7 @@ __constant char h_ele_col_bg2_count[52] = {
//
Kernel
1
__kernel
void
ldpc_cnp_kernel_1st_iter
(
__
local
char
*
dev_llr,
__local
char
*
dev_dt
,
int
BG,
int
row,
int
col,
int
Zc
)
__kernel
void
ldpc_cnp_kernel_1st_iter
(
__
global
char
*
dev_llr,
__global
char
*
dev_dt,
__local
h_element
*dev_h_compact1
,
int
BG,
int
row,
int
col,
int
Zc
)
{
//
int
iMCW
=
blockIdx.y
; // codeword id
//
int
iBlkRow
=
blockIdx.x
; // block row in h_base
...
...
@@ -158,7 +158,7 @@ __kernel void ldpc_cnp_kernel_1st_iter( __local char * dev_llr, __local char * d
}
// Kernel_1
__kernel void ldpc_cnp_kernel( __
local char * dev_llr, __local char * dev_dt
, int BG, int row, int col, int Zc)
__kernel void ldpc_cnp_kernel( __
global char * dev_llr, __global char * dev_dt, __local h_element *dev_h_compact1
, int BG, int row, int col, int Zc)
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
// int iMCW = blockIdx.y;
...
...
@@ -234,8 +234,11 @@ __kernel void ldpc_cnp_kernel( __local char * dev_llr, __local char * dev_dt, in
//
Kernel
2:
VNP
processing
__kernel
void
ldpc_vnp_kernel_normal
(
__
local
char
*
dev_llr,
__local
char
*
dev_dt,
/*
char
*
dev_const_llr,*/
int
BG,
int
row,
int
col,
int
Zc
)
ldpc_vnp_kernel_normal
(
__
global
char
*
dev_llr,
__global
char
*
dev_dt,
__global
char
*
dev_const_llr,
__local
h_element
*dev_h_compact2,
int
BG,
int
row,
int
col,
int
Zc
)
{
//
int
iMCW
=
blockIdx.y
;
//
int
iBlkCol
=
blockIdx.x
;
//
int
iSubCol
=
threadIdx.x
;
int
iMCW
=
get_group_id
(
1
)
;
int
iBlkCol
=
get_group_id
(
0
)
;
int
iBlkRow
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment