Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
canghaiwuhen
OpenXG-RAN
Commits
10d52466
Commit
10d52466
authored
Aug 13, 2020
by
NCTU OpinConnect Terng-Yin Hsu/WEI-YING,LIN
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix CUDA diff with develop
parent
96023eeb
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
4 additions
and
223 deletions
+4
-223
cmake_targets/build_oai
cmake_targets/build_oai
+2
-2
openair1/PHY/CODING/TESTBENCH/ldpctest.c
openair1/PHY/CODING/TESTBENCH/ldpctest.c
+0
-8
openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
+0
-210
openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.h
openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.h
+2
-3
No files found.
cmake_targets/build_oai
View file @
10d52466
...
@@ -701,8 +701,8 @@ function main() {
...
@@ -701,8 +701,8 @@ function main() {
if
[
"
$SIMUS_PHY
"
=
"1"
]
;
then
if
[
"
$SIMUS_PHY
"
=
"1"
]
;
then
echo_info
"Compiling physical unitary tests simulators"
echo_info
"Compiling physical unitary tests simulators"
# TODO: fix: dlsim_tm4 pucchsim prachsim pdcchsim pbchsim mbmssim
# TODO: fix: dlsim_tm4 pucchsim prachsim pdcchsim pbchsim mbmssim
simlist
=
"dlsim ulsim ldpctest polartest smallblocktest nr_pbchsim nr_dlschsim nr_ulschsim nr_dlsim nr_ulsim nr_pucchsim nr_prachsim"
#
simlist="dlsim ulsim ldpctest polartest smallblocktest nr_pbchsim nr_dlschsim nr_ulschsim nr_dlsim nr_ulsim nr_pucchsim nr_prachsim"
#
simlist="ldpctest"
simlist
=
"ldpctest"
for
f
in
$simlist
;
do
for
f
in
$simlist
;
do
compilations
\
compilations
\
phy_simulators
$f
\
phy_simulators
$f
\
...
...
openair1/PHY/CODING/TESTBENCH/ldpctest.c
View file @
10d52466
...
@@ -402,17 +402,9 @@ int test_ldpc(short No_iteration,
...
@@ -402,17 +402,9 @@ int test_ldpc(short No_iteration,
start_meas
(
time_decoder
);
start_meas
(
time_decoder
);
#ifdef CUDA_FLAG
#ifdef CUDA_FLAG
if
(
run_cuda
){
if
(
run_cuda
){
<<<<<<<
HEAD
n_iter
=
nrLDPC_decoder_LYC
(
&
decParams
,
(
int8_t
*
)
channel_output_fixed
[
j
],
(
int8_t
*
)
estimated_output
[
j
],
block_length
,
time_decoder
);
n_iter
=
nrLDPC_decoder_LYC
(
&
decParams
,
(
int8_t
*
)
channel_output_fixed
[
j
],
(
int8_t
*
)
estimated_output
[
j
],
block_length
,
time_decoder
);
}
}
else
{
else
{
=======
printf
(
"***********run ldpc by cuda
\n
"
);
n_iter
=
nrLDPC_decoder_LYC
(
&
decParams
,
(
int8_t
*
)
channel_output_fixed
[
j
],
(
int8_t
*
)
estimated_output
[
j
],
block_length
,
time_decoder
);
}
else
{
printf
(
"**************run ldpc by cpu
\n
"
);
>>>>>>>
origin
/
develop
// decode the sequence
// decode the sequence
// decoder supports BG2, Z=128 & 256
// decoder supports BG2, Z=128 & 256
//esimated_output=ldpc_decoder(channel_output_fixed, block_length, No_iteration, (double)((float)nom_rate/(float)denom_rate));
//esimated_output=ldpc_decoder(channel_output_fixed, block_length, No_iteration, (double)((float)nom_rate/(float)denom_rate));
...
...
openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.cu
View file @
10d52466
...
@@ -32,11 +32,7 @@
...
@@ -32,11 +32,7 @@
#include "bgs/BG2_I6"
#include "bgs/BG2_I6"
#include "bgs/BG2_I7"
#include "bgs/BG2_I7"
<<<<<<<
HEAD
#define MAX_ITERATION 2
#define MAX_ITERATION 2
=======
#define MAX_ITERATION 5
>>>>>>>
origin
/
develop
#define MC 1
#define MC 1
#define cudaCheck(ans) { cudaAssert((ans), __FILE__, __LINE__); }
#define cudaCheck(ans) { cudaAssert((ans), __FILE__, __LINE__); }
...
@@ -53,30 +49,21 @@ typedef struct{
...
@@ -53,30 +49,21 @@ typedef struct{
char
y
;
char
y
;
short
value
;
short
value
;
}
h_element
;
}
h_element
;
<<<<<<<
HEAD
#include "bgs/BG1_compact_in_C.h"
#include "bgs/BG1_compact_in_C.h"
__device__
char
dev_const_llr
[
68
*
384
];
__device__
char
dev_const_llr
[
68
*
384
];
__device__
char
dev_dt
[
46
*
68
*
384
];
__device__
char
dev_dt
[
46
*
68
*
384
];
__device__
char
dev_llr
[
68
*
384
];
__device__
char
dev_llr
[
68
*
384
];
__device__
unsigned
char
dev_tmp
[
68
*
384
];
__device__
unsigned
char
dev_tmp
[
68
*
384
];
=======
>>>>>>>
origin
/
develop
h_element
h_compact1
[
46
*
19
]
=
{};
h_element
h_compact1
[
46
*
19
]
=
{};
h_element
h_compact2
[
68
*
30
]
=
{};
h_element
h_compact2
[
68
*
30
]
=
{};
<<<<<<<
HEAD
__device__
h_element
dev_h_compact1
[
46
*
19
];
// used in kernel 1
__device__
h_element
dev_h_compact1
[
46
*
19
];
// used in kernel 1
__device__
h_element
dev_h_compact2
[
68
*
30
];
// used in kernel 2
__device__
h_element
dev_h_compact2
[
68
*
30
];
// used in kernel 2
// __device__ __constant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constant__ h_element dev_h_compact2[68*30]; // used in kernel 2
// __device__ __constant__ h_element dev_h_compact2[68*30]; // used in kernel 2
=======
__device__
__constant__
h_element
dev_h_compact1
[
46
*
19
];
// used in kernel 1
__device__
__constant__
h_element
dev_h_compact2
[
68
*
30
];
// used in kernel 2
>>>>>>>
origin
/
develop
// row and col element count
// row and col element count
__device__
__constant__
char
h_ele_row_bg1_count
[
46
]
=
{
__device__
__constant__
char
h_ele_row_bg1_count
[
46
]
=
{
...
@@ -113,7 +100,6 @@ __global__ void warmup()
...
@@ -113,7 +100,6 @@ __global__ void warmup()
// warm up gpu for time measurement
// warm up gpu for time measurement
}
}
<<<<<<<
HEAD
extern
"C"
extern
"C"
void
warmup_for_GPU
(){
void
warmup_for_GPU
(){
...
@@ -201,11 +187,6 @@ void set_compact_BG(int Zc,short BG){
...
@@ -201,11 +187,6 @@ void set_compact_BG(int Zc,short BG){
// Kernel 1
// Kernel 1
__global__
void
ldpc_cnp_kernel_1st_iter
(
/*char * dev_llr,*/
int
BG
,
int
row
,
int
col
,
int
Zc
)
__global__
void
ldpc_cnp_kernel_1st_iter
(
/*char * dev_llr,*/
int
BG
,
int
row
,
int
col
,
int
Zc
)
=======
// Kernel 1
__global__
void
ldpc_cnp_kernel_1st_iter
(
char
*
dev_llr
,
char
*
dev_dt
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
>>>>>>>
origin
/
develop
{
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
int
iMCW
=
blockIdx
.
y
;
// codeword id
int
iMCW
=
blockIdx
.
y
;
// codeword id
...
@@ -264,11 +245,7 @@ __global__ void ldpc_cnp_kernel_1st_iter(char * dev_llr, char * dev_dt, int BG,
...
@@ -264,11 +245,7 @@ __global__ void ldpc_cnp_kernel_1st_iter(char * dev_llr, char * dev_dt, int BG,
for
(
int
i
=
0
;
i
<
s
;
i
++
){
for
(
int
i
=
0
;
i
<
s
;
i
++
){
// v0: Best performance so far. 0.75f is the value of alpha.
// v0: Best performance so far. 0.75f is the value of alpha.
sq
=
1
-
2
*
((
Q_sign
>>
i
)
&
0x01
);
sq
=
1
-
2
*
((
Q_sign
>>
i
)
&
0x01
);
<<<<<<<
HEAD
R_temp
=
0.75
f
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
R_temp
=
0.75
f
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
=======
R_temp
=
0.8
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
>>>>>>>
origin
/
develop
// write results to global memory
// write results to global memory
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
h_element_t
=
dev_h_compact1
[
i
*
row
+
iBlkRow
];
int
addr_temp
=
offsetR
+
h_element_t
.
y
*
row
*
Zc
;
int
addr_temp
=
offsetR
+
h_element_t
.
y
*
row
*
Zc
;
...
@@ -278,11 +255,7 @@ __global__ void ldpc_cnp_kernel_1st_iter(char * dev_llr, char * dev_dt, int BG,
...
@@ -278,11 +255,7 @@ __global__ void ldpc_cnp_kernel_1st_iter(char * dev_llr, char * dev_dt, int BG,
}
}
// Kernel_1
// Kernel_1
<<<<<<<
HEAD
__global__
void
ldpc_cnp_kernel
(
/*char * dev_llr, char * dev_dt,*/
int
BG
,
int
row
,
int
col
,
int
Zc
)
__global__
void
ldpc_cnp_kernel
(
/*char * dev_llr, char * dev_dt,*/
int
BG
,
int
row
,
int
col
,
int
Zc
)
=======
__global__
void
ldpc_cnp_kernel
(
char
*
dev_llr
,
char
*
dev_dt
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
>>>>>>>
origin
/
develop
{
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
int
iMCW
=
blockIdx
.
y
;
int
iMCW
=
blockIdx
.
y
;
...
@@ -342,11 +315,7 @@ __global__ void ldpc_cnp_kernel(char * dev_llr, char * dev_dt, int BG, int row,
...
@@ -342,11 +315,7 @@ __global__ void ldpc_cnp_kernel(char * dev_llr, char * dev_dt, int BG, int row,
// The 2nd recursion
// The 2nd recursion
for
(
int
i
=
0
;
i
<
s
;
i
++
){
for
(
int
i
=
0
;
i
<
s
;
i
++
){
sq
=
1
-
2
*
((
Q_sign
>>
i
)
&
0x01
);
sq
=
1
-
2
*
((
Q_sign
>>
i
)
&
0x01
);
<<<<<<<
HEAD
R_temp
=
0.75
f
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
R_temp
=
0.75
f
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
=======
R_temp
=
0.8
*
sign
*
sq
*
(
i
!=
idx_min
?
rmin1
:
rmin2
);
>>>>>>>
origin
/
develop
// write results to global memory
// write results to global memory
...
@@ -359,11 +328,7 @@ __global__ void ldpc_cnp_kernel(char * dev_llr, char * dev_dt, int BG, int row,
...
@@ -359,11 +328,7 @@ __global__ void ldpc_cnp_kernel(char * dev_llr, char * dev_dt, int BG, int row,
// Kernel 2: VNP processing
// Kernel 2: VNP processing
__global__
void
__global__
void
<<<<<<<
HEAD
ldpc_vnp_kernel_normal
(
/*char * dev_llr, char * dev_dt, char * dev_const_llr,*/
int
BG
,
int
row
,
int
col
,
int
Zc
)
ldpc_vnp_kernel_normal
(
/*char * dev_llr, char * dev_dt, char * dev_const_llr,*/
int
BG
,
int
row
,
int
col
,
int
Zc
)
=======
ldpc_vnp_kernel_normal
(
char
*
dev_llr
,
char
*
dev_dt
,
char
*
dev_const_llr
,
int
BG
,
int
row
,
int
col
,
int
Zc
)
>>>>>>>
origin
/
develop
{
{
int
iMCW
=
blockIdx
.
y
;
int
iMCW
=
blockIdx
.
y
;
int
iBlkCol
=
blockIdx
.
x
;
int
iBlkCol
=
blockIdx
.
x
;
...
@@ -403,11 +368,7 @@ ldpc_vnp_kernel_normal(char * dev_llr, char * dev_dt, char * dev_const_llr, int
...
@@ -403,11 +368,7 @@ ldpc_vnp_kernel_normal(char * dev_llr, char * dev_dt, char * dev_const_llr, int
}
}
<<<<<<<
HEAD
__global__
void
pack_decoded_bit
(
/*char *dev, unsigned char *host,*/
int
col
,
int
Zc
)
__global__
void
pack_decoded_bit
(
/*char *dev, unsigned char *host,*/
int
col
,
int
Zc
)
=======
__global__
void
pack_decoded_bit
(
char
*
dev
,
unsigned
char
*
host
,
int
col
,
int
Zc
)
>>>>>>>
origin
/
develop
{
{
__shared__
unsigned
char
tmp
[
128
];
__shared__
unsigned
char
tmp
[
128
];
int
iMCW
=
blockIdx
.
y
;
int
iMCW
=
blockIdx
.
y
;
...
@@ -415,25 +376,15 @@ __global__ void pack_decoded_bit(char *dev, unsigned char *host, int col, int Zc
...
@@ -415,25 +376,15 @@ __global__ void pack_decoded_bit(char *dev, unsigned char *host, int col, int Zc
int
btid
=
threadIdx
.
x
;
int
btid
=
threadIdx
.
x
;
tmp
[
btid
]
=
0
;
tmp
[
btid
]
=
0
;
<<<<<<<
HEAD
if
(
dev_llr
[
tid
]
<
0
){
if
(
dev_llr
[
tid
]
<
0
){
=======
if
(
dev
[
tid
]
<
0
){
>>>>>>>
origin
/
develop
tmp
[
btid
]
=
1
<<
(
7
-
(
btid
&
7
));
tmp
[
btid
]
=
1
<<
(
7
-
(
btid
&
7
));
}
}
__syncthreads
();
__syncthreads
();
if
(
threadIdx
.
x
<
16
){
if
(
threadIdx
.
x
<
16
){
<<<<<<<
HEAD
dev_tmp
[
iMCW
*
col
*
Zc
+
blockIdx
.
x
*
16
+
threadIdx
.
x
]
=
0
;
dev_tmp
[
iMCW
*
col
*
Zc
+
blockIdx
.
x
*
16
+
threadIdx
.
x
]
=
0
;
for
(
int
i
=
0
;
i
<
8
;
i
++
){
for
(
int
i
=
0
;
i
<
8
;
i
++
){
dev_tmp
[
iMCW
*
col
*
Zc
+
blockIdx
.
x
*
16
+
threadIdx
.
x
]
+=
tmp
[
threadIdx
.
x
*
8
+
i
];
dev_tmp
[
iMCW
*
col
*
Zc
+
blockIdx
.
x
*
16
+
threadIdx
.
x
]
+=
tmp
[
threadIdx
.
x
*
8
+
i
];
=======
host
[
iMCW
*
col
*
Zc
+
blockIdx
.
x
*
16
+
threadIdx
.
x
]
=
0
;
for
(
int
i
=
0
;
i
<
8
;
i
++
){
host
[
iMCW
*
col
*
Zc
+
blockIdx
.
x
*
16
+
threadIdx
.
x
]
+=
tmp
[
threadIdx
.
x
*
8
+
i
];
>>>>>>>
origin
/
develop
}
}
}
}
}
}
...
@@ -510,7 +461,6 @@ void read_BG(int BG, int *h, int row, int col)
...
@@ -510,7 +461,6 @@ void read_BG(int BG, int *h, int row, int col)
*/
*/
}
}
<<<<<<<
HEAD
extern
"C"
extern
"C"
void
init_LLR_DMA_for_CUDA
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
,
int
block_length
){
void
init_LLR_DMA_for_CUDA
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
,
int
block_length
){
...
@@ -532,28 +482,17 @@ void init_LLR_DMA_for_CUDA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
...
@@ -532,28 +482,17 @@ void init_LLR_DMA_for_CUDA(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
cudaDeviceSynchronize
();
cudaDeviceSynchronize
();
}
}
=======
>>>>>>>
origin
/
develop
extern
"C"
extern
"C"
int32_t
nrLDPC_decoder_LYC
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
,
int
block_length
,
time_stats_t
*
time_decoder
)
int32_t
nrLDPC_decoder_LYC
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
,
int
block_length
,
time_stats_t
*
time_decoder
)
{
{
<<<<<<<
HEAD
=======
// alloc mem
//unsigned char *decision = (unsigned char*)p_out;
>>>>>>>
origin
/
develop
uint16_t
Zc
=
p_decParams
->
Z
;
uint16_t
Zc
=
p_decParams
->
Z
;
uint8_t
BG
=
p_decParams
->
BG
;
uint8_t
BG
=
p_decParams
->
BG
;
uint8_t
numMaxIter
=
p_decParams
->
numMaxIter
;
uint8_t
numMaxIter
=
p_decParams
->
numMaxIter
;
e_nrLDPC_outMode
outMode
=
p_decParams
->
outMode
;
e_nrLDPC_outMode
outMode
=
p_decParams
->
outMode
;
<<<<<<<
HEAD
cudaError_t
cudaStatus
;
cudaError_t
cudaStatus
;
=======
>>>>>>>
origin
/
develop
uint8_t
row
,
col
;
uint8_t
row
,
col
;
if
(
BG
==
1
){
if
(
BG
==
1
){
row
=
46
;
row
=
46
;
...
@@ -563,7 +502,6 @@ int32_t nrLDPC_decoder_LYC(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
...
@@ -563,7 +502,6 @@ int32_t nrLDPC_decoder_LYC(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
row
=
42
;
row
=
42
;
col
=
52
;
col
=
52
;
}
}
<<<<<<<
HEAD
// alloc memory
// alloc memory
unsigned
char
*
hard_decision
=
(
unsigned
char
*
)
p_out
;
unsigned
char
*
hard_decision
=
(
unsigned
char
*
)
p_out
;
...
@@ -572,98 +510,6 @@ int32_t nrLDPC_decoder_LYC(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
...
@@ -572,98 +510,6 @@ int32_t nrLDPC_decoder_LYC(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8
cudaCheck
(
cudaMemcpyToSymbol
(
dev_const_llr
,
p_llr
,
memorySize_llr_cuda
)
);
cudaCheck
(
cudaMemcpyToSymbol
(
dev_const_llr
,
p_llr
,
memorySize_llr_cuda
)
);
cudaCheck
(
cudaMemcpyToSymbol
(
dev_llr
,
p_llr
,
memorySize_llr_cuda
)
);
cudaCheck
(
cudaMemcpyToSymbol
(
dev_llr
,
p_llr
,
memorySize_llr_cuda
)
);
=======
int
compact_row
=
30
,
compact_col
=
19
,
lift_index
=
0
;;
if
(
BG
==
2
){
compact_row
=
10
,
compact_col
=
23
;}
short
lift_set
[][
9
]
=
{
{
2
,
4
,
8
,
16
,
32
,
64
,
128
,
256
},
{
3
,
6
,
12
,
24
,
48
,
96
,
192
,
384
},
{
5
,
10
,
20
,
40
,
80
,
160
,
320
},
{
7
,
14
,
28
,
56
,
112
,
224
},
{
9
,
18
,
36
,
72
,
144
,
288
},
{
11
,
22
,
44
,
88
,
176
,
352
},
{
13
,
26
,
52
,
104
,
208
},
{
15
,
30
,
60
,
120
,
240
},
{
0
}
};
for
(
int
i
=
0
;
lift_set
[
i
][
0
]
!=
0
;
i
++
){
for
(
int
j
=
0
;
lift_set
[
i
][
j
]
!=
0
;
j
++
){
if
(
Zc
==
lift_set
[
i
][
j
]){
lift_index
=
i
;
break
;
}
}
}
int
*
h
=
NULL
;
switch
(
lift_index
){
case
0
:
h
=
(
BG
==
1
)
?
h_base_0
:
h_base_8
;
break
;
case
1
:
h
=
(
BG
==
1
)
?
h_base_1
:
h_base_9
;
break
;
case
2
:
h
=
(
BG
==
1
)
?
h_base_2
:
h_base_10
;
break
;
case
3
:
h
=
(
BG
==
1
)
?
h_base_3
:
h_base_11
;
break
;
case
4
:
h
=
(
BG
==
1
)
?
h_base_4
:
h_base_12
;
break
;
case
5
:
h
=
(
BG
==
1
)
?
h_base_5
:
h_base_13
;
break
;
case
6
:
h
=
(
BG
==
1
)
?
h_base_6
:
h_base_14
;
break
;
case
7
:
h
=
(
BG
==
1
)
?
h_base_7
:
h_base_15
;
break
;
}
/* pack BG in compact graph */
read_BG
(
BG
,
h
,
row
,
col
);
int
memorySize_h_compact1
=
row
*
compact_col
*
sizeof
(
h_element
);
int
memorySize_h_compact2
=
compact_row
*
col
*
sizeof
(
h_element
);
// cpu
int
memorySize_hard_decision
=
col
*
Zc
*
sizeof
(
unsigned
char
)
*
MC
;
// alloc memory
unsigned
char
*
hard_decision
=
(
unsigned
char
*
)
p_out
;
// gpu
int
memorySize_llr_cuda
=
col
*
Zc
*
sizeof
(
char
)
*
MC
;
int
memorySize_dt_cuda
=
row
*
Zc
*
col
*
sizeof
(
char
)
*
MC
;
// alloc memory
char
*
dev_llr
;
char
*
dev_dt
;
char
*
dev_const_llr
;
unsigned
char
*
dev_tmp
;
cudaCheck
(
cudaMalloc
((
void
**
)
&
dev_tmp
,
memorySize_hard_decision
)
);
cudaCheck
(
cudaMalloc
((
void
**
)
&
dev_llr
,
memorySize_llr_cuda
)
);
cudaCheck
(
cudaMalloc
((
void
**
)
&
dev_const_llr
,
memorySize_llr_cuda
)
);
cudaCheck
(
cudaMalloc
((
void
**
)
&
dev_dt
,
memorySize_dt_cuda
)
);
// memcpy host to device
cudaCheck
(
cudaMemcpyToSymbol
(
dev_h_compact1
,
h_compact1
,
memorySize_h_compact1
)
);
cudaCheck
(
cudaMemcpyToSymbol
(
dev_h_compact2
,
h_compact2
,
memorySize_h_compact2
)
);
cudaCheck
(
cudaMemcpy
((
void
*
)
dev_const_llr
,
p_llr
,
memorySize_llr_cuda
,
cudaMemcpyHostToDevice
)
);
start_meas
(
time_decoder
);
cudaCheck
(
cudaMemcpy
((
void
*
)
dev_llr
,
p_llr
,
memorySize_llr_cuda
,
cudaMemcpyHostToDevice
)
);
>>>>>>>
origin
/
develop
// Define CUDA kernel dimension
// Define CUDA kernel dimension
int
blockSizeX
=
Zc
;
int
blockSizeX
=
Zc
;
dim3
dimGridKernel1
(
row
,
MC
,
1
);
// dim of the thread blocks
dim3
dimGridKernel1
(
row
,
MC
,
1
);
// dim of the thread blocks
...
@@ -672,33 +518,14 @@ start_meas(time_decoder);
...
@@ -672,33 +518,14 @@ start_meas(time_decoder);
dim3
dimGridKernel2
(
col
,
MC
,
1
);
dim3
dimGridKernel2
(
col
,
MC
,
1
);
dim3
dimBlockKernel2
(
blockSizeX
,
1
,
1
);
dim3
dimBlockKernel2
(
blockSizeX
,
1
,
1
);
cudaDeviceSynchronize
();
cudaDeviceSynchronize
();
<<<<<<<
HEAD
// lauch kernel
// lauch kernel
=======
cudaEvent_t
start
,
end
;
float
time
;
warmup
<<<
dimGridKernel1
,
dimBlockKernel1
>>>
();
warmup
<<<
dimGridKernel2
,
dimBlockKernel2
>>>
();
cudaEventCreate
(
&
start
);
cudaEventCreate
(
&
end
);
cudaEventRecord
(
start
,
0
);
// cudaProfilerStart();
// lauch kernel
>>>>>>>
origin
/
develop
for
(
int
ii
=
0
;
ii
<
MAX_ITERATION
;
ii
++
){
for
(
int
ii
=
0
;
ii
<
MAX_ITERATION
;
ii
++
){
// first kernel
// first kernel
if
(
ii
==
0
){
if
(
ii
==
0
){
ldpc_cnp_kernel_1st_iter
ldpc_cnp_kernel_1st_iter
<<<
dimGridKernel1
,
dimBlockKernel1
>>>
<<<
dimGridKernel1
,
dimBlockKernel1
>>>
<<<<<<<
HEAD
(
/*dev_llr,*/
BG
,
row
,
col
,
Zc
);
(
/*dev_llr,*/
BG
,
row
,
col
,
Zc
);
}
else
{
}
else
{
ldpc_cnp_kernel
ldpc_cnp_kernel
...
@@ -720,43 +547,6 @@ start_meas(time_decoder);
...
@@ -720,43 +547,6 @@ start_meas(time_decoder);
cudaDeviceSynchronize
();
cudaDeviceSynchronize
();
=======
(
dev_llr
,
dev_dt
,
BG
,
row
,
col
,
Zc
);
}
else
{
ldpc_cnp_kernel
<<<
dimGridKernel1
,
dimBlockKernel1
>>>
(
dev_llr
,
dev_dt
,
BG
,
row
,
col
,
Zc
);
}
// second kernel
ldpc_vnp_kernel_normal
<<<
dimGridKernel2
,
dimBlockKernel2
>>>
(
dev_llr
,
dev_dt
,
dev_const_llr
,
BG
,
row
,
col
,
Zc
);
}
int
pack
=
(
block_length
/
128
)
+
1
;
dim3
pack_block
(
pack
,
MC
,
1
);
pack_decoded_bit
<<<
pack_block
,
128
>>>
(
dev_llr
,
dev_tmp
,
col
,
Zc
);
cudaEventRecord
(
end
,
0
);
cudaEventSynchronize
(
end
);
cudaEventElapsedTime
(
&
time
,
start
,
end
);
//cudaCheck( cudaMemcpy((*)hard_decision, (const void*)dev_tmp, memorySize_hard_decision, cudaMemcpyDeviceToHost) );
cudaCheck
(
cudaMemcpy
((
void
*
)
hard_decision
,
(
const
void
*
)
dev_tmp
,
(
block_length
/
8
)
*
sizeof
(
unsigned
char
),
cudaMemcpyDeviceToHost
)
);
cudaDeviceSynchronize
();
stop_meas
(
time_decoder
);
cudaCheck
(
cudaFree
(
dev_llr
)
);
cudaCheck
(
cudaFree
(
dev_dt
)
);
cudaCheck
(
cudaFree
(
dev_const_llr
)
);
cudaCheck
(
cudaFree
(
dev_tmp
)
);
//free(hard_decision);
>>>>>>>
origin
/
develop
return
MAX_ITERATION
;
return
MAX_ITERATION
;
}
}
openair1/PHY/CODING/nrLDPC_decoder_LYC/nrLDPC_decoder_LYC.h
View file @
10d52466
...
@@ -23,13 +23,12 @@
...
@@ -23,13 +23,12 @@
int32_t
nrLDPC_decoder_LYC
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
,
int
block_length
,
time_stats_t
*
time_decoder
);
int32_t
nrLDPC_decoder_LYC
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
,
int
block_length
,
time_stats_t
*
time_decoder
);
<<<<<<<
HEAD
void
init_LLR_DMA_for_CUDA
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
,
int
block_length
);
void
init_LLR_DMA_for_CUDA
(
t_nrLDPC_dec_params
*
p_decParams
,
int8_t
*
p_llr
,
int8_t
*
p_out
,
int
block_length
);
void
warmup_for_GPU
(
void
);
void
warmup_for_GPU
(
void
);
void
set_compact_BG
(
int
Zc
,
short
BG
);
void
set_compact_BG
(
int
Zc
,
short
BG
);
=======
>>>>>>>
origin
/
develop
#endif
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment