Commit 4ce2a98a authored by frtabu

Continue the OpenCL implementation

parent cccc7761
...@@ -40,6 +40,12 @@ ...@@ -40,6 +40,12 @@
#define MAX_OCLDEV 10 #define MAX_OCLDEV 10
#define MAX_OCLRUNTIME 5 #define MAX_OCLRUNTIME 5
/*
 * CLSETKERNELARG(A,B,C,D): set one OpenCL kernel argument and abort on failure.
 *   A - kernel object, B - argument index, C - argument size in bytes,
 *   D - pointer to the argument value (forwarded unchanged to clSetKernelArg).
 *
 * The status is stored in a variable named `rt`, which must already be
 * declared in the calling scope; AssertFatal fires when it is not CL_SUCCESS.
 * Note that B is expanded twice (once for the call, once for the message),
 * so it must be free of side effects.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement and stays correct inside unbraced if/else or loop bodies;
 * the call site supplies the terminating semicolon.
 */
#define CLSETKERNELARG(A,B,C,D) \
  do { \
    rt = clSetKernelArg((A), (B), (C), (D)); \
    AssertFatal(rt == CL_SUCCESS, "Error %d setting kernel argument index %d\n", (int)rt, (int)(B)); \
  } while (0)
typedef struct{ typedef struct{
char x; char x;
char y; char y;
...@@ -304,10 +310,10 @@ int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_ ...@@ -304,10 +310,10 @@ int32_t nrLDPC_decod(t_nrLDPC_dec_params* p_decParams, int8_t* p_llr, int8_t* p_
// int pack = (block_length/128)+1; // int pack = (block_length/128)+1;
// dim3 pack_block(pack, MC, 1); // dim3 pack_block(pack, MC, 1);
// pack_decoded_bit<<<pack_block,128>>>( col, Zc); // pack_decoded_bit<<<pack_block,128>>>( col, Zc);
rt = clSetKernelArg(ocl.runtime[0].kernels[0].pack_decoded_bit, 0, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_llr)); CLSETKERNELARG(ocl.runtime[0].kernels[0].pack_decoded_bit, 0, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_llr));
rt = clSetKernelArg(ocl.runtime[0].kernels[0].pack_decoded_bit, 1, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_tmp)); CLSETKERNELARG(ocl.runtime[0].kernels[0].pack_decoded_bit, 1, sizeof(cl_mem), (void *)&(ocl.runtime[0].dev_tmp));
rt = clSetKernelArg(ocl.runtime[0].kernels[0].pack_decoded_bit, 2, sizeof(int), (void *)&(col)); CLSETKERNELARG(ocl.runtime[0].kernels[0].pack_decoded_bit, 2, sizeof(int), (void *)&(col));
rt = clSetKernelArg(ocl.runtime[0].kernels[0].pack_decoded_bit, 3, sizeof(int), (void *)&(Zc)); CLSETKERNELARG(ocl.runtime[0].kernels[0].pack_decoded_bit, 3, sizeof(int), (void *)&(Zc));
// Execute the OpenCL kernel on the list // Execute the OpenCL kernel on the list
size_t global_item_size = block_length; // Process the entire lists size_t global_item_size = block_length; // Process the entire lists
......
...@@ -88,6 +88,9 @@ __constant char h_ele_col_bg2_count[52] = { ...@@ -88,6 +88,9 @@ __constant char h_ele_col_bg2_count[52] = {
// Kernel 1 // Kernel 1
__kernel void ldpc_cnp_kernel_1st_iter( __local char * dev_llr, __local char * dev_dt, int BG, int row, int col, int Zc) __kernel void ldpc_cnp_kernel_1st_iter( __local char * dev_llr, __local char * dev_dt, int BG, int row, int col, int Zc)
{ {
// int iMCW = blockIdx.y; // codeword id
// int iBlkRow = blockIdx.x; // block row in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x); // if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
int iMCW = get_group_id(1); // codeword id int iMCW = get_group_id(1); // codeword id
int iBlkRow = get_group_id(0); // block row in h_base int iBlkRow = get_group_id(0); // block row in h_base
...@@ -158,6 +161,9 @@ __kernel void ldpc_cnp_kernel_1st_iter( __local char * dev_llr, __local char * d ...@@ -158,6 +161,9 @@ __kernel void ldpc_cnp_kernel_1st_iter( __local char * dev_llr, __local char * d
__kernel void ldpc_cnp_kernel( __local char * dev_llr, __local char * dev_dt, int BG, int row, int col, int Zc) __kernel void ldpc_cnp_kernel( __local char * dev_llr, __local char * dev_dt, int BG, int row, int col, int Zc)
{ {
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n"); // if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
// int iMCW = blockIdx.y;
// int iBlkRow = blockIdx.x; // block row in h_base // block col in h_base
// int iSubRow = threadIdx.x; // row index in sub_block of h_base
int iMCW = get_group_id(1); int iMCW = get_group_id(1);
int iBlkRow = get_group_id(0); // block row in h_base int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base int iBlkCol; // block col in h_base
...@@ -270,6 +276,8 @@ ldpc_vnp_kernel_normal(__local char * dev_llr, __local char * dev_dt, /* char * ...@@ -270,6 +276,8 @@ ldpc_vnp_kernel_normal(__local char * dev_llr, __local char * dev_dt, /* char *
__kernel void pack_decoded_bit(__global unsigned char * dev_llr, __global unsigned char * dev_tmp, int col, int Zc) __kernel void pack_decoded_bit(__global unsigned char * dev_llr, __global unsigned char * dev_tmp, int col, int Zc)
{ {
// int iMCW = blockIdx.y;
// int btid = threadIdx.x;
unsigned char tmp[128]; unsigned char tmp[128];
int iMCW = get_group_id(1); int iMCW = get_group_id(1);
int btid = get_local_id(0); int btid = get_local_id(0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment