Commit 3586e83f authored by Raymond Knopp's avatar Raymond Knopp Committed by frtabu

start translating cuda ldpc sources to opencl

parent 4345d42e
...@@ -110,6 +110,23 @@ char *clutil_getstrdev(int intdev) { ...@@ -110,6 +110,23 @@ char *clutil_getstrdev(int intdev) {
return retstring; return retstring;
} }
void get_CompilErr(cl_program program, int pltf) {
// Determine the size of the log
size_t log_size;
for(int i=0; i<ocl.runtime[pltf].num_devices;i++) {
clGetProgramBuildInfo(program, ocl.runtime[pltf].devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
// Allocate memory for the log
char *log = (char *) malloc(log_size);
// Get the log
clGetProgramBuildInfo(program, ocl.runtime[pltf].devices[i], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
// Print the log
printf("%s\n", log);
free(log);
}
}
size_t load_source(char **source_str) { size_t load_source(char **source_str) {
int MAX_SOURCE_SIZE=(500*132); int MAX_SOURCE_SIZE=(500*132);
FILE *fp; FILE *fp;
...@@ -177,7 +194,10 @@ int ldpc_autoinit(void) { // called by the library loader ...@@ -177,7 +194,10 @@ int ldpc_autoinit(void) { // called by the library loader
cl_program program = clCreateProgramWithSource(ocl.runtime[i].context, 1, cl_program program = clCreateProgramWithSource(ocl.runtime[i].context, 1,
(const char **)&source_str, (const size_t *)&source_size, (cl_int *)&rt); (const char **)&source_str, (const size_t *)&source_size, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating program for platform %i \n" , (int)rt, i); AssertFatal(rt == CL_SUCCESS, "Error %d creating program for platform %i \n" , (int)rt, i);
rt = clBuildProgram(program, ocl.runtime[i].num_devices,ocl.runtime[i].devices, NULL, NULL, NULL); rt = clBuildProgram(program, ocl.runtime[i].num_devices,ocl.runtime[i].devices, NULL, NULL, NULL);
if (rt == CL_BUILD_PROGRAM_FAILURE) {
get_CompilErr(program,i);
}
AssertFatal(rt == CL_SUCCESS, "Error %d buildding program for platform %i \n" , rt, i); AssertFatal(rt == CL_SUCCESS, "Error %d buildding program for platform %i \n" , rt, i);
context_ok++; context_ok++;
} }
......
define MAX_ITERATION 2 #define define MAX_ITERATION 2
#define MC 1 #define MC 1
#define INT32_MAX 2147483647
typedef struct{ typedef struct{
char x; char x;
...@@ -10,28 +10,29 @@ typedef struct{ ...@@ -10,28 +10,29 @@ typedef struct{
short value; short value;
} h_element; } h_element;
char dev_const_llr[68*384]; __constant char dev_const_llr[68*384]= {};
char dev_dt [46*68*384]; //__global char dev_dt [46*68*384];
char dev_llr[68*384]; //__local char *dev_t;
unsigned char dev_tmp[68*384]; //__global char dev_llr[68*384];
//__global unsigned char dev_tmp[68*384];
h_element h_compact1 [46*19] = {}; __constant h_element h_compact1 [46*19] = {};
h_element h_compact2 [68*30] = {}; __constant h_element h_compact2 [68*30] = {};
h_element dev_h_compact1[46*19]; // used in kernel 1 __constant h_element dev_h_compact1[46*19] = {}; // used in kernel 1
h_element dev_h_compact2[68*30]; // used in kernel 2 __constant h_element dev_h_compact2[68*30] = {}; // used in kernel 2
// __device__ __constant__ h_element dev_h_compact1[46*19]; // used in kernel 1 // __device__ __constantant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constant__ h_element dev_h_compact2[68*30]; // used in kernel 2 // __device__ __constantant__ h_element dev_h_compact2[68*30]; // used in kernel 2
// row and col element count // row and col element count
const char h_ele_row_bg1_count[46] = { __constant char h_ele_row_bg1_count[46] = {
19, 19, 19, 19, 3, 8, 9, 7, 10, 9, 19, 19, 19, 19, 3, 8, 9, 7, 10, 9,
7, 8, 7, 6, 7, 7, 6, 6, 6, 6, 7, 8, 7, 6, 7, 7, 6, 6, 6, 6,
6, 6, 5, 5, 6, 5, 5, 4, 5, 5, 6, 6, 5, 5, 6, 5, 5, 4, 5, 5,
5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5,
4, 5, 4, 5, 5, 4}; 4, 5, 4, 5, 5, 4};
const char h_ele_col_bg1_count[68] = { __constant char h_ele_col_bg1_count[68] = {
30, 28, 7, 11, 9, 4, 8, 12, 8, 7, 30, 28, 7, 11, 9, 4, 8, 12, 8, 7,
12, 10, 12, 11, 10, 7, 10, 10, 13, 7, 12, 10, 12, 11, 10, 7, 10, 10, 13, 7,
8, 11, 12, 5, 6, 6, 1, 1, 1, 1, 8, 11, 12, 5, 6, 6, 1, 1, 1, 1,
...@@ -39,13 +40,13 @@ const char h_ele_col_bg1_count[68] = { ...@@ -39,13 +40,13 @@ const char h_ele_col_bg1_count[68] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1}; 1, 1, 1, 1, 1, 1, 1, 1};
const char h_ele_row_bg2_count[42] = { __constant char h_ele_row_bg2_count[42] = {
8, 10, 8, 10, 4, 6, 6, 6, 4, 5, 8, 10, 8, 10, 4, 6, 6, 6, 4, 5,
5, 5, 4, 5, 5, 4, 5, 5, 4, 4, 5, 5, 4, 5, 5, 4, 5, 5, 4, 4,
4, 4, 3, 4, 4, 3, 5, 3, 4, 3, 4, 4, 3, 4, 4, 3, 5, 3, 4, 3,
5, 3, 4, 4, 4, 4, 4, 3, 4, 4, 5, 3, 4, 4, 4, 4, 4, 3, 4, 4,
4, 4}; 4, 4};
const char h_ele_col_bg2_count[52] = { __constant char h_ele_col_bg2_count[52] = {
22, 23, 10, 5, 5, 14, 7, 13, 6, 8, 22, 23, 10, 5, 5, 14, 7, 13, 6, 8,
9, 16, 9, 12, 1, 1, 1, 1, 1, 1, 9, 16, 9, 12, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
...@@ -56,13 +57,13 @@ const char h_ele_col_bg2_count[52] = { ...@@ -56,13 +57,13 @@ const char h_ele_col_bg2_count[52] = {
// Kernel 1 // Kernel 1
__kernel void ldpc_cnp_kernel_1st_iter(/*char * dev_llr,*/ int BG, int row, int col, int Zc) __kernel void ldpc_cnp_kernel_1st_iter( __local char * dev_llr, __local char * dev_dt, int BG, int row, int col, int Zc)
{ {
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x); // if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
int iMCW = blockIdx.y; // codeword id int iMCW = get_group_id(1); // codeword id
int iBlkRow = blockIdx.x; // block row in h_base int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base int iBlkCol; // block col in h_base
int iSubRow = threadIdx.x; // row index in sub_block of h_base int iSubRow = get_local_id(0);; // row index in sub_block of h_base
int iCol; // overall col index in h_base int iCol; // overall col index in h_base
int offsetR; int offsetR;
int shift_t; int shift_t;
...@@ -125,13 +126,13 @@ __kernel void ldpc_cnp_kernel_1st_iter(/*char * dev_llr,*/ int BG, int row, int ...@@ -125,13 +126,13 @@ __kernel void ldpc_cnp_kernel_1st_iter(/*char * dev_llr,*/ int BG, int row, int
} }
// Kernel_1 // Kernel_1
__kernel void ldpc_cnp_kernel(/*char * dev_llr, char * dev_dt,*/ int BG, int row, int col, int Zc) __kernel void ldpc_cnp_kernel( __local char * dev_llr, __local char * dev_dt, int BG, int row, int col, int Zc)
{ {
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n"); // if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
int iMCW = blockIdx.y; int iMCW = get_group_id(1);
int iBlkRow = blockIdx.x; // block row in h_base int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base int iBlkCol; // block col in h_base
int iSubRow = threadIdx.x; // row index in sub_block of h_base int iSubRow = get_local_id(0);; // row index in sub_block of h_base
int iCol; // overall col index in h_base int iCol; // overall col index in h_base
int offsetR; int offsetR;
int shift_t; int shift_t;
...@@ -198,12 +199,12 @@ __kernel void ldpc_cnp_kernel(/*char * dev_llr, char * dev_dt,*/ int BG, int row ...@@ -198,12 +199,12 @@ __kernel void ldpc_cnp_kernel(/*char * dev_llr, char * dev_dt,*/ int BG, int row
// Kernel 2: VNP processing // Kernel 2: VNP processing
__kernel void __kernel void
ldpc_vnp_kernel_normal(/*char * dev_llr, char * dev_dt, char * dev_const_llr,*/ int BG, int row, int col, int Zc) ldpc_vnp_kernel_normal(__local char * dev_llr, __local char * dev_dt, /* char * dev_const_llr,*/ int BG, int row, int col, int Zc)
{ {
int iMCW = blockIdx.y; int iMCW = get_group_id(1);
int iBlkCol = blockIdx.x; int iBlkCol = get_group_id(0);
int iBlkRow; int iBlkRow;
int iSubCol = threadIdx.x; int iSubCol = get_local_id(0);
int iRow; int iRow;
int iCol; int iCol;
...@@ -238,23 +239,23 @@ ldpc_vnp_kernel_normal(/*char * dev_llr, char * dev_dt, char * dev_const_llr,*/ ...@@ -238,23 +239,23 @@ ldpc_vnp_kernel_normal(/*char * dev_llr, char * dev_dt, char * dev_const_llr,*/
} }
__kernel void pack_decoded_bit(/*char *dev, unsigned char *host,*/ int col, int Zc) __kernel void pack_decoded_bit(__local unsigned char * dev_llr, __local unsigned char * dev_tmp, int col, int Zc)
{ {
__shared__ unsigned char tmp[128]; __local unsigned char tmp[128];
int iMCW = blockIdx.y; int iMCW = get_group_id(1);
int tid = iMCW * col*Zc + blockIdx.x*128 + threadIdx.x; int btid = get_local_id(0);
int btid = threadIdx.x; int tid = iMCW * col*Zc + get_group_id(0)*128 + btid;
tmp[btid] = 0; tmp[btid] = 0;
if(dev_llr[tid] < 0){ if(dev_llr[tid] < 0){
tmp[btid] = 1 << (7-(btid&7)); tmp[btid] = 1 << (7-(btid&7));
} }
__syncthreads(); // __syncthreads();
if(threadIdx.x < 16){ if(btid < 16){
dev_tmp[iMCW * col*Zc + blockIdx.x*16+threadIdx.x] = 0; dev_tmp[iMCW * col*Zc + get_group_id(0)*16+btid] = 0;
for(int i = 0; i < 8; i++){ for(int i = 0; i < 8; i++){
dev_tmp[iMCW * col*Zc + blockIdx.x*16+threadIdx.x] += tmp[threadIdx.x*8+i]; dev_tmp[iMCW * col*Zc + get_group_id(0)*16+btid] += tmp[btid*8+i];
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment