Commit 3586e83f authored by Raymond Knopp's avatar Raymond Knopp Committed by frtabu

start translating cuda ldpc sources to opencl

parent 4345d42e
......@@ -110,6 +110,23 @@ char *clutil_getstrdev(int intdev) {
return retstring;
}
void get_CompilErr(cl_program program, int pltf) {
// Determine the size of the log
size_t log_size;
for(int i=0; i<ocl.runtime[pltf].num_devices;i++) {
clGetProgramBuildInfo(program, ocl.runtime[pltf].devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
// Allocate memory for the log
char *log = (char *) malloc(log_size);
// Get the log
clGetProgramBuildInfo(program, ocl.runtime[pltf].devices[i], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
// Print the log
printf("%s\n", log);
free(log);
}
}
size_t load_source(char **source_str) {
int MAX_SOURCE_SIZE=(500*132);
FILE *fp;
......@@ -177,7 +194,10 @@ int ldpc_autoinit(void) { // called by the library loader
cl_program program = clCreateProgramWithSource(ocl.runtime[i].context, 1,
(const char **)&source_str, (const size_t *)&source_size, (cl_int *)&rt);
AssertFatal(rt == CL_SUCCESS, "Error %d creating program for platform %i \n" , (int)rt, i);
rt = clBuildProgram(program, ocl.runtime[i].num_devices,ocl.runtime[i].devices, NULL, NULL, NULL);
rt = clBuildProgram(program, ocl.runtime[i].num_devices,ocl.runtime[i].devices, NULL, NULL, NULL);
if (rt == CL_BUILD_PROGRAM_FAILURE) {
get_CompilErr(program,i);
}
AssertFatal(rt == CL_SUCCESS, "Error %d buildding program for platform %i \n" , rt, i);
context_ok++;
}
......
define MAX_ITERATION 2
#define define MAX_ITERATION 2
#define MC 1
#define INT32_MAX 2147483647
typedef struct{
char x;
......@@ -10,28 +10,29 @@ typedef struct{
short value;
} h_element;
char dev_const_llr[68*384];
char dev_dt [46*68*384];
char dev_llr[68*384];
unsigned char dev_tmp[68*384];
__constant char dev_const_llr[68*384]= {};
//__global char dev_dt [46*68*384];
//__local char *dev_t;
//__global char dev_llr[68*384];
//__global unsigned char dev_tmp[68*384];
h_element h_compact1 [46*19] = {};
h_element h_compact2 [68*30] = {};
__constant h_element h_compact1 [46*19] = {};
__constant h_element h_compact2 [68*30] = {};
h_element dev_h_compact1[46*19]; // used in kernel 1
h_element dev_h_compact2[68*30]; // used in kernel 2
__constant h_element dev_h_compact1[46*19] = {}; // used in kernel 1
__constant h_element dev_h_compact2[68*30] = {}; // used in kernel 2
// __device__ __constant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constant__ h_element dev_h_compact2[68*30]; // used in kernel 2
// __device__ __constantant__ h_element dev_h_compact1[46*19]; // used in kernel 1
// __device__ __constantant__ h_element dev_h_compact2[68*30]; // used in kernel 2
// row and col element count
const char h_ele_row_bg1_count[46] = {
__constant char h_ele_row_bg1_count[46] = {
19, 19, 19, 19, 3, 8, 9, 7, 10, 9,
7, 8, 7, 6, 7, 7, 6, 6, 6, 6,
6, 6, 5, 5, 6, 5, 5, 4, 5, 5,
5, 5, 5, 5, 5, 5, 5, 4, 5, 5,
4, 5, 4, 5, 5, 4};
const char h_ele_col_bg1_count[68] = {
__constant char h_ele_col_bg1_count[68] = {
30, 28, 7, 11, 9, 4, 8, 12, 8, 7,
12, 10, 12, 11, 10, 7, 10, 10, 13, 7,
8, 11, 12, 5, 6, 6, 1, 1, 1, 1,
......@@ -39,13 +40,13 @@ const char h_ele_col_bg1_count[68] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1};
const char h_ele_row_bg2_count[42] = {
__constant char h_ele_row_bg2_count[42] = {
8, 10, 8, 10, 4, 6, 6, 6, 4, 5,
5, 5, 4, 5, 5, 4, 5, 5, 4, 4,
4, 4, 3, 4, 4, 3, 5, 3, 4, 3,
5, 3, 4, 4, 4, 4, 4, 3, 4, 4,
4, 4};
const char h_ele_col_bg2_count[52] = {
__constant char h_ele_col_bg2_count[52] = {
22, 23, 10, 5, 5, 14, 7, 13, 6, 8,
9, 16, 9, 12, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
......@@ -56,13 +57,13 @@ const char h_ele_col_bg2_count[52] = {
// Kernel 1
__kernel void ldpc_cnp_kernel_1st_iter(/*char * dev_llr,*/ int BG, int row, int col, int Zc)
__kernel void ldpc_cnp_kernel_1st_iter( __local char * dev_llr, __local char * dev_dt, int BG, int row, int col, int Zc)
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp %d\n", threadIdx.x);
int iMCW = blockIdx.y; // codeword id
int iBlkRow = blockIdx.x; // block row in h_base
int iMCW = get_group_id(1); // codeword id
int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base
int iSubRow = threadIdx.x; // row index in sub_block of h_base
int iSubRow = get_local_id(0);; // row index in sub_block of h_base
int iCol; // overall col index in h_base
int offsetR;
int shift_t;
......@@ -125,13 +126,13 @@ __kernel void ldpc_cnp_kernel_1st_iter(/*char * dev_llr,*/ int BG, int row, int
}
// Kernel_1
__kernel void ldpc_cnp_kernel(/*char * dev_llr, char * dev_dt,*/ int BG, int row, int col, int Zc)
__kernel void ldpc_cnp_kernel( __local char * dev_llr, __local char * dev_dt, int BG, int row, int col, int Zc)
{
// if(blockIdx.x == 0 && threadIdx.x == 1) printf("cnp\n");
int iMCW = blockIdx.y;
int iBlkRow = blockIdx.x; // block row in h_base
int iMCW = get_group_id(1);
int iBlkRow = get_group_id(0); // block row in h_base
int iBlkCol; // block col in h_base
int iSubRow = threadIdx.x; // row index in sub_block of h_base
int iSubRow = get_local_id(0);; // row index in sub_block of h_base
int iCol; // overall col index in h_base
int offsetR;
int shift_t;
......@@ -198,12 +199,12 @@ __kernel void ldpc_cnp_kernel(/*char * dev_llr, char * dev_dt,*/ int BG, int row
// Kernel 2: VNP processing
__kernel void
ldpc_vnp_kernel_normal(/*char * dev_llr, char * dev_dt, char * dev_const_llr,*/ int BG, int row, int col, int Zc)
ldpc_vnp_kernel_normal(__local char * dev_llr, __local char * dev_dt, /* char * dev_const_llr,*/ int BG, int row, int col, int Zc)
{
int iMCW = blockIdx.y;
int iBlkCol = blockIdx.x;
int iMCW = get_group_id(1);
int iBlkCol = get_group_id(0);
int iBlkRow;
int iSubCol = threadIdx.x;
int iSubCol = get_local_id(0);
int iRow;
int iCol;
......@@ -238,23 +239,23 @@ ldpc_vnp_kernel_normal(/*char * dev_llr, char * dev_dt, char * dev_const_llr,*/
}
__kernel void pack_decoded_bit(/*char *dev, unsigned char *host,*/ int col, int Zc)
__kernel void pack_decoded_bit(__local unsigned char * dev_llr, __local unsigned char * dev_tmp, int col, int Zc)
{
__shared__ unsigned char tmp[128];
int iMCW = blockIdx.y;
int tid = iMCW * col*Zc + blockIdx.x*128 + threadIdx.x;
int btid = threadIdx.x;
__local unsigned char tmp[128];
int iMCW = get_group_id(1);
int btid = get_local_id(0);
int tid = iMCW * col*Zc + get_group_id(0)*128 + btid;
tmp[btid] = 0;
if(dev_llr[tid] < 0){
tmp[btid] = 1 << (7-(btid&7));
}
__syncthreads();
// __syncthreads();
if(threadIdx.x < 16){
dev_tmp[iMCW * col*Zc + blockIdx.x*16+threadIdx.x] = 0;
if(btid < 16){
dev_tmp[iMCW * col*Zc + get_group_id(0)*16+btid] = 0;
for(int i = 0; i < 8; i++){
dev_tmp[iMCW * col*Zc + blockIdx.x*16+threadIdx.x] += tmp[threadIdx.x*8+i];
dev_tmp[iMCW * col*Zc + get_group_id(0)*16+btid] += tmp[btid*8+i];
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment