Commit 82b32199 authored by ISIP CS/NCTU

GPU part uploading

parent ae0494b0
// Build switches for the CUDA code path.
// CUDA_debug: the define below is commented out, so this header leaves
// CUDA_debug undefined unless it was already defined elsewhere.
#ifndef CUDA_debug
//#define CUDA_debug
#endif
// CUDA: defined here (once) if it has not been defined already.
#ifndef CUDA
#define CUDA
#endif
\ No newline at end of file
#ifndef __INIT__DEFS_CU__H__
#define __INIT__DEFS_CU__H__
#include <stdint.h>
#include <stdio.h>
#include "PHY/impl_defs_lte.h"
#include "PHY/defs.h"
// Setup/teardown entry points of the CUDA part. Both prototypes get C
// linkage when this header is compiled as C++, and are plain declarations
// when compiled as C.
//
// init_cuda: receives the eNB PHY variables by pointer and the downlink
// frame parameters by value.
// NOTE(review): the implementation is not visible here; presumably it sets
// up the per-subframe GPU state used by the other CUDA routines — confirm.
#ifdef __cplusplus
extern "C"
#endif
void init_cuda( PHY_VARS_eNB *phy_vars_eNB, LTE_DL_FRAME_PARMS frame_parms );
// free_cufft: takes no arguments and returns nothing.
// NOTE(review): presumably releases the cuFFT resources — confirm against
// the definition.
#ifdef __cplusplus
extern "C"
#endif
void free_cufft();
#endif
This diff is collapsed.
#include <stdint.h>
#include <stdio.h>
#include "assertions.h"
// Prototypes for the CUDA ulsch_* / estimation / compensation / IDFT entry
// points. Each one is declared with C linkage when this header is compiled
// as C++.
// NOTE(review): the definitions are not visible here, so parameter meanings
// are read off the names only and should be confirmed: `sf` is presumably
// the subframe index, `first_rb` / `nb_rb` presumably the first resource
// block and the number of resource blocks, `number_symbols` presumably the
// number of symbols to process.
// NOTE(review): this header has no include guard.
#ifdef __cplusplus
extern "C"
#endif
void ulsch_extract_rb_and_compensation_cu( unsigned int first_rb,
unsigned int nb_rb,
unsigned short first_carrier_offset,
unsigned short number_symbols,
unsigned short sf);
// Takes two output pointers (out, out2) and two further int pointers (u, v)
// plus scalar parameters; buffer sizes and memory spaces are not visible
// from this declaration.
#ifdef __cplusplus
extern "C"
#endif
void ulsch_channel_compensation_cu( short sf, short cyclic_shift, int *out, int *out2, int *u, int *v, int Msc_RS, short const_shift);
#ifdef __cplusplus
extern "C"
#endif
void ulsch_extract_rb_cu( unsigned int first_rb,
unsigned int nb_rb,
unsigned short number_symbols,
unsigned short sf);
#ifdef __cplusplus
extern "C"
#endif
void exrb_compen_esti_cu( unsigned int first_rb,
unsigned int nb_rb,
unsigned short number_symbols,
unsigned short sf);
#ifdef __cplusplus
extern "C"
#endif
void estimation_cu( unsigned int first_rb,
unsigned int nb_rb,
unsigned short number_symbols,
unsigned short sf);
// Same parameter list as estimation_cu plus `Qm`.
#ifdef __cplusplus
extern "C"
#endif
void compensation_cu( unsigned int first_rb,
unsigned int nb_rb,
unsigned short number_symbols,
short Qm,
unsigned short sf);
// Same parameter list as estimation_cu plus `cl`.
#ifdef __cplusplus
extern "C"
#endif
void idft_cu(unsigned int first_rb,
unsigned int nb_rb,
unsigned short number_symbols,
short cl,
unsigned short sf
);
This diff is collapsed.
#ifndef __DEFS_CU__H__
#define __DEFS_CU__H__
#include <stdint.h>
#include <stdio.h>
// cufft.h is wrapped in a local CUFFT_H guard so it is pulled in only once
// across the headers that repeat this pattern.
#ifndef CUFFT_H
#define CUFFT_H
#include "cufft.h"
#endif
//typedef float2 Complex;
// GPU inverse transform with cyclic-prefix insertion.
// Arguments, in order: host input samples, host output samples, and the
// index used to select the per-subframe downlink context (dl_cu[sf]).
// Declared with C linkage when compiled as C++.
#ifdef __cplusplus
extern "C"
#endif
void idft512ad_cu( short *, short *, int );
// GPU forward transform with cyclic-prefix removal.
// Arguments, in order: host input samples, host output samples, and the
// index used to select the per-subframe uplink context (ul_cu[sf]).
// Declared with C linkage when compiled as C++.
#ifdef __cplusplus
extern "C"
#endif
void dft512rm_cu( short *, short *, int );
#endif
#include "stdio.h"
#include "cufft.h"
#include "defs.h"
#include "PHY/CUDA/extern.h"
typedef float2 Complex;
//for dftXXXrm
// Strips the cyclic prefix and widens interleaved 16-bit I/Q samples to
// float complex values (used ahead of the forward FFT in dft512rm_cu).
//
// Launch layout as used by dft512rm_cu: one block per symbol, one thread per
// sample of that symbol (blockDim.x threads). There is no bounds check, so
// the grid must match the buffer sizes exactly.
//
//   x   : interleaved (re, im) int16 input, prefixes still present
//   y   : output, blockDim.x complex values per block, no prefixes
//   CP  : prefix length skipped per symbol
//   CP0 : prefix length of the first symbol; blocks with index 7 and above
//         are shifted by an extra (CP0 - CP), which matches a second
//         CP0-long prefix sitting in front of block 7
__global__ void k_rmcp(int16_t *x, Complex *y, int CP, int CP0)
{
    const int symbol  = blockIdx.x;
    const int sample  = threadIdx.x;
    const int fft_len = blockDim.x;

    // Destination is densely packed: fft_len samples per symbol.
    const int dst = fft_len * symbol + sample;

    // Source skips one CP per preceding symbol plus the leading CP0.
    int src = (fft_len + CP) * symbol + sample + CP0;
    if (symbol >= 7)
        src += CP0 - CP;

    const int16_t *iq = &x[2 * src];
    Complex widened;
    widened.x = (float)iq[0];
    widened.y = (float)iq[1];
    y[dst] = widened;
}
//for dftXXXrm
// Scales float complex samples by 1/sqrt(512) and narrows them to
// interleaved 16-bit (re, im) pairs. Used after both the forward and the
// inverse transform.
//
// One thread per complex sample; the flat index is
// blockIdx.x * blockDim.x + threadIdx.x and there is no bounds check, so the
// launch must cover exactly the number of samples in x / y.
//
//   x : input complex samples
//   y : output, two shorts per input sample
//
// The scale factor is hard-coded for a 512-point transform. It is written as
// a float literal so the multiply stays in single precision; a bare
// 0.04419417 would be a double and promote every product to double.
__global__ void k_short(Complex *x, short *y)
{
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    const float inv_sqrt_512 = 0.04419417f;

    y[i << 1]       = (short)(x[i].x * inv_sqrt_512);
    y[(i << 1) + 1] = (short)(x[i].y * inv_sqrt_512);
}
// Widens interleaved 16-bit (re, im) pairs to float complex values, one
// thread per complex sample (used ahead of the inverse FFT in idft512ad_cu).
//
//   x : input, two shorts per sample
//   y : output complex samples
//
// No bounds check: the launch must cover exactly the number of samples.
__global__ void k_adcp_extend( short *x, Complex *y )
{
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;
    const short *pair = x + 2 * idx;

    Complex widened;
    widened.x = (float)pair[0];
    widened.y = (float)pair[1];
    y[idx] = widened;
}
// Fills a complex buffer with a recognisable pattern: the real part is
// 22 * threadIdx.x and the imaginary part is 22 * blockIdx.x.
// One thread per element; no bounds check.
__global__ void k_test( Complex *x )
{
    const int lane  = threadIdx.x;
    const int block = blockIdx.x;

    Complex *slot = &x[block * blockDim.x + lane];
    slot->x = lane * 22;
    slot->y = block * 22;
}
// Inverse transform of one TTI on the GPU, followed by cyclic-prefix
// insertion on the host.
//
//   x  : host input, interleaved 16-bit (re, im), ifftsize samples per
//        symbol for symbols_per_tti symbols
//   y  : host output, each symbol preceded by its cyclic prefix
//   sf : index selecting the downlink context dl_cu[sf]
//
// Pipeline, all queued on dl_cu[sf].stream_dl except the cuFFT call:
//   H2D copy -> k_adcp_extend (short -> float) -> in-place inverse cuFFT
//   -> k_short (scale, float -> short) -> D2H copy into h_txdata.
// NOTE(review): cufftExecC2C runs on whatever stream the plan was bound to
// when it was created; that setup is not visible here.
//
// The host is blocked until the stream has drained. If any CUDA or cuFFT
// call reports a failure, the error is logged to stderr and y is left
// untouched rather than being filled from a buffer that was never produced.
void idft512ad_cu( int16_t *x, int16_t *y, int sf )
{//dl_cu
    const int fft_len      = dl_cu[sf].ifftsize;
    const int n_symbols    = dl_cu[sf].symbols_per_tti;
    const size_t tti_bytes = sizeof(short) * 2 * fft_len * n_symbols;
    cudaStream_t stream    = dl_cu[sf].stream_dl;
    int i;

    cudaError_t err = cudaMemcpyAsync( dl_cu[sf].d_txdata,
                                       x,
                                       tti_bytes,
                                       cudaMemcpyHostToDevice,
                                       stream );

    // One block per symbol, one thread per sample.
    k_adcp_extend<<< n_symbols, fft_len, 0, stream >>>
        ( dl_cu[sf].d_txdata,
          dl_cu[sf].d_txdata_ifft );

    const cufftResult fft_status =
        cufftExecC2C( dl_cu[sf].ifft,
                      (cufftComplex *) dl_cu[sf].d_txdata_ifft,
                      (cufftComplex *) dl_cu[sf].d_txdata_ifft,
                      CUFFT_INVERSE );

    k_short<<< n_symbols, fft_len, 0, stream >>>
        ( dl_cu[sf].d_txdata_ifft,
          dl_cu[sf].d_txdata );

    const cudaError_t d2h_status =
        cudaMemcpyAsync( dl_cu[sf].h_txdata,
                         dl_cu[sf].d_txdata,
                         tti_bytes,
                         cudaMemcpyDeviceToHost,
                         stream );

    // Asynchronous kernel faults surface here.
    const cudaError_t sync_status = cudaStreamSynchronize( stream );

    if ( err == cudaSuccess )
        err = d2h_status;
    if ( err == cudaSuccess )
        err = sync_status;
    if ( err != cudaSuccess || fft_status != CUFFT_SUCCESS )
    {
        fprintf( stderr,
                 "[CUDA] idft512ad_cu(sf=%d) failed: cuda=%s cufft=%d\n",
                 sf, cudaGetErrorString( err ), (int)fft_status );
        return;
    }

    // Cyclic-prefix insertion: for each symbol, copy its last cp samples
    // first, then the whole symbol. Symbols 0 and 7 use CP0, the rest CP.
    const short *gpu_out = dl_cu[sf].h_txdata;
    int out_pos = 0;   // write position in y, in complex samples
    for ( i = 0; i < n_symbols; i++ )
    {
        const int cp = ( i == 0 || i == 7 ) ? dl_cu[sf].CP0 : dl_cu[sf].CP;
        const short *symbol = &gpu_out[ i * fft_len * 2 ];

        memcpy( &y[ out_pos << 1 ],
                symbol + ( fft_len - cp ) * 2,
                cp * sizeof(short) * 2 );
        memcpy( &y[ ( out_pos + cp ) << 1 ],
                symbol,
                fft_len * 2 * sizeof(short) );
        out_pos += cp + fft_len;
    }
}
// Cyclic-prefix removal and forward transform of one TTI on the GPU.
//
//   x  : host input, interleaved 16-bit (re, im), samples_per_tti samples
//        with cyclic prefixes still present
//   y  : host output, interleaved 16-bit (re, im), fftsize samples per
//        symbol for symbols_per_tti symbols
//   sf : index selecting the uplink context ul_cu[sf]
//
// Pipeline, all queued on ul_cu[sf].stream_ul except the cuFFT call:
//   H2D copy -> k_rmcp (drop prefix, short -> float) -> in-place forward
//   cuFFT -> k_short (scale, float -> short) -> D2H copy into y.
// NOTE(review): cufftExecC2C runs on whatever stream the plan was bound to
// when it was created; that setup is not visible here.
//
// The D2H copy is queued directly behind k_short: work on one stream runs in
// issue order, so a single host-side synchronisation at the end is enough.
// No per-call event timing is done, which keeps the legacy default stream
// (and the implicit cross-stream synchronisation it brings) out of this path.
//
// The prefix lengths passed to k_rmcp are the literals 36 and 40.
// NOTE(review): ul_cu[sf] also carries CP / CP0 fields; switch to them once
// it is confirmed they are initialised to the same values.
//
// Failures are reported on stderr; on failure the contents of y are not
// meaningful.
void dft512rm_cu( int16_t *x, int16_t *y, int sf )
{
    const int fft_len   = ul_cu[sf].fftsize;
    const int n_symbols = ul_cu[sf].symbols_per_tti;
    cudaStream_t stream = ul_cu[sf].stream_ul;

    cudaError_t err = cudaMemcpyAsync( ul_cu[sf].d_rxdata[0],
                                       x,
                                       sizeof(short) * ul_cu[sf].samples_per_tti * 2,
                                       cudaMemcpyHostToDevice,
                                       stream );

    // One block per symbol, one thread per sample.
    k_rmcp<<< n_symbols, fft_len, 0, stream >>>
        ( (short *)ul_cu[sf].d_rxdata[0],
          ul_cu[sf].d_rxdata_fft[0],
          36,
          40 );

    const cufftResult fft_status =
        cufftExecC2C( ul_cu[sf].fft,
                      (cufftComplex *)ul_cu[sf].d_rxdata_fft[0],
                      (cufftComplex *)ul_cu[sf].d_rxdata_fft[0],
                      CUFFT_FORWARD );

    k_short<<< n_symbols, fft_len, 0, stream >>>
        ( ul_cu[sf].d_rxdata_fft[0],
          (short *)ul_cu[sf].d_rxdataF[0] );

    const cudaError_t d2h_status =
        cudaMemcpyAsync( y,
                         ul_cu[sf].d_rxdataF[0],
                         sizeof(short) * n_symbols * 2 * fft_len,
                         cudaMemcpyDeviceToHost,
                         stream );

    // Blocks until y is fully written; asynchronous faults surface here.
    const cudaError_t sync_status = cudaStreamSynchronize( stream );

    if ( err == cudaSuccess )
        err = d2h_status;
    if ( err == cudaSuccess )
        err = sync_status;
    if ( err != cudaSuccess || fft_status != CUFFT_SUCCESS )
    {
        fprintf( stderr,
                 "[CUDA] dft512rm_cu(sf=%d) failed: cuda=%s cufft=%d\n",
                 sf, cudaGetErrorString( err ), (int)fft_status );
    }
}
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "PHY/CUDA/LTE_TRANSPORT/defs.h"
// Prototype of the CUDA variant of the ULSCH receive routine.
// Takes the eNB PHY variables, a scheduled-subframe number, the effective
// sector id, a UE id, an array of ULSCH descriptors and a cooperation flag.
// NOTE(review): the definition is not visible here; confirm how each
// argument is used before relying on this summary.
void rx_ulsch_cu(PHY_VARS_eNB *phy_vars_eNB,
uint32_t sched_subframe,
uint8_t eNB_id, // this is the effective sector id
uint8_t UE_id,
LTE_eNB_ULSCH_t **ulsch,
uint8_t cooperation_flag);
\ No newline at end of file
#include <stdint.h>
#include <stdio.h>
#ifndef CUFFT_H
#define CUFFT_H
#include "cufft.h"
#endif
// Constant data for channel estimation: a 30 x 2 x 33 table of pointers to
// float2 arrays.
// NOTE(review): the d_ prefix suggests these point to device memory and the
// three indices presumably select a reference sequence (group, sequence
// number, size index) — confirm against the code that fills the table.
typedef struct {
float2 *d_ul_ref_sigs_rx[30][2][33];
} estimation_const_t;
// Per-subframe ULSCH parameters (an array of 10 is declared as ulsch_para).
// All fields are 16-bit signed integers. The "1"/"2" suffixes pair up as two
// sets of (u, v, cyclic_shift).
// NOTE(review): presumably one set per slot of the subframe, with Msc_RS the
// reference-signal length and Msc_RS_idx its table index — confirm against
// the code that fills and reads this struct.
typedef struct {
short u1;
short v1;
short u2;
short v2;
short Msc_RS_idx;
short cyclic_shift1;
short cyclic_shift2;
short Msc_RS;
} para_ulsch;
// Per-subframe resource-block extraction parameters (an array of 10 is
// declared as ext_rbs_para).
// NOTE(review): field meanings are read off the names only — first_rb and
// first_carrier_offset presumably locate the allocation, nb_rb1 / nb_rb2
// presumably split it into two parts, N_RB_UL and fftsize presumably mirror
// the frame configuration. Confirm against the code that uses this struct.
typedef struct {
unsigned int first_rb;
unsigned short first_carrier_offset;
short N_RB_UL;
unsigned short nb_rb1;
unsigned short nb_rb2;
short fftsize;
} ext_rbs;
// Bundle of 25 cuFFT plan handles, named fft12 through fft300 in steps of 12.
// NOTE(review): presumably one plan per transform length 12*k (k = 1..25),
// i.e. one per possible number of allocated resource blocks — confirm
// against the plan-creation code.
typedef struct {
cufftHandle fft12;
cufftHandle fft24;
cufftHandle fft36;
cufftHandle fft48;
cufftHandle fft60;
cufftHandle fft72;
cufftHandle fft84;
cufftHandle fft96;
cufftHandle fft108;
cufftHandle fft120;
cufftHandle fft132;
cufftHandle fft144;
cufftHandle fft156;
cufftHandle fft168;
cufftHandle fft180;
cufftHandle fft192;
cufftHandle fft204;
cufftHandle fft216;
cufftHandle fft228;
cufftHandle fft240;
cufftHandle fft252;
cufftHandle fft264;
cufftHandle fft276;
cufftHandle fft288;
cufftHandle fft300;
} fftHandle;
// Per-subframe uplink GPU context (an array of 10 is declared as ul_cu).
// Field notes marked "dft512rm_cu" describe how that routine uses the field;
// fields without such a note are not referenced in the code visible here.
typedef struct {
// dft512rm_cu: stream on which the copies and kernels are queued.
cudaStream_t stream_ul;
cudaStream_t timing_advance;
cudaStream_t tempstrm;
// dft512rm_cu: plan executed with CUFFT_FORWARD.
cufftHandle fft;
cufftHandle ifft_timing_advance;
// Set of plans named by length; see fftHandle.
fftHandle idft;
// dft512rm_cu: element [0] is the device destination of the host input
// samples and is read by k_rmcp as 16-bit data.
int **d_rxdata;
// dft512rm_cu: element [0] is the complex buffer transformed in place.
float2 **d_rxdata_fft;
// dft512rm_cu: element [0] receives the scaled 16-bit output of k_short
// and is copied back to the host.
int **d_rxdataF;
int **d_rxdata_comp_int;
float2 **d_rxdata_comp;
float2 **d_drs_ch;
int **d_drs_ch_int;
int **d_ulsch_power;
float2 **d_rxdata_ext;
int **d_rxdata_ext_int;
short N_RB_UL;
short nb_antennas_rx;
// dft512rm_cu: number of blocks launched (one per symbol).
short symbols_per_tti;
// dft512rm_cu: number of complex input samples copied to the device.
short samples_per_tti;
short Ncp;
// dft512rm_cu: threads per block (one per sample of a symbol).
short fftsize;
// NOTE(review): presumably the cyclic-prefix lengths; dft512rm_cu passes
// the literals 36 and 40 instead of reading these fields.
short CP;
short CP0;
} ul_cu_t;
// Per-subframe downlink GPU context (an array of 10 is declared as dl_cu).
// Field notes marked "idft512ad_cu" describe how that routine uses the
// field; fields without such a note are not referenced in the code visible
// here.
typedef struct {
// idft512ad_cu: stream on which the copies and kernels are queued.
cudaStream_t stream_dl;
// idft512ad_cu: plan executed with CUFFT_INVERSE.
cufftHandle ifft;
// idft512ad_cu: device buffer holding the 16-bit input before the
// transform and the scaled 16-bit result after it.
short *d_txdata;
short *d_txdata_o;
// idft512ad_cu: complex buffer transformed in place.
float2 *d_txdata_ifft;
// idft512ad_cu: host buffer receiving the device result before the
// cyclic prefixes are inserted.
short *h_txdata;
// idft512ad_cu: number of symbols processed (blocks launched, loop count).
short symbols_per_tti;
short samples_per_tti;
short Ncp;
// idft512ad_cu: samples per symbol (threads per block).
short ifftsize;
// idft512ad_cu: cyclic-prefix length, in samples, for every symbol except
// symbols 0 and 7.
short CP;
// idft512ad_cu: cyclic-prefix length, in samples, for symbols 0 and 7.
short CP0;
} dl_cu_t;
\ No newline at end of file
#ifndef __MODULATION_EXTERN_CU_H__
// The macro defined here must be spelled exactly like the one tested above,
// otherwise the guard never takes effect and a second inclusion pulls in
// "defs_struct.h" (and its struct typedefs) again.
#define __MODULATION_EXTERN_CU_H__
#include "defs_struct.h"
#include <stdint.h>
#include <stdio.h>
#ifndef CUFFT_H
#define CUFFT_H
#include "cufft.h"
#endif
// Global GPU state shared by the CUDA routines. The definitions live in
// another translation unit; only the declarations are given here.
// Per-subframe downlink / uplink contexts, indexed by subframe number.
extern dl_cu_t dl_cu[10];
extern ul_cu_t ul_cu[10];
// Table of reference-signal pointers used for channel estimation.
extern estimation_const_t esti_const;
extern int device_count;
// Per-subframe ULSCH and resource-block extraction parameters.
extern para_ulsch ulsch_para[10];
extern ext_rbs ext_rbs_para[10];
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment