Commit d16884d5 authored by Fang-WANG's avatar Fang-WANG

slot fft ok

parent 5b5b48d3
......@@ -764,7 +764,8 @@ function main() {
if [ "$SIMUS_PHY" = "1" ] ; then
echo_info "Compiling physical unitary tests simulators"
# TODO: fix: dlsim_tm4 pucchsim prachsim pdcchsim pbchsim mbmssim
simlist="dlsim ulsim ldpctest polartest smallblocktest nr_pbchsim nr_dlschsim nr_ulschsim nr_dlsim nr_ulsim nr_pucchsim nr_prachsim"
simlist="nr_ulsim"
# simlist="dlsim ulsim ldpctest polartest smallblocktest nr_pbchsim nr_dlschsim nr_ulschsim nr_dlsim nr_ulsim nr_pucchsim nr_prachsim"
# simlist="ldpctest"
for f in $simlist ; do
compilations \
......
......@@ -535,6 +535,7 @@ const char* eurecomFunctionsNames[] = {
"nr_rx_pusch",
"nr_ulsch_procedures_rx",
"macxface_gNB_dlsch_ulsch_scheduler",
"cufft_wait",
/*NR ue-softmodem signal*/
"nr_ue_ulsch_encoding",
......
......@@ -527,6 +527,7 @@ typedef enum {
VCD_SIGNAL_DUMPER_FUNCTIONS_NR_RX_PUSCH,
VCD_SIGNAL_DUMPER_FUNCTIONS_NR_ULSCH_PROCEDURES_RX,
VCD_SIGNAL_DUMPER_FUNCTIONS_gNB_DLSCH_ULSCH_SCHEDULER,
VCD_SIGNAL_DUMPER_FUNCTIONS_CUFFT_WAIT,
/* NR ue-softmodem signal*/
VCD_SIGNAL_DUMPER_FUNCTIONS_NR_UE_ULSCH_ENCODING,
......
......@@ -73,7 +73,7 @@ typedef struct {
} T_cache_t;
/* number of VCD functions (to be kept up to date! see in T_messages.txt) */
#define VCD_NUM_FUNCTIONS (273)
#define VCD_NUM_FUNCTIONS (274)
/* number of VCD variables (to be kept up to date! see in T_messages.txt) */
#define VCD_NUM_VARIABLES (187)
......
......@@ -3513,6 +3513,11 @@ ID = VCD_FUNCTION_gNB_DLSCH_ULSCH_SCHEDULER
GROUP = ALL:VCD:ENB:VCD_FUNCTION
FORMAT = int,value
VCD_NAME = macxface_gNB_dlsch_ulsch_scheduler
ID = VCD_FUNCTION_CUFFT_WAIT
DESC = VCD function CUFFT_WAIT
GROUP = ALL:VCD:ENB:VCD_FUNCTION
FORMAT = int,value
VCD_NAME = cufft_wait
#function for nrUE
ID = VCD_FUNCTION_NR_UE_ULSCH_ENCODING
......
......@@ -87,6 +87,7 @@ unsigned short config_frames[4] = {2,9,11,13};
#include "gnb_paramdef.h"
#include <openair3/ocp-gtpu/gtp_itf.h>
#include "nfapi/oai_integration/vendor_ext.h"
#include "PHY/CODING/nrLDPC_extern.h"
pthread_cond_t nfapi_sync_cond;
pthread_mutex_t nfapi_sync_mutex;
......@@ -705,6 +706,7 @@ int main( int argc, char **argv )
init_opt();
load_cuFFT1();
#ifdef PDCP_USE_NETLINK
......
cmake_minimum_required(VERSION 2.8)
project(run)
FIND_PACKAGE(CUDA REQUIRED)
# Pass options to NVCC
# 由于cuda采用NVCC编译而不是gCC编译,因此需要将参数传递给NVCC
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -lcufft)
# For compilation ...
# Specify target & source files to compile it from
CUDA_ADD_EXECUTABLE(run cuFFT2.cu)
# For linking ...
# Specify target & libraries to link it with
CUDA_ADD_CUFFT_TO_TARGET(run)
# 添加对gdb的支持
# SET(CMAKE_BUILD_TYPE "Debug")
# SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb")
# SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
#include <stdio.h>
#include <cufft.h>
#include <cuda_runtime.h>
#define LEN 2048
#define SQRT2048_real 45.2876
#define SQRT2048_imag 45.3065
#define SYMBOLS_PER_SLOT 1
__global__ void int_cufftComplex(int16_t *a, cufftComplex *b, int length)
{
int id = (blockIdx.x * blockDim.x + threadIdx.x);
if(id >=length)
{
return;
}
b[id].x = a[id*2];
b[id].y = a[id*2+1];
}
__global__ void cufftComplex_int(cufftComplex *a, int16_t *b, int length)
{
int id = (blockIdx.x * blockDim.x + threadIdx.x);
if(id >=length)
{
return;
}
b[id*2] = a[id].x/SQRT2048_real;
b[id*2+1] = a[id].y/SQRT2048_imag;
}
int16_t *x1;
cufftComplex *CompData;
cufftHandle plan;
void initcudft()
{
cudaMalloc((void**)&x1, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudaMalloc((void**)&CompData, SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex));
cufftPlan1d(&plan, LEN, CUFFT_C2C, 1);
// int rank=1;
// int n[1]; n[0]=LEN;
// int nembed[2]; nembed[0]=LEN; nembed[1]=SYMBOLS_PER_SLOT;
// int stride=1;
// int dist = LEN;
// int batch=SYMBOLS_PER_SLOT;
// cufftPlanMany(&plan,rank,n,nembed, stride ,dist , nembed, stride,dist, CUFFT_C2C, batch);
}
void cudft2048(int16_t *x,int16_t *y,unsigned char scale)
{
cudaMemcpy(x1, x, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaMemcpyHostToDevice);
int threadNum = 512;
int blockNum = (SYMBOLS_PER_SLOT * LEN - 0.5) / threadNum + 1;
int_cufftComplex<<<blockNum, threadNum>>>(x1, CompData, SYMBOLS_PER_SLOT*LEN);
cufftExecC2C(plan, (cufftComplex*)CompData, (cufftComplex*)CompData, CUFFT_FORWARD);//execute
cudaDeviceSynchronize();//wait to be done
cufftComplex_int<<<blockNum, threadNum>>>(CompData, x1, SYMBOLS_PER_SLOT*LEN);
cudaMemcpy(y, x1, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaMemcpyDeviceToHost);// copy the result from device to host
static int hshs=0;
printf("------------%d\n",hshs);
hshs++;
}
void load_cuFFT(void)
{
initcudft();
int16_t *a = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int16_t *b = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudft2048(a,b,1);
}
int main()
{
load_cuFFT();
int16_t *a = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int16_t *b = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
for (int i = 0; i < SYMBOLS_PER_SLOT*LEN; i++)
{
*(a+2*i) = i;
*(a+2*i+1) = LEN-i;
}
for (int i = 0; i < 3; i++)
{
cudft2048((int16_t *)a,(int16_t *)b,0);
printf("hs1111111111111111:\n");
for (int j = 0; j < SYMBOLS_PER_SLOT*LEN; j++)
{
printf("a=%d + %dj\tb=%d + %dj\n", a[j*2],a[j*2+1],b[j*2],b[j*2+1]);
}
}
}
\ No newline at end of file
File added
File added
#include <stdio.h>
#include <cufft.h>
#include <cuda_runtime.h>
#define LEN 2048
#define SQRT2048_real 45.2876
#define SQRT2048_imag 45.3065
#define SYMBOLS_PER_SLOT 1400
__global__ void int_cufftComplex(int16_t *a, cufftComplex *b, int length)
{
int id = (blockIdx.x * blockDim.x + threadIdx.x);
if(id >=length)
{
return;
}
b[id].x = a[id*2];
b[id].y = a[id*2+1];
}
__global__ void cufftComplex_int(cufftComplex *a, int16_t *b, int length)
{
int id = (blockIdx.x * blockDim.x + threadIdx.x);
if(id >=length)
{
return;
}
b[id*2] = a[id].x/SQRT2048_real;
b[id*2+1] = a[id].y/SQRT2048_imag;
}
int16_t *x11;
cufftComplex *CompData1;
cufftHandle plan1;
void initcudft()
{
cudaMalloc((void**)&x11, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudaMalloc((void**)&CompData1, SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex));
// cufftPlan1d(&plan1, LEN, CUFFT_C2C, SYMBOLS_PER_SLOT);
int rank=1;
int n[1]; n[0]=LEN;
int nembed[2]; nembed[0]=LEN; nembed[1]=SYMBOLS_PER_SLOT;
int stride=1;
int dist = LEN;
int batch=SYMBOLS_PER_SLOT;
cufftPlanMany(&plan1,rank,n,nembed, stride ,dist , nembed, stride,dist, CUFFT_C2C, batch);
}
void cudft2048(int16_t *x,int16_t *y,unsigned char scale)
{
// cudaStream_t stream;
// cudaStreamCreate(&stream);
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaMemcpy(x11, x, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaMemcpyHostToDevice);
int threadNum = 512;
int blockNum = (SYMBOLS_PER_SLOT * LEN - 1) / threadNum + 1;
cudaEventRecord( start, 0 );
int_cufftComplex<<<blockNum, threadNum>>>(x11, CompData1, SYMBOLS_PER_SLOT*LEN);
cudaEventRecord( stop, 0 );
cudaEventSynchronize(start);
cudaEventSynchronize( stop );//注意函数所处位置
cufftExecC2C(plan1, (cufftComplex*)CompData1, (cufftComplex*)CompData1, CUFFT_FORWARD);//execute
cudaDeviceSynchronize();//wait to be done
cufftComplex_int<<<blockNum, threadNum>>>(CompData1, x11, SYMBOLS_PER_SLOT*LEN);
cudaMemcpy(y, x11, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaMemcpyDeviceToHost);// copy the result from device to host
cudaEventElapsedTime( &time, start, stop );
printf("cudft2048执行时间:%f(us)\n",time*1000);
}
void load_cuFFT(void)
{
initcudft();
int16_t *a = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int16_t *b = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudft2048(a,b,1);
}
int main()
{
load_cuFFT();
int16_t *a = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int16_t *b = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
// int16_t *a;
// int16_t *b;
// cudaHostAlloc((void **)&a, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocDefault);
// cudaHostAlloc((void **)&b, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocDefault);
for (int i = 0; i < SYMBOLS_PER_SLOT*LEN; i++)
{
*(a+2*i) = i;
*(a+2*i+1) = LEN-i;
}
for (int i = 0; i < 10; i++)
{
cudft2048((int16_t *)a,(int16_t *)b,0);
// printf("hs1111111111111111:\n");
// for (int j = 0; j < SYMBOLS_PER_SLOT*LEN; j++)
// {
// printf("a=%d + %dj\tb=%d + %dj\n", a[j*2],a[j*2+1],b[j*2],b[j*2+1]);
// }
}
}
#include <stdio.h>
#include <cufft.h>
#include <cuda_runtime.h>
#define LEN 2048
#define SQRT2048_real 45.2876
#define SQRT2048_imag 45.3065
#define SYMBOLS_PER_SLOT 14
__global__ void int_cufftComplex(int16_t *a, cufftComplex *b, int length)
{
int id = (blockIdx.x * blockDim.x + threadIdx.x);
if(id >=length)
{
return;
}
b[id].x = a[id*2];
b[id].y = a[id*2+1];
}
__global__ void cufftComplex_int(cufftComplex *a, int16_t *b, int length)
{
int id = (blockIdx.x * blockDim.x + threadIdx.x);
if(id >=length)
{
return;
}
b[id*2] = a[id].x/SQRT2048_real;
b[id*2+1] = a[id].y/SQRT2048_imag;
}
int16_t *cuda_x;
int16_t *cuda_y;
int16_t *x11;
cufftComplex *CompData1;
cufftHandle plan1;
void initcudft()
{
cudaMalloc((void**)&x11, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudaMalloc((void**)&CompData1, SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex));
// cufftPlan1d(&plan1, LEN, CUFFT_C2C, SYMBOLS_PER_SLOT);
int rank=1;
int n[1]; n[0]=LEN;
int nembed[2]; nembed[0]=LEN; nembed[1]=SYMBOLS_PER_SLOT;
int stride=1;
int dist = LEN;
int batch=SYMBOLS_PER_SLOT;
cufftPlanMany(&plan1,rank,n,nembed, stride ,dist , nembed, stride,dist, CUFFT_C2C, batch);
cudaHostAlloc((void **)&cuda_x, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocDefault);
cudaHostAlloc((void **)&cuda_y, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocDefault);
}
void cudft2048(int16_t *x,int16_t *y,unsigned char scale)
{
// cudaStream_t stream;
// cudaStreamCreate(&stream);
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord( start, 0 );
memcpy(cuda_x,x,SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudaMemcpy(x11, cuda_x, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaMemcpyHostToDevice);
int threadNum = 512;
int blockNum = (SYMBOLS_PER_SLOT * LEN - 1) / threadNum + 1;
int_cufftComplex<<<blockNum, threadNum>>>(x11, CompData1, SYMBOLS_PER_SLOT*LEN);
cufftExecC2C(plan1, (cufftComplex*)CompData1, (cufftComplex*)CompData1, CUFFT_FORWARD);//execute
cudaDeviceSynchronize();//wait to be done
cufftComplex_int<<<blockNum, threadNum>>>(CompData1, x11, SYMBOLS_PER_SLOT*LEN);
cudaMemcpy(cuda_y, x11, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaMemcpyDeviceToHost);// copy the result from device to host
memcpy(y,cuda_y,SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudaEventRecord( stop, 0 );
cudaEventSynchronize(start);
cudaEventSynchronize( stop );//注意函数所处位置
cudaEventElapsedTime( &time, start, stop );
printf("cudft2048执行时间:%f(us)\n",time*1000);
}
void load_cuFFT(void)
{
initcudft();
int16_t *a = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int16_t *b = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudft2048(a,b,1);
}
int main()
{
load_cuFFT();
int16_t *a = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int16_t *b = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
// int16_t *a;
// int16_t *b;
// cudaHostAlloc((void **)&a, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocDefault);
// cudaHostAlloc((void **)&b, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocDefault);
for (int i = 0; i < SYMBOLS_PER_SLOT*LEN; i++)
{
*(a+2*i) = rand()%LEN;
*(a+2*i+1) = rand()%LEN;
}
for (int i = 0; i < 10; i++)
{
cudft2048((int16_t *)a,(int16_t *)b,0);
// printf("hs1111111111111111:\n");
// for (int j = 0; j < SYMBOLS_PER_SLOT*LEN; j++)
// {
// printf("a=%d + %dj\tb=%d + %dj\n", a[j*2],a[j*2+1],b[j*2],b[j*2+1]);
// }
}
}
File added
File added
#include <stdio.h>
#include <cufft.h>
#include <cuda_runtime.h>
#define LEN 2048
#define SQRT2048_real 45.2876
#define SQRT2048_imag 45.3065
#define SYMBOLS_PER_SLOT 64
__global__ void int_cufftComplex(int16_t *a, cufftComplex *b, int length)
{
int id = (blockIdx.x * blockDim.x + threadIdx.x);
if(id >=length)
{
return;
}
b[id].x = a[id*2];
b[id].y = a[id*2+1];
}
__global__ void cufftComplex_int(cufftComplex *a, int16_t *b, int length)
{
int id = (blockIdx.x * blockDim.x + threadIdx.x);
if(id >=length)
{
return;
}
b[id*2] = a[id].x/SQRT2048_real;
b[id*2+1] = a[id].y/SQRT2048_imag;
}
// cufftComplex *fftData;
cufftComplex *d_fftData;
cufftHandle plan1;
// cufftComplex *CompData;
int16_t *temp;
int16_t *cuda_temp1;
// int16_t *cuda_temp;
void initcudft()
{
// CompData = (cufftComplex*)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex));
int rank=1;
int n[1]; n[0]=LEN;
int nembed[2]; nembed[0]=LEN; nembed[1]=SYMBOLS_PER_SLOT;
int stride=1;
int dist = LEN;
int batch=SYMBOLS_PER_SLOT;
cufftPlanMany(&plan1,rank,n,nembed, stride ,dist , nembed, stride,dist, CUFFT_C2C, batch);
// cufftPlan1d(&plan1, LEN, CUFFT_C2C, SYMBOLS_PER_SLOT);
// cudaMallocHost((void **)&fftData, SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex), cudaHostAllocMapped);
// cudaHostGetDevicePointer ((void**)&d_fftData, (void*)fftData, 0 );
cudaMalloc((void**)&d_fftData, SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex));
cudaHostAlloc((void **)&temp, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocPortable);
cudaMalloc((void **)&cuda_temp1, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
// cudaHostGetDevicePointer ((void**)&cuda_temp, (void*)temp, 0 );
}
void cudft2048(int16_t *x,int16_t *y,unsigned char scale)
{
// cudaEvent_t start, stop;
// float time;
// cudaEventCreate(&start);
// cudaEventCreate(&stop);
// cudaEventRecord( start, 0 );
// for (int i = 0; i < SYMBOLS_PER_SLOT*LEN; i++)
// {
// fftData[i].x = x[i*2];
// fftData[i].y = x[i*2+1];
// }
memcpy(temp,x,SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int threadNum = 512;
int blockNum = (SYMBOLS_PER_SLOT * LEN - 1) / threadNum + 1;
int_cufftComplex<<<blockNum, threadNum>>>(temp, d_fftData, SYMBOLS_PER_SLOT*LEN);
// memcpy(fftData,CompData,SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex));
// cudaMemcpy(d_fftData, CompData, SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex), cudaMemcpyHostToDevice);
cufftExecC2C(plan1, (cufftComplex*)d_fftData, (cufftComplex*)d_fftData, CUFFT_FORWARD);//execute
// cudaDeviceSynchronize();//wait to be done
// memcpy(CompData,fftData,SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex));
// cudaMemcpy(CompData, d_fftData, SYMBOLS_PER_SLOT*LEN * sizeof(cufftComplex), cudaMemcpyDeviceToHost);
// for (int i = 0; i < SYMBOLS_PER_SLOT*LEN; i++)
// {
// y[i*2] = fftData[i].x/SQRT2048_real;
// y[i*2+1] = fftData[i].y/SQRT2048_imag;
// }
cufftComplex_int<<<blockNum, threadNum>>>(d_fftData, cuda_temp1, SYMBOLS_PER_SLOT*LEN);
cudaMemcpy(temp, cuda_temp1, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaMemcpyDeviceToHost);
memcpy(y,temp,SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
// cudaEventRecord( stop, 0 );
// cudaEventSynchronize(start);
// cudaEventSynchronize( stop );//注意函数所处位置
// cudaEventElapsedTime( &time, start, stop );
// printf("cudft2048执行时间:%f(us)\n",time*1000);
// printf("----------------------------------\n");
}
void load_cuFFT(void)
{
initcudft();
int16_t *a = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int16_t *b = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
cudft2048(a,b,1);
}
int main()
{
load_cuFFT();
int16_t *a = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
int16_t *b = (int16_t *)malloc(SYMBOLS_PER_SLOT*LEN * sizeof(int32_t));
// int16_t *a;
// int16_t *b;
// cudaHostAlloc((void **)&a, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocDefault);
// cudaHostAlloc((void **)&b, SYMBOLS_PER_SLOT*LEN * sizeof(int32_t), cudaHostAllocDefault);
for (int j = 0; j < 100; j++)
{
for (int i = 0; i < SYMBOLS_PER_SLOT*LEN; i++)
{
*(a+2*i) = rand()%LEN;
*(a+2*i+1) = rand()%LEN;
}
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord( start, 0 );
cudft2048((int16_t *)a,(int16_t *)b,0);
cudaEventRecord( stop, 0 );
cudaEventSynchronize(start);
cudaEventSynchronize( stop );//注意函数所处位置
cudaEventElapsedTime( &time, start, stop );
printf("cudft2048执行时间:%f(us)\n",time*1000);
// printf("hs1111111111111111:\n");
// for (int j = 0; j < SYMBOLS_PER_SLOT*LEN; j++)
// {
// printf("a=%d + %dj\tb=%d + %dj\n", a[j*2],a[j*2+1],b[j*2],b[j*2+1]);
// }
}
}
File added
......@@ -56,4 +56,6 @@ typedef int(*nrLDPC_encoderfunc_t)(unsigned char **,unsigned char **,int,int,sho
\param p_profiler LDPC profiler statistics
*/
typedef int32_t(*nrLDPC_decoderfunc_t)(t_nrLDPC_dec_params* , int8_t*, int8_t* , t_nrLDPC_procBuf* , t_nrLDPC_time_stats* );
typedef void(*cudft_EnTx)(int16_t*, int16_t*, unsigned char);
typedef void(*cudft_load)(void);
#endif
\ No newline at end of file
......@@ -23,13 +23,23 @@
#ifdef LDPC_LOADER
nrLDPC_decoderfunc_t nrLDPC_decoder;
nrLDPC_encoderfunc_t nrLDPC_encoder;
cudft_EnTx cudft2048;
cudft_load load_cudft;
cudft_EnTx cudft20481;
cudft_load load_cudft1;
#else
/* functions to load the LDPC shared lib, implemented in openair1/PHY/CODING/nrLDPC_load.c */
extern int load_nrLDPClib(void) ;
extern int load_nrLDPClib_ref(char *libversion, nrLDPC_encoderfunc_t * nrLDPC_encoder_ptr); // for ldpctest
extern int load_cuFFT(void) ;
extern int load_cuFFT1(void) ;
/* ldpc coder/decoder functions, as loaded by load_nrLDPClib(). */
extern nrLDPC_decoderfunc_t nrLDPC_decoder;
extern nrLDPC_encoderfunc_t nrLDPC_encoder;
extern cudft_EnTx cudft2048;
extern cudft_load load_cudft;
extern cudft_EnTx cudft20481;
extern cudft_load load_cudft1;
// inline functions:
#include "openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_init_mem.h"
#endif
\ No newline at end of file
......@@ -39,6 +39,7 @@
#include "PHY/CODING/nrLDPC_extern.h"
#include "common/config/config_userapi.h"
#include "common/utils/load_module_shlib.h"
#include <dlfcn.h>
/* function description array, to be used when loading the encoding/decoding shared lib */
......@@ -73,4 +74,51 @@ int load_nrLDPClib_ref(char *libversion, nrLDPC_encoderfunc_t * nrLDPC_encoder_p
return 0;
}
int load_cuFFT(void) {
//手动加载指定位置的so动态库
void* handle = dlopen("../../../hs/cuFFT.so", RTLD_LAZY|RTLD_NODELETE|RTLD_GLOBAL);
if(!handle){
printf("open cuFFT.so error!\n");
return -1;
}
//根据动态链接库操作句柄与符号,返回符号对应的地址
cudft2048 = (cudft_EnTx) dlsym(handle, "_Z9cudft2048PsS_h");
if(!cudft2048){
printf("cuFFT.so cudft2048 error!\n");
dlclose(handle);
return -1;
}
load_cudft = (cudft_load) dlsym(handle, "_Z10load_cuFFTv");
if(!load_cudft){
printf("cuFFT.so load_cudft error!\n");
dlclose(handle);
return -1;
}
load_cudft();
return 0;
}
int load_cuFFT1(void) {
//手动加载指定位置的so动态库
void* handle1 = dlopen("../../../hs/cuFFT1.so", RTLD_LAZY|RTLD_NODELETE|RTLD_GLOBAL);
if(!handle1){
printf("open cuFFT1.so error!\n");
return -1;
}
//根据动态链接库操作句柄与符号,返回符号对应的地址
cudft20481 = (cudft_EnTx) dlsym(handle1, "_Z9cudft2048PsS_h");
if(!cudft20481){
printf("cuFFT1.so cudft2048 error!\n");
dlclose(handle1);
return -1;
}
load_cudft1 = (cudft_load) dlsym(handle1, "_Z10load_cuFFTv");
if(!load_cudft1){
printf("cuFFT1.so load_cudft error!\n");
dlclose(handle1);
return -1;
}
load_cudft1();
return 0;
}
......@@ -89,6 +89,13 @@ int nr_slot_fep_ul(NR_DL_FRAME_PARMS *frame_parms,
unsigned char Ns,
int sample_offset);
int cuda_nr_slot_fep_ul(NR_DL_FRAME_PARMS *frame_parms,
int32_t *rxdata,
int32_t *rxdataF,
int symbol_slot,
unsigned char Ns,
int sample_offset);
/*!
\brief This function implements the dft transform precoding in PUSCH
\param z Pointer to output in frequnecy domain
......
......@@ -26,6 +26,10 @@
#include "PHY/LTE_ESTIMATION/lte_estimation.h"
#include "PHY/NR_UE_ESTIMATION/nr_estimation.h"
#include <common/utils/LOG/log.h>
#include "PHY/CODING/nrLDPC_extern.h"
#include "PHY/CODING/nrLDPC_extern.h"
#include "common/utils/LOG/vcd_signal_dumper.h"
#include "common/utils/LOG/log.h"
//#define DEBUG_FEP
......@@ -332,10 +336,80 @@ int nr_slot_fep_ul(NR_DL_FRAME_PARMS *frame_parms,
rxdata_ptr,
(int16_t *)&rxdataF[symbol * frame_parms->ofdm_symbol_size],
1);
// cudft2048(rxdata_ptr,(int16_t *)&rxdataF[symbol * frame_parms->ofdm_symbol_size],0);
// clear DC carrier from OFDM symbols
rxdataF[symbol * frame_parms->ofdm_symbol_size] = 0;
// static int cu_2048 = 0;
// if(cu_2048==0)
// {
// LOG_M("./fft/FFT0.m","input",rxdata_ptr,frame_parms->ofdm_symbol_size,1,15);
// LOG_M("./fft/FFT1.m","fftoutput",(int16_t *)&rxdataF[symbol * frame_parms->ofdm_symbol_size],2048,1,15);
// }
// else if(cu_2048<13)
// {
// LOG_M("./fft/FFT0.m","input",rxdata_ptr,frame_parms->ofdm_symbol_size,1,13);
// LOG_M("./fft/FFT1.m","fftoutput",(int16_t *)&rxdataF[symbol * frame_parms->ofdm_symbol_size],2048,1,13);
// }
// else if(cu_2048 == 13)
// {
// LOG_M("./fft/FFT0.m","input",rxdata_ptr,frame_parms->ofdm_symbol_size,1,14);
// LOG_M("./fft/FFT1.m","fftoutput",(int16_t *)&rxdataF[symbol * frame_parms->ofdm_symbol_size],2048,1,14);
// }
// cu_2048++;
return 0;
}
int cuda_nr_slot_fep_ul(NR_DL_FRAME_PARMS *frame_parms,
int32_t *rxdata,
int32_t *rxdataF,
int symbol_slot,
unsigned char Ns,
int sample_offset)
{
unsigned int nb_prefix_samples = frame_parms->nb_prefix_samples;
unsigned int nb_prefix_samples0 = frame_parms->nb_prefix_samples0;
// dft_size_idx_t dftsize = get_dft_size_idx(frame_parms->ofdm_symbol_size);
// This is for misalignment issues
int32_t tmp_dft_in[frame_parms->ofdm_symbol_size*symbol_slot] __attribute__ ((aligned (32)));
unsigned int slot_offset = frame_parms->get_samples_slot_timestamp(Ns,frame_parms,0);
int16_t *rxdata_ptr;
for (int symbol = 0; symbol < symbol_slot; symbol++) {
// offset of first OFDM symbol
int32_t rxdata_offset = slot_offset + nb_prefix_samples0;
// offset of n-th OFDM symbol
rxdata_offset += symbol * (frame_parms->ofdm_symbol_size + nb_prefix_samples);
// use OFDM symbol from within 1/8th of the CP to avoid ISI
rxdata_offset -= nb_prefix_samples / 8;
// if input to dft is not 256-bit aligned
memcpy((void *)&tmp_dft_in[symbol*frame_parms->ofdm_symbol_size],
(void *)&rxdata[rxdata_offset - sample_offset],
(frame_parms->ofdm_symbol_size) * sizeof(int32_t));
}
rxdata_ptr = (int16_t *)tmp_dft_in;
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_CUFFT_WAIT, 1 );
cudft20481(rxdata_ptr,(int16_t *)rxdataF,0);
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_CUFFT_WAIT, 0 );
// clear DC carrier from OFDM symbols
for (int symbol = 0; symbol < symbol_slot; symbol++)
rxdataF[symbol * frame_parms->ofdm_symbol_size] = 0;
// static int cu_20481 = 0;
// if(cu_20481==0)
// {
// LOG_M("./fft/cuFFT0.m","input",rxdata_ptr,frame_parms->ofdm_symbol_size*symbol_slot,1,1);
// LOG_M("./fft/cuFFT1.m","cufftoutput",(int16_t *)&rxdataF[0],frame_parms->ofdm_symbol_size*symbol_slot,1,1);
// }
// cu_20481++;
return 0;
}
......
......@@ -660,15 +660,24 @@ void nr_fep_full(RU_t *ru, int slot) {
// remove_7_5_kHz(ru,proc->tti_rx<<1);
// remove_7_5_kHz(ru,1+(proc->tti_rx<<1));
for (l = 0; l < fp->symbols_per_slot; l++) {
for (aa = 0; aa < fp->nb_antennas_rx; aa++) {
nr_slot_fep_ul(fp,
ru->common.rxdata[aa],
ru->common.rxdataF[aa],
l,
proc->tti_rx,
ru->N_TA_offset);
}
// for (l = 0; l < fp->symbols_per_slot; l++) {
// for (aa = 0; aa < fp->nb_antennas_rx; aa++) {
// nr_slot_fep_ul(fp,
// ru->common.rxdata[aa],
// ru->common.rxdataF[aa],
// l,
// proc->tti_rx,
// ru->N_TA_offset);
// }
// }
for (aa = 0; aa < fp->nb_antennas_rx; aa++) {
cuda_nr_slot_fep_ul(fp,
ru->common.rxdata[aa],
ru->common.rxdataF[aa],
fp->symbols_per_slot,
proc->tti_rx,
ru->N_TA_offset);
}
if (ru->idx == 0) VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME( VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPRX, 0 );
......
......@@ -503,6 +503,15 @@ void phy_procedures_gNB_common_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx)
uint8_t symbol;
unsigned char aa;
// for (aa = 0; aa < gNB->frame_parms.nb_antennas_rx; aa++) {
// cuda_nr_slot_fep_ul(&gNB->frame_parms,
// gNB->common_vars.rxdata[aa],
// gNB->common_vars.rxdataF[aa],
// gNB->frame_parms.Ncp==EXTENDED?12:14,
// slot_rx,
// 0);
// }
for(symbol = 0; symbol < (gNB->frame_parms.Ncp==EXTENDED?12:14); symbol++) {
for (aa = 0; aa < gNB->frame_parms.nb_antennas_rx; aa++) {
nr_slot_fep_ul(&gNB->frame_parms,
......
......@@ -65,6 +65,7 @@
#include <executables/softmodem-common.h>
#include "PHY/NR_REFSIG/ul_ref_seq_nr.h"
#include "PHY/CODING/nrLDPC_extern.h"
//#define DEBUG_ULSIM
LCHAN_DESC DCCH_LCHAN_DESC,DTCH_DL_LCHAN_DESC,DTCH_UL_LCHAN_DESC;
......@@ -323,6 +324,8 @@ int main(int argc, char **argv)
int ul_proc_error = 0; // uplink processing checking status flag
//logInit();
randominit(0);
// load_cuFFT();
// load_cuFFT1();
/* initialize the sin-cos table */
InitSinLUT();
......
......@@ -262,7 +262,7 @@ THREAD_STRUCT = (
#three config for level of parallelism "PARALLEL_SINGLE_THREAD", "PARALLEL_RU_L1_SPLIT", or "PARALLEL_RU_L1_TRX_SPLIT"
parallel_config = "PARALLEL_SINGLE_THREAD";
#two option for worker "WORKER_DISABLE" or "WORKER_ENABLE"
worker_config = "WORKER_ENABLE";
worker_config = "WORKER_DISABLE";
}
);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment