Commit b76e487d authored by tyhsu's avatar tyhsu

v1. move cpu function to gpu

parent fdc78bfb
...@@ -52,9 +52,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -I${OPENAIR1_DIR}/ ") ...@@ -52,9 +52,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -I${OPENAIR1_DIR}/ ")
### ADD CUDA LIBRARY ### ADD CUDA LIBRARY
CUDA_ADD_LIBRARY(PHY_CU ${OPENAIR1_DIR}/CUDA/CUDA_phy_procedure.cu CUDA_ADD_LIBRARY(PHY_CU ${OPENAIR1_DIR}/CUDA/CUDA_phy_procedure.cu
${OPENAIR1_DIR}/CUDA/init_cuda.cu
${OPENAIR1_DIR}/CUDA/struct.h ${OPENAIR1_DIR}/CUDA/struct.h
${OPENAIR1_DIR}/CUDA/cuda_struct.h
${OPENAIR1_DIR}/CUDA/checkError.h ${OPENAIR1_DIR}/CUDA/checkError.h
) )
......
...@@ -2033,10 +2033,8 @@ void set_function_spec_param(RU_t *ru) { ...@@ -2033,10 +2033,8 @@ void set_function_spec_param(RU_t *ru) {
} else if (ru->function == gNodeB_3GPP) { } else if (ru->function == gNodeB_3GPP) {
ru->do_prach = 0; // no prach processing in RU ru->do_prach = 0; // no prach processing in RU
ru->feprx = (get_thread_worker_conf() == WORKER_ENABLE) ? ru_fep_full_2thread : fep_full; // RX DFTs ru->feprx = (get_thread_worker_conf() == WORKER_ENABLE) ? ru_fep_full_2thread : fep_full; // RX DFTs
//ru->feptx_ofdm = (get_thread_worker_conf() == WORKER_ENABLE) ? nr_feptx_ofdm_2thread : nr_feptx_ofdm; // this is fep with idft and precoding ru->feptx_ofdm = (get_thread_worker_conf() == WORKER_ENABLE) ? nr_feptx_ofdm_2thread : nr_feptx_ofdm; // this is fep with idft and precoding
ru->feptx_ofdm = CUDA_prec_ofdm; // this is fep with idft and precoding ru->feptx_prec = nr_feptx_prec; // this is fep with idft and precoding
//ru->feptx_prec = nr_feptx_prec; // this is fep with idft and precoding
ru->feptx_prec = NULL; // this is fep with idft and precoding
ru->fh_north_in = NULL; // no incoming fronthaul from north ru->fh_north_in = NULL; // no incoming fronthaul from north
ru->fh_north_out = NULL; // no outgoing fronthaul to north ru->fh_north_out = NULL; // no outgoing fronthaul to north
ru->nr_start_if = NULL; // no if interface ru->nr_start_if = NULL; // no if interface
......
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file CUDA_phy_procedure.cu
* \brief Create and Implementation of beamforming and ifft in gpu
* \author TY Hsu, CW Chang
* \date 2018
* \version 0.1
* \company ISIP@NCTU and Eurecom
* \email: tyhsu@cs.nctu.edu.tw, zhang0756107.cs07g@nctu.edu.tw
* \note
* \warning
*/
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <math.h> #include <math.h>
...@@ -37,7 +6,7 @@ ...@@ -37,7 +6,7 @@
#include <cufft.h> #include <cufft.h>
#include "CUDA/checkError.h" #include "CUDA/checkError.h"
#include "CUDA/struct.h" #include "CUDA/struct.h"
#include "CUDA/cuda_struct.h"
__global__ void cu_intToComplex(int *input, Complex *signal){ __global__ void cu_intToComplex(int *input, Complex *signal){
int id = blockIdx.x*1024 + threadIdx.x; int id = blockIdx.x*1024 + threadIdx.x;
...@@ -51,141 +20,141 @@ __global__ void cu_ComplexToInt(int *output, Complex *signal){ ...@@ -51,141 +20,141 @@ __global__ void cu_ComplexToInt(int *output, Complex *signal){
((short*)&output[id])[1] = round(signal[id].y); ((short*)&output[id])[1] = round(signal[id].y);
} }
__global__ void cu_CP_fft_resultin(unsigned char nb_prefix_samples, int *input, int *output, int fftsize, int nb_symbols){ extern "C" void CUDA_PHY_ofdm_mod(int *input,
int id = blockIdx.x*1024 + threadIdx.x; int *output,
int elementId = id%fftsize;
int symbolId = id/fftsize;
int slotId = symbolId/nb_symbols;
int symbIdinSlot = symbolId%nb_symbols;
int slotElmtNum = fftsize*(nb_symbols+1);
int CPElmtNum = fftsize+nb_prefix_samples;
int offset = slotId*slotElmtNum + symbIdinSlot*CPElmtNum;
output[offset + nb_prefix_samples + elementId] = input[id];
if(elementId >= fftsize-nb_prefix_samples){
output[offset + (fftsize-nb_prefix_samples)] = input[id];
}
}
__global__ void cu_CP0_fft_resultin(unsigned char nb_prefix_samples0, unsigned char nb_prefix_samples,
int *input, int *output, int fftsize, int nb_symbols){
int id = blockIdx.x*1024 + threadIdx.x;
int elementId = id%fftsize;
int symbolId = id/fftsize;
int slotId = symbolId/nb_symbols;
int symbIdinSlot = symbolId%nb_symbols;
int slotElmtNum = fftsize*(nb_symbols+1);
int CP0ElmtNum = fftsize+nb_prefix_samples0;
int CPElmtNum = fftsize+nb_prefix_samples;
if(symbIdinSlot==0){
int offset = slotId*slotElmtNum;
output[offset + nb_prefix_samples0+ elementId] = input[id];
if(elementId >= fftsize-nb_prefix_samples0){
output[offset + (fftsize-nb_prefix_samples0)] = input[id];
}
}else{
int offset = slotId*slotElmtNum + CP0ElmtNum + (symbIdinSlot-1)*CPElmtNum;
output[offset + nb_prefix_samples + elementId] = input[id];
if(elementId >= fftsize-nb_prefix_samples){
output[offset + (fftsize-nb_prefix_samples)] = input[id];
}
}
}
extern "C" void CUDA_ifft_ofdm( int **output,
int fftsize, int fftsize,
unsigned char nb_symbols, unsigned char nb_symbols,
unsigned char nb_prefix_samples, unsigned short nb_prefix_samples,
unsigned char nb_prefix_samples0,
int nb_tx,
int Ncp,
Extension_t etype){ Extension_t etype){
//for(int i=0; i<fftsize; i++) printf("%d+%di\n", ((short*)&input[0][i])[0], ((short*)&input[0][i])[1]); if(nb_symbols==0) return ;
int *d_txdataF_BF = cu_ru.d_txdataF_BF;
int *d_data_wCP = cu_ru.d_data_wCP;
Complex *d_signal = cu_ru.d_signal;
cufftHandle plan = cu_ru.plan;
/* Complex *d_signal;
for(int aa=0; aa<nb_tx; aa++){ gpuErrchk( cudaMalloc((void**)&d_signal, fftsize*sizeof(Complex)*nb_symbols) );
int elementNum = fftsize*nb_symbols;
gpuErrchk( cudaMemcpy(&d_data[aa*elementNum], input[aa], sizeof(int)*elementNum, cudaMemcpyHostToDevice) ); int *d_data;
}*/ gpuErrchk( cudaMalloc((void**)&d_data, fftsize*sizeof(int)*nb_symbols) );
gpuErrchk( cudaMemcpy(d_data, input, fftsize*sizeof(int)*nb_symbols, cudaMemcpyHostToDevice) );
int threadNum = 1024; int threadNum = 1024;
int blockNum = fftsize*nb_symbols*nb_tx / threadNum; int blockNum = fftsize*nb_symbols/threadNum;
cu_intToComplex<<<blockNum, threadNum>>>(d_txdataF_BF, d_signal); cu_intToComplex<<<blockNum, threadNum>>>(d_data, d_signal);
//CHECK_STATE("cu_intToComplex");
cufftErrchk( cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE));
cu_ComplexToInt<<<blockNum, threadNum>>>(d_txdataF_BF, d_signal); cufftHandle plan;
//CHECK_STATE("cu_ComplexToInt"); cufftErrchk( cufftPlan1d(&plan, fftsize, CUFFT_C2C, nb_symbols) );
cufftErrchk( cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD) );
Complex *h_output = (Complex*)malloc(fftsize*sizeof(Complex)*nb_symbols) ;
gpuErrchk( cudaMemcpy(h_output, d_signal, fftsize*sizeof(Complex)*nb_symbols, cudaMemcpyDeviceToHost) );
//only do cyclic_prefix, suffix/none not finish yet. cu_ComplexToInt<<<blockNum, threadNum>>>(d_data, d_signal);
if(Ncp==1){
cu_CP_fft_resultin<<<blockNum, threadNum>>>(nb_prefix_samples, d_txdataF_BF, d_data_wCP, fftsize, nb_symbols); int *res = (int*)malloc(fftsize*sizeof(int)*nb_symbols);
//CHECK_STATE("cu_CP_fft_resultin"); gpuErrchk( cudaMemcpy(res, d_data, fftsize*sizeof(int)*nb_symbols, cudaMemcpyDeviceToHost) );
}else{
cu_CP0_fft_resultin<<<blockNum, threadNum>>>(nb_prefix_samples0, nb_prefix_samples, d_txdataF_BF, d_data_wCP, fftsize, nb_symbols); /*
//CHECK_STATE("cu_CP0_fft_resultin"); for(int i=0; i<fftsize*nb_symbols; i++){
printf("res(%d) %d+%di\n", i, ((short*)&res[i])[0], ((short*)&res[i])[1]);
}*/
for(int symb_th=0; symb_th<nb_symbols; symb_th++){
int* output_ptr;
switch(etype){
case CYCLIC_PREFIX:{
output_ptr = &output[symb_th*fftsize + (1+symb_th)*nb_prefix_samples];
memcpy(output_ptr, res, fftsize<<2);
int j=fftsize;
for(int k=-1; k>=-nb_prefix_samples; k--){
output_ptr[k] = output_ptr[--j];
}
break;
} }
case CYCLIC_SUFFIX:{
output_ptr = &output[symb_th*fftsize + (symb_th)*nb_prefix_samples];
memcpy(output_ptr, res, fftsize<<2);
//write back gpu->cpu for(int k=0; k<nb_prefix_samples; k++){
for(int aa=0; aa<nb_tx; aa++){ output_ptr[fftsize+k] = output_ptr[k];
gpuErrchk( cudaMemcpy(output[aa], &d_data_wCP[aa*(nb_symbols+1)*fftsize], fftsize*(nb_symbols+1)*sizeof(int), cudaMemcpyDeviceToHost) ); }
break;
}
case ZEROS:{
break;
}
case NONE:{
output_ptr = &output[fftsize];
memcpy(output_ptr, res, fftsize<<2);
break;
} }
cudaDeviceSynchronize();
default:{
break;
}
}
cufftDestroy(plan);
free(h_output);
free(res);
cudaFree(d_signal);
}
} }
__global__ void conjMul(int *d_x1, int *d_x2, int *d_y, int aa, int div, int fftsize, int nb_symbols){ __global__ void conjMul(int *d_x1, int *d_x2, int *d_y, short zero_flag,unsigned int div){
int id = blockIdx.x*1024 + threadIdx.x; int id = blockIdx.x*1024 + threadIdx.x;
int *x1 = &d_x1[id]; int *x1 = &d_x1[id];
int *x2 = &d_x2[id%fftsize]; int *x2 = &d_x2[id];
int *y = &d_y[aa*fftsize*nb_symbols + id]; int *y = &d_y[id];
int re, im; int re, im;
//conj(x1) * x2
re = ((short*)x1)[0]*((short*)x2)[0] + ((short*)x1)[1]*((short*)x2)[1]; ((short*)x1)[1] *= -1;
im = ((short*)x1)[0]*((short*)x2)[1] - ((short*)x1)[1]*((short*)x2)[0]; re = ((short*)x1)[0]*((short*)x2)[0] - ((short*)x1)[1]*((short*)x2)[1];
im = ((short*)x1)[1]*((short*)x2)[0] + ((short*)x1)[0]*((short*)x2)[1];
re = re / div; re = re / div;
im = im / div; im = im / div;
if(zero_flag){
((short*)y)[0] = re;
((short*)y)[1] = im;
}else{
((short*)y)[0] += re; ((short*)y)[0] += re;
((short*)y)[1] += im; ((short*)y)[1] += im;
}
} }
extern "C" void CUDA_multadd_cpx_vector(int *x1, int *x2, int *y, short zero_flag,unsigned int N, int output_shift){
int *d_x1, *d_x2, *d_y;
gpuErrchk( cudaMalloc((void**)&d_x1, N*sizeof(int)) );
gpuErrchk( cudaMalloc((void**)&d_x2, N*sizeof(int)) );
gpuErrchk( cudaMalloc((void**)&d_y, N*sizeof(int)) );
extern "C" void CUDA_beam_precoding(int **txdataF, int ***weight, int L_ssb, int shift, int fftsize, int nb_symbols, int nb_antenna_ports, int nb_tx){ gpuErrchk( cudaMemcpy(d_x1, x1, N*sizeof(int), cudaMemcpyHostToDevice) );
gpuErrchk( cudaMemcpy(d_x2, x2, N*sizeof(int), cudaMemcpyHostToDevice) );
//initial BF data; if(zero_flag == 0){
gpuErrchk( cudaMemset(cu_ru.d_txdataF_BF, 0, fftsize*nb_symbols*sizeof(int)*nb_tx) ); gpuErrchk( cudaMemcpy(d_y, y, N*sizeof(int), cudaMemcpyHostToDevice) );
//move data to gpu }else{
for(int p=0; p<nb_antenna_ports; p++){ gpuErrchk( cudaMemset(d_y, 0, N*sizeof(int)) );
gpuErrchk( cudaMemcpy(cu_ru.d_txdataF[p], txdataF[p], fftsize*sizeof(int)*nb_symbols, cudaMemcpyHostToDevice) );
} }
unsigned int div = 1;
div = div << output_shift;
int threadNum = 1024; int threadNum = 1024;
int blockNum = fftsize*nb_symbols/threadNum; int blockNum = N / threadNum;
int div = 1<<shift; conjMul<<<blockNum, threadNum>>>(d_x1, d_x2, d_y, zero_flag, div);
for(int aa=0; aa<nb_tx; aa++){ cudaDeviceSynchronize();
for(int p=0; p<nb_antenna_ports; p++){ CHECK_STATE("conjMul");
if((L_ssb>>p) & 0x01){
gpuErrchk( cudaMemcpy(cu_ru.d_weight[p][aa], weight[p][aa], fftsize*sizeof(int), cudaMemcpyHostToDevice) );
conjMul<<<blockNum, threadNum>>>(cu_ru.d_txdataF[p], cu_ru.d_weight[p][aa],
cu_ru.d_txdataF_BF, aa, div, fftsize, nb_symbols);
}
}
}
gpuErrchk( cudaMemcpy(y, d_y, N*sizeof(int), cudaMemcpyDeviceToHost) );
}
cudaFree(d_x1);
cudaFree(d_x2);
cudaFree(d_y);
}
#ifndef CUDA #ifndef CUDA
#define CUDA #define CUDA
#include "cuda_struct.h"
#if __cplusplus #if __cplusplus
extern "C" { extern "C" {
#endif #endif
void CUDA_hello(void); void CUDA_hello(void);
void CUDA_ifft_ofdm( int **output, void CUDA_PHY_ofdm_mod(int *input,
int *output,
int fftsize, int fftsize,
unsigned char nb_symbols, unsigned char nb_symbols,
unsigned char nb_prefix_samples, unsigned short nb_prefix_samples,
unsigned char nb_prefix_samples0,
int nb_tx,
int Ncp,
Extension_t etype); Extension_t etype);
void CUDA_beam_precoding(int **txdataF, int ***weight, int L_ssb, int shift, int fftsize, int nb_symbols, int nb_antenna_ports, int nb_tx); void CUDA_multadd_cpx_vector(int* x1, int *x2, int *y, short zero_flag, unsigned int N, int output_shift);
#if __cplusplus #if __cplusplus
} }
......
#ifndef CHECKERROR_H __global__ void gpu_hello(void){
#define CHECKERROR_H printf("Hello world from GPU!\n");
}
extern "C" void CUDA_hello(void){
printf("ready to gpu_hello\n");
gpu_hello<<<1,1>>>();
cudaDeviceSynchronize();
}
static const char* _cudaGetErrorEnum(cufftResult error){ static const char* _cudaGetErrorEnum(cufftResult error){
switch (error){ switch (error){
...@@ -36,7 +43,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t ...@@ -36,7 +43,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
#define CHECK_STATE(msg) {checkCudaState((msg), __FILE__, __LINE__);} #define CHECK_STATE(msg) {checkCudaState((msg), __FILE__, __LINE__);}
inline void checkCudaState(const char *msg, const char *file, const int line){ inline void checkCudaState(const char *msg, const char *file, const int line){
cudaDeviceSynchronize();
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if(err != cudaSuccess) { if(err != cudaSuccess) {
fprintf(stderr, "[%s]gpu error: %s %s %d\n", msg, cudaGetErrorString(err), file, line); fprintf(stderr, "[%s]gpu error: %s %s %d\n", msg, cudaGetErrorString(err), file, line);
...@@ -49,4 +55,4 @@ inline void checkCudaState(const char *msg, const char *file, const int line){ ...@@ -49,4 +55,4 @@ inline void checkCudaState(const char *msg, const char *file, const int line){
#endif
#ifndef CUDA_STRUCT_H
#define CUDA_STRUCT_H
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#if __cplusplus
extern "C" {
#endif
typedef float2 Complex;
typedef struct cuda_cu_ru_t{
//beamforming precoding
int **d_txdataF;//14symb-port0, 14symb-port1, ......
int ***d_weight;//[p][tx][symb]
cudaStream_t *d_beam_stream;
//ifft
int *d_txdataF_BF;//14symb-tx0, 14symb-tx1, ......
Complex *d_signal;
int *d_data_wCP;
cufftHandle plan;
}cuda_cu_ru;
extern cuda_cu_ru cu_ru;
#if __cplusplus
}
#endif
#endif
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file init_cuda.cu
* \brief Create and Implementation of beamforming and ifft in gpu
* \author TY Hsu, CW Chang
* \date 2018
* \version 0.1
* \company ISIP@NCTU and Eurecom
* \email: tyhsu@cs.nctu.edu.tw, zhang0756107.cs07g@nctu.edu.tw
* \note
* \warning
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include "CUDA/checkError.h"
#include "CUDA/struct.h"
#include "CUDA/cuda_struct.h"
cuda_cu_ru cu_ru;
extern "C" void init_cuda(int nb_tx, int nb_symbols, int fftsize){
printf("init_cuda %d %d %d \n\n\n", nb_tx, nb_symbols, fftsize);
int nb_antenna_ports = 8;
//beamforming precoding
cu_ru.d_txdataF = (int**)malloc(sizeof(int*) * nb_antenna_ports);
for(int p=0; p<nb_antenna_ports; p++){
gpuErrchk( cudaMalloc((void**)&cu_ru.d_txdataF[p], fftsize*sizeof(int)*nb_symbols) );
}
cu_ru.d_beam_stream = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nb_tx);
for(int aa=0; aa<nb_tx; aa++){
gpuErrchk( cudaStreamCreate(&cu_ru.d_beam_stream[aa]) );
}
cu_ru.d_weight = (int***)malloc(sizeof(int**) * nb_antenna_ports);
for(int p=0; p<nb_antenna_ports; p++){
cu_ru.d_weight[p] = (int**)malloc(sizeof(int*) * nb_tx);
for(int aa=0; aa<nb_tx; aa++){
gpuErrchk( cudaMalloc((void**)&cu_ru.d_weight[p][aa], fftsize*sizeof(int)) );
}
}
//ifft
gpuErrchk( cudaMalloc((void**)&cu_ru.d_txdataF_BF, fftsize*sizeof(int)*nb_symbols*nb_tx) );
gpuErrchk( cudaMalloc((void**)&cu_ru.d_signal, fftsize*sizeof(Complex)*nb_symbols*nb_tx) );
gpuErrchk( cudaMalloc((void**)&cu_ru.d_data_wCP, fftsize*(nb_symbols+1)*nb_tx*sizeof(int)) );
cufftErrchk( cufftPlan1d(&cu_ru.plan, fftsize, CUFFT_C2C, nb_symbols*nb_tx) );
}
#ifndef INIT_CUDA_DEF #ifndef INIT_CUDA_DEF
#define INIT_CUDA_DEF #define INIT_CUDA_DEF
#include "cuda_struct.h" #include "struct.h"
#if __cplusplus
extern "C" {
#endif
typedef cuda_ifft_t{
Complex *d_signal;
Complex *d_output;
int *d_data;
}cuda_ifft
void init_cuda(int nb_tx, int nb_symbols, int fftsize);
#if __cplusplus
}
#endif
#endif #endif
#ifndef OAI_STRUCT #ifndef CUDA_STRUCT
#define OAI_STRUCT #define CUDA_STRUCT
#include <cuda.h>
#include <cuda_runtime.h>
typedef float2 Complex;
typedef enum { typedef enum {
CYCLIC_PREFIX, CYCLIC_PREFIX,
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include "LAYER2/MAC/mac_extern.h" #include "LAYER2/MAC/mac_extern.h"
#include "assertions.h" #include "assertions.h"
#include <math.h> #include <math.h>
#include "openair1/CUDA/init_cuda_def.h"
#include "PHY/NR_TRANSPORT/nr_ulsch.h" #include "PHY/NR_TRANSPORT/nr_ulsch.h"
#include "PHY/NR_REFSIG/nr_refsig.h" #include "PHY/NR_REFSIG/nr_refsig.h"
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
#include "assertions.h" #include "assertions.h"
#include <math.h> #include <math.h>
#include "openair1/PHY/defs_RU.h" #include "openair1/PHY/defs_RU.h"
#include "openair1/CUDA/init_cuda_def.h"
int nr_phy_init_RU(RU_t *ru) { int nr_phy_init_RU(RU_t *ru) {
...@@ -42,7 +41,6 @@ int nr_phy_init_RU(RU_t *ru) { ...@@ -42,7 +41,6 @@ int nr_phy_init_RU(RU_t *ru) {
LOG_I(PHY,"Initializing RU signal buffers (if_south %s) nb_tx %d\n",ru_if_types[ru->if_south],ru->nb_tx); LOG_I(PHY,"Initializing RU signal buffers (if_south %s) nb_tx %d\n",ru_if_types[ru->if_south],ru->nb_tx);
init_cuda(ru->nb_tx, fp->symbols_per_slot, fp->ofdm_symbol_size);
if (ru->if_south <= REMOTE_IF5) { // this means REMOTE_IF5 or LOCAL_RF, so allocate memory for time-domain signals if (ru->if_south <= REMOTE_IF5) { // this means REMOTE_IF5 or LOCAL_RF, so allocate memory for time-domain signals
// Time-domain signals // Time-domain signals
......
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include "modulation_eNB.h" #include "modulation_eNB.h"
#include "nr_modulation.h" #include "nr_modulation.h"
#include "common/utils/LOG/vcd_signal_dumper.h" #include "common/utils/LOG/vcd_signal_dumper.h"
#include "CUDA/CUDA_phy_procedure_def.h"
int beam_precoding(int32_t **txdataF, int beam_precoding(int32_t **txdataF,
...@@ -165,9 +166,16 @@ int nr_beam_precoding(int32_t **txdataF, ...@@ -165,9 +166,16 @@ int nr_beam_precoding(int32_t **txdataF,
} }
} }
void (*multadd_cpx_vector_ptr)(int*, int*, int*, short, unsigned int, int);
#ifdef CUDA
multadd_cpx_vector_ptr = CUDA_multadd_cpx_vector;
#else
multadd_cpx_vector_ptr = multadd_cpx_vector;
#endif
for (p=0; p<nb_antenna_ports; p++) { for (p=0; p<nb_antenna_ports; p++) {
if ((frame_parms->L_ssb >> p) & 0x01) { if ((frame_parms->L_ssb >> p) & 0x01) {
multadd_cpx_vector((int16_t*)&txdataF[p][symbol*frame_parms->ofdm_symbol_size], multadd_cpx_vector_ptr((int16_t*)&txdataF[p][symbol*frame_parms->ofdm_symbol_size],
(int16_t*)beam_weights[p][aa], (int16_t*)beam_weights[p][aa],
(int16_t*)&txdataF_BF[aa][symbol*frame_parms->ofdm_symbol_size], (int16_t*)&txdataF_BF[aa][symbol*frame_parms->ofdm_symbol_size],
0, 0,
......
...@@ -77,8 +77,15 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) { ...@@ -77,8 +77,15 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
LOG_D(PHY,"SFN/SF:RU:TX:%d/%d Generating slot %d (first_symbol %d num_symbols %d)\n",ru->proc.frame_tx, ru->proc.tti_tx,slot,first_symbol,num_symbols); LOG_D(PHY,"SFN/SF:RU:TX:%d/%d Generating slot %d (first_symbol %d num_symbols %d)\n",ru->proc.frame_tx, ru->proc.tti_tx,slot,first_symbol,num_symbols);
void (*PHY_ofdm_mod_ptr)(int*, int*, int, unsigned char, unsigned short, Extension_t);
#ifdef CUDA
PHY_ofdm_mod_ptr = CUDA_PHY_ofdm_mod;
#else
PHY_ofdm_mod_ptr = PHY_ofdm_mod;
#endif
if (fp->Ncp == 1) { if (fp->Ncp == 1) {
PHY_ofdm_mod(&ru->common.txdataF_BF[aa][slot_offsetF], PHY_ofdm_mod_ptr(&ru->common.txdataF_BF[aa][slot_offsetF],
(int*)&ru->common.txdata[aa][slot_offset], (int*)&ru->common.txdata[aa][slot_offset],
fp->ofdm_symbol_size, fp->ofdm_symbol_size,
num_symbols, num_symbols,
...@@ -87,13 +94,13 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) { ...@@ -87,13 +94,13 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
} }
else { else {
if (first_symbol==0) { if (first_symbol==0) {
PHY_ofdm_mod(&ru->common.txdataF_BF[aa][slot_offsetF], PHY_ofdm_mod_ptr(&ru->common.txdataF_BF[aa][slot_offsetF],
(int*)&ru->common.txdata[aa][slot_offset], (int*)&ru->common.txdata[aa][slot_offset],
fp->ofdm_symbol_size, fp->ofdm_symbol_size,
1, 1,
fp->nb_prefix_samples0, fp->nb_prefix_samples0,
CYCLIC_PREFIX); CYCLIC_PREFIX);
PHY_ofdm_mod(&ru->common.txdataF_BF[aa][slot_offsetF+fp->ofdm_symbol_size], PHY_ofdm_mod_ptr(&ru->common.txdataF_BF[aa][slot_offsetF+fp->ofdm_symbol_size],
(int*)&ru->common.txdata[aa][slot_offset+fp->nb_prefix_samples0+fp->ofdm_symbol_size], (int*)&ru->common.txdata[aa][slot_offset+fp->nb_prefix_samples0+fp->ofdm_symbol_size],
fp->ofdm_symbol_size, fp->ofdm_symbol_size,
num_symbols-1, num_symbols-1,
...@@ -101,7 +108,7 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) { ...@@ -101,7 +108,7 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
CYCLIC_PREFIX); CYCLIC_PREFIX);
} }
else { else {
PHY_ofdm_mod(&ru->common.txdataF_BF[aa][slot_offsetF], PHY_ofdm_mod_ptr(&ru->common.txdataF_BF[aa][slot_offsetF],
(int*)&ru->common.txdata[aa][slot_offset], (int*)&ru->common.txdata[aa][slot_offset],
fp->ofdm_symbol_size, fp->ofdm_symbol_size,
num_symbols, num_symbols,
...@@ -113,40 +120,8 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) { ...@@ -113,40 +120,8 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
//VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPTX_OFDM+(first_symbol!=0?1:0), 0); //VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPTX_OFDM+(first_symbol!=0?1:0), 0);
} }
void CUDA_prec_ofdm(RU_t *ru,int frame_tx,int tti_tx){
nfapi_nr_config_request_t *cfg = &ru->gNB_list[0]->gNB_config;
if(nr_slot_select(cfg, tti_tx) == SF_UL) return;
int slot = tti_tx;
NR_DL_FRAME_PARMS *fp = ru->nr_frame_parms;
PHY_VARS_gNB *gNB = ru->gNB_list[0];
int nb_antenna_ports = 8;
//data L1 to ru
for(int p=0; p<nb_antenna_ports; ++p){
memcpy((void*)ru->common.txdataF[p], (void*)&gNB->common_vars.txdataF[p],
fp->ofdm_symbol_size*sizeof(int32_t)*fp->symbols_per_slot);
//fake data
for(int j=0; j<fp->ofdm_symbol_size*fp->symbols_per_slot; j++){
((short*)&ru->common.txdataF[p][j])[0] = 1;
((short*)&ru->common.txdataF[p][j])[1] = 1;
}
}
CUDA_beam_precoding((int**)ru->common.txdataF, (int***)ru->beam_weights[0], fp->L_ssb, 3,
fp->ofdm_symbol_size, fp->symbols_per_slot, nb_antenna_ports, ru->nb_tx);
CUDA_ifft_ofdm((int**)ru->common.txdata,
fp->ofdm_symbol_size, fp->symbols_per_slot,
fp->nb_prefix_samples, fp->nb_prefix_samples0, ru->nb_tx,
fp->Ncp, CYCLIC_PREFIX);
}
void nr_feptx_ofdm_2thread(RU_t *ru,int frame_tx,int tti_tx) { void nr_feptx_ofdm_2thread(RU_t *ru,int frame_tx,int tti_tx) {
printf("nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d\n", frame_tx, tti_tx);return;
nfapi_nr_config_request_t *cfg = &ru->gNB_list[0]->gNB_config; nfapi_nr_config_request_t *cfg = &ru->gNB_list[0]->gNB_config;
RU_proc_t *proc = &ru->proc; RU_proc_t *proc = &ru->proc;
...@@ -245,7 +220,6 @@ printf("nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d\n", frame_tx, tti_tx);retu ...@@ -245,7 +220,6 @@ printf("nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d\n", frame_tx, tti_tx);retu
} }
static void *nr_feptx_thread(void *param) { static void *nr_feptx_thread(void *param) {
RU_feptx_t *feptx = (RU_feptx_t *)param; RU_feptx_t *feptx = (RU_feptx_t *)param;
RU_t *ru; RU_t *ru;
int aa, slot, start, l, nb_antenna_ports, ret; int aa, slot, start, l, nb_antenna_ports, ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment