Commit b76e487d authored by tyhsu's avatar tyhsu

v1. move cpu function to gpu

parent fdc78bfb
...@@ -52,9 +52,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -I${OPENAIR1_DIR}/ ") ...@@ -52,9 +52,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -I${OPENAIR1_DIR}/ ")
### ADD CUDA LIBRARY ### ADD CUDA LIBRARY
CUDA_ADD_LIBRARY(PHY_CU ${OPENAIR1_DIR}/CUDA/CUDA_phy_procedure.cu CUDA_ADD_LIBRARY(PHY_CU ${OPENAIR1_DIR}/CUDA/CUDA_phy_procedure.cu
${OPENAIR1_DIR}/CUDA/init_cuda.cu
${OPENAIR1_DIR}/CUDA/struct.h ${OPENAIR1_DIR}/CUDA/struct.h
${OPENAIR1_DIR}/CUDA/cuda_struct.h
${OPENAIR1_DIR}/CUDA/checkError.h ${OPENAIR1_DIR}/CUDA/checkError.h
) )
......
...@@ -2033,10 +2033,8 @@ void set_function_spec_param(RU_t *ru) { ...@@ -2033,10 +2033,8 @@ void set_function_spec_param(RU_t *ru) {
} else if (ru->function == gNodeB_3GPP) { } else if (ru->function == gNodeB_3GPP) {
ru->do_prach = 0; // no prach processing in RU ru->do_prach = 0; // no prach processing in RU
ru->feprx = (get_thread_worker_conf() == WORKER_ENABLE) ? ru_fep_full_2thread : fep_full; // RX DFTs ru->feprx = (get_thread_worker_conf() == WORKER_ENABLE) ? ru_fep_full_2thread : fep_full; // RX DFTs
//ru->feptx_ofdm = (get_thread_worker_conf() == WORKER_ENABLE) ? nr_feptx_ofdm_2thread : nr_feptx_ofdm; // this is fep with idft and precoding ru->feptx_ofdm = (get_thread_worker_conf() == WORKER_ENABLE) ? nr_feptx_ofdm_2thread : nr_feptx_ofdm; // this is fep with idft and precoding
ru->feptx_ofdm = CUDA_prec_ofdm; // this is fep with idft and precoding ru->feptx_prec = nr_feptx_prec; // this is fep with idft and precoding
//ru->feptx_prec = nr_feptx_prec; // this is fep with idft and precoding
ru->feptx_prec = NULL; // this is fep with idft and precoding
ru->fh_north_in = NULL; // no incoming fronthaul from north ru->fh_north_in = NULL; // no incoming fronthaul from north
ru->fh_north_out = NULL; // no outgoing fronthaul to north ru->fh_north_out = NULL; // no outgoing fronthaul to north
ru->nr_start_if = NULL; // no if interface ru->nr_start_if = NULL; // no if interface
......
This diff is collapsed.
#ifndef CUDA #ifndef CUDA
#define CUDA #define CUDA
#include "cuda_struct.h"
#if __cplusplus #if __cplusplus
extern "C" { extern "C" {
#endif #endif
void CUDA_hello(void); void CUDA_hello(void);
void CUDA_ifft_ofdm( int **output, void CUDA_PHY_ofdm_mod(int *input,
int *output,
int fftsize, int fftsize,
unsigned char nb_symbols, unsigned char nb_symbols,
unsigned char nb_prefix_samples, unsigned short nb_prefix_samples,
unsigned char nb_prefix_samples0,
int nb_tx,
int Ncp,
Extension_t etype); Extension_t etype);
void CUDA_beam_precoding(int **txdataF, int ***weight, int L_ssb, int shift, int fftsize, int nb_symbols, int nb_antenna_ports, int nb_tx); void CUDA_multadd_cpx_vector(int* x1, int *x2, int *y, short zero_flag, unsigned int N, int output_shift);
#if __cplusplus #if __cplusplus
} }
......
#ifndef CHECKERROR_H __global__ void gpu_hello(void){
#define CHECKERROR_H printf("Hello world from GPU!\n");
}
extern "C" void CUDA_hello(void){
printf("ready to gpu_hello\n");
gpu_hello<<<1,1>>>();
cudaDeviceSynchronize();
}
static const char* _cudaGetErrorEnum(cufftResult error){ static const char* _cudaGetErrorEnum(cufftResult error){
switch (error){ switch (error){
...@@ -36,7 +43,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t ...@@ -36,7 +43,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
#define CHECK_STATE(msg) {checkCudaState((msg), __FILE__, __LINE__);} #define CHECK_STATE(msg) {checkCudaState((msg), __FILE__, __LINE__);}
inline void checkCudaState(const char *msg, const char *file, const int line){ inline void checkCudaState(const char *msg, const char *file, const int line){
cudaDeviceSynchronize();
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if(err != cudaSuccess) { if(err != cudaSuccess) {
fprintf(stderr, "[%s]gpu error: %s %s %d\n", msg, cudaGetErrorString(err), file, line); fprintf(stderr, "[%s]gpu error: %s %s %d\n", msg, cudaGetErrorString(err), file, line);
...@@ -49,4 +55,4 @@ inline void checkCudaState(const char *msg, const char *file, const int line){ ...@@ -49,4 +55,4 @@ inline void checkCudaState(const char *msg, const char *file, const int line){
#endif
#ifndef CUDA_STRUCT_H
#define CUDA_STRUCT_H
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#if __cplusplus
extern "C" {
#endif
typedef float2 Complex;
typedef struct cuda_cu_ru_t{
//beamforming precoding
int **d_txdataF;//14symb-port0, 14symb-port1, ......
int ***d_weight;//[p][tx][symb]
cudaStream_t *d_beam_stream;
//ifft
int *d_txdataF_BF;//14symb-tx0, 14symb-tx1, ......
Complex *d_signal;
int *d_data_wCP;
cufftHandle plan;
}cuda_cu_ru;
extern cuda_cu_ru cu_ru;
#if __cplusplus
}
#endif
#endif
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file init_cuda.cu
* \brief Create and Implementation of beamforming and ifft in gpu
* \author TY Hsu, CW Chang
* \date 2018
* \version 0.1
* \company ISIP@NCTU and Eurecom
* \email: tyhsu@cs.nctu.edu.tw, zhang0756107.cs07g@nctu.edu.tw
* \note
* \warning
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include "CUDA/checkError.h"
#include "CUDA/struct.h"
#include "CUDA/cuda_struct.h"
cuda_cu_ru cu_ru;
extern "C" void init_cuda(int nb_tx, int nb_symbols, int fftsize){
printf("init_cuda %d %d %d \n\n\n", nb_tx, nb_symbols, fftsize);
int nb_antenna_ports = 8;
//beamforming precoding
cu_ru.d_txdataF = (int**)malloc(sizeof(int*) * nb_antenna_ports);
for(int p=0; p<nb_antenna_ports; p++){
gpuErrchk( cudaMalloc((void**)&cu_ru.d_txdataF[p], fftsize*sizeof(int)*nb_symbols) );
}
cu_ru.d_beam_stream = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nb_tx);
for(int aa=0; aa<nb_tx; aa++){
gpuErrchk( cudaStreamCreate(&cu_ru.d_beam_stream[aa]) );
}
cu_ru.d_weight = (int***)malloc(sizeof(int**) * nb_antenna_ports);
for(int p=0; p<nb_antenna_ports; p++){
cu_ru.d_weight[p] = (int**)malloc(sizeof(int*) * nb_tx);
for(int aa=0; aa<nb_tx; aa++){
gpuErrchk( cudaMalloc((void**)&cu_ru.d_weight[p][aa], fftsize*sizeof(int)) );
}
}
//ifft
gpuErrchk( cudaMalloc((void**)&cu_ru.d_txdataF_BF, fftsize*sizeof(int)*nb_symbols*nb_tx) );
gpuErrchk( cudaMalloc((void**)&cu_ru.d_signal, fftsize*sizeof(Complex)*nb_symbols*nb_tx) );
gpuErrchk( cudaMalloc((void**)&cu_ru.d_data_wCP, fftsize*(nb_symbols+1)*nb_tx*sizeof(int)) );
cufftErrchk( cufftPlan1d(&cu_ru.plan, fftsize, CUFFT_C2C, nb_symbols*nb_tx) );
}
#ifndef INIT_CUDA_DEF #ifndef INIT_CUDA_DEF
#define INIT_CUDA_DEF #define INIT_CUDA_DEF
#include "cuda_struct.h" #include "struct.h"
#if __cplusplus
extern "C" {
#endif
typedef cuda_ifft_t{
Complex *d_signal;
Complex *d_output;
int *d_data;
}cuda_ifft
void init_cuda(int nb_tx, int nb_symbols, int fftsize);
#if __cplusplus
}
#endif
#endif #endif
#ifndef OAI_STRUCT #ifndef CUDA_STRUCT
#define OAI_STRUCT #define CUDA_STRUCT
#include <cuda.h>
#include <cuda_runtime.h>
typedef float2 Complex;
typedef enum { typedef enum {
CYCLIC_PREFIX, CYCLIC_PREFIX,
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include "LAYER2/MAC/mac_extern.h" #include "LAYER2/MAC/mac_extern.h"
#include "assertions.h" #include "assertions.h"
#include <math.h> #include <math.h>
#include "openair1/CUDA/init_cuda_def.h"
#include "PHY/NR_TRANSPORT/nr_ulsch.h" #include "PHY/NR_TRANSPORT/nr_ulsch.h"
#include "PHY/NR_REFSIG/nr_refsig.h" #include "PHY/NR_REFSIG/nr_refsig.h"
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
#include "assertions.h" #include "assertions.h"
#include <math.h> #include <math.h>
#include "openair1/PHY/defs_RU.h" #include "openair1/PHY/defs_RU.h"
#include "openair1/CUDA/init_cuda_def.h"
int nr_phy_init_RU(RU_t *ru) { int nr_phy_init_RU(RU_t *ru) {
...@@ -42,7 +41,6 @@ int nr_phy_init_RU(RU_t *ru) { ...@@ -42,7 +41,6 @@ int nr_phy_init_RU(RU_t *ru) {
LOG_I(PHY,"Initializing RU signal buffers (if_south %s) nb_tx %d\n",ru_if_types[ru->if_south],ru->nb_tx); LOG_I(PHY,"Initializing RU signal buffers (if_south %s) nb_tx %d\n",ru_if_types[ru->if_south],ru->nb_tx);
init_cuda(ru->nb_tx, fp->symbols_per_slot, fp->ofdm_symbol_size);
if (ru->if_south <= REMOTE_IF5) { // this means REMOTE_IF5 or LOCAL_RF, so allocate memory for time-domain signals if (ru->if_south <= REMOTE_IF5) { // this means REMOTE_IF5 or LOCAL_RF, so allocate memory for time-domain signals
// Time-domain signals // Time-domain signals
......
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include "modulation_eNB.h" #include "modulation_eNB.h"
#include "nr_modulation.h" #include "nr_modulation.h"
#include "common/utils/LOG/vcd_signal_dumper.h" #include "common/utils/LOG/vcd_signal_dumper.h"
#include "CUDA/CUDA_phy_procedure_def.h"
int beam_precoding(int32_t **txdataF, int beam_precoding(int32_t **txdataF,
...@@ -165,9 +166,16 @@ int nr_beam_precoding(int32_t **txdataF, ...@@ -165,9 +166,16 @@ int nr_beam_precoding(int32_t **txdataF,
} }
} }
void (*multadd_cpx_vector_ptr)(int*, int*, int*, short, unsigned int, int);
#ifdef CUDA
multadd_cpx_vector_ptr = CUDA_multadd_cpx_vector;
#else
multadd_cpx_vector_ptr = multadd_cpx_vector;
#endif
for (p=0; p<nb_antenna_ports; p++) { for (p=0; p<nb_antenna_ports; p++) {
if ((frame_parms->L_ssb >> p) & 0x01) { if ((frame_parms->L_ssb >> p) & 0x01) {
multadd_cpx_vector((int16_t*)&txdataF[p][symbol*frame_parms->ofdm_symbol_size], multadd_cpx_vector_ptr((int16_t*)&txdataF[p][symbol*frame_parms->ofdm_symbol_size],
(int16_t*)beam_weights[p][aa], (int16_t*)beam_weights[p][aa],
(int16_t*)&txdataF_BF[aa][symbol*frame_parms->ofdm_symbol_size], (int16_t*)&txdataF_BF[aa][symbol*frame_parms->ofdm_symbol_size],
0, 0,
......
...@@ -77,8 +77,15 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) { ...@@ -77,8 +77,15 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
LOG_D(PHY,"SFN/SF:RU:TX:%d/%d Generating slot %d (first_symbol %d num_symbols %d)\n",ru->proc.frame_tx, ru->proc.tti_tx,slot,first_symbol,num_symbols); LOG_D(PHY,"SFN/SF:RU:TX:%d/%d Generating slot %d (first_symbol %d num_symbols %d)\n",ru->proc.frame_tx, ru->proc.tti_tx,slot,first_symbol,num_symbols);
void (*PHY_ofdm_mod_ptr)(int*, int*, int, unsigned char, unsigned short, Extension_t);
#ifdef CUDA
PHY_ofdm_mod_ptr = CUDA_PHY_ofdm_mod;
#else
PHY_ofdm_mod_ptr = PHY_ofdm_mod;
#endif
if (fp->Ncp == 1) { if (fp->Ncp == 1) {
PHY_ofdm_mod(&ru->common.txdataF_BF[aa][slot_offsetF], PHY_ofdm_mod_ptr(&ru->common.txdataF_BF[aa][slot_offsetF],
(int*)&ru->common.txdata[aa][slot_offset], (int*)&ru->common.txdata[aa][slot_offset],
fp->ofdm_symbol_size, fp->ofdm_symbol_size,
num_symbols, num_symbols,
...@@ -87,13 +94,13 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) { ...@@ -87,13 +94,13 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
} }
else { else {
if (first_symbol==0) { if (first_symbol==0) {
PHY_ofdm_mod(&ru->common.txdataF_BF[aa][slot_offsetF], PHY_ofdm_mod_ptr(&ru->common.txdataF_BF[aa][slot_offsetF],
(int*)&ru->common.txdata[aa][slot_offset], (int*)&ru->common.txdata[aa][slot_offset],
fp->ofdm_symbol_size, fp->ofdm_symbol_size,
1, 1,
fp->nb_prefix_samples0, fp->nb_prefix_samples0,
CYCLIC_PREFIX); CYCLIC_PREFIX);
PHY_ofdm_mod(&ru->common.txdataF_BF[aa][slot_offsetF+fp->ofdm_symbol_size], PHY_ofdm_mod_ptr(&ru->common.txdataF_BF[aa][slot_offsetF+fp->ofdm_symbol_size],
(int*)&ru->common.txdata[aa][slot_offset+fp->nb_prefix_samples0+fp->ofdm_symbol_size], (int*)&ru->common.txdata[aa][slot_offset+fp->nb_prefix_samples0+fp->ofdm_symbol_size],
fp->ofdm_symbol_size, fp->ofdm_symbol_size,
num_symbols-1, num_symbols-1,
...@@ -101,7 +108,7 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) { ...@@ -101,7 +108,7 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
CYCLIC_PREFIX); CYCLIC_PREFIX);
} }
else { else {
PHY_ofdm_mod(&ru->common.txdataF_BF[aa][slot_offsetF], PHY_ofdm_mod_ptr(&ru->common.txdataF_BF[aa][slot_offsetF],
(int*)&ru->common.txdata[aa][slot_offset], (int*)&ru->common.txdata[aa][slot_offset],
fp->ofdm_symbol_size, fp->ofdm_symbol_size,
num_symbols, num_symbols,
...@@ -113,40 +120,8 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) { ...@@ -113,40 +120,8 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
//VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPTX_OFDM+(first_symbol!=0?1:0), 0); //VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPTX_OFDM+(first_symbol!=0?1:0), 0);
} }
void CUDA_prec_ofdm(RU_t *ru,int frame_tx,int tti_tx){
nfapi_nr_config_request_t *cfg = &ru->gNB_list[0]->gNB_config;
if(nr_slot_select(cfg, tti_tx) == SF_UL) return;
int slot = tti_tx;
NR_DL_FRAME_PARMS *fp = ru->nr_frame_parms;
PHY_VARS_gNB *gNB = ru->gNB_list[0];
int nb_antenna_ports = 8;
//data L1 to ru
for(int p=0; p<nb_antenna_ports; ++p){
memcpy((void*)ru->common.txdataF[p], (void*)&gNB->common_vars.txdataF[p],
fp->ofdm_symbol_size*sizeof(int32_t)*fp->symbols_per_slot);
//fake data
for(int j=0; j<fp->ofdm_symbol_size*fp->symbols_per_slot; j++){
((short*)&ru->common.txdataF[p][j])[0] = 1;
((short*)&ru->common.txdataF[p][j])[1] = 1;
}
}
CUDA_beam_precoding((int**)ru->common.txdataF, (int***)ru->beam_weights[0], fp->L_ssb, 3,
fp->ofdm_symbol_size, fp->symbols_per_slot, nb_antenna_ports, ru->nb_tx);
CUDA_ifft_ofdm((int**)ru->common.txdata,
fp->ofdm_symbol_size, fp->symbols_per_slot,
fp->nb_prefix_samples, fp->nb_prefix_samples0, ru->nb_tx,
fp->Ncp, CYCLIC_PREFIX);
}
void nr_feptx_ofdm_2thread(RU_t *ru,int frame_tx,int tti_tx) { void nr_feptx_ofdm_2thread(RU_t *ru,int frame_tx,int tti_tx) {
printf("nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d\n", frame_tx, tti_tx);return;
nfapi_nr_config_request_t *cfg = &ru->gNB_list[0]->gNB_config; nfapi_nr_config_request_t *cfg = &ru->gNB_list[0]->gNB_config;
RU_proc_t *proc = &ru->proc; RU_proc_t *proc = &ru->proc;
...@@ -245,7 +220,6 @@ printf("nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d\n", frame_tx, tti_tx);retu ...@@ -245,7 +220,6 @@ printf("nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d\n", frame_tx, tti_tx);retu
} }
static void *nr_feptx_thread(void *param) { static void *nr_feptx_thread(void *param) {
RU_feptx_t *feptx = (RU_feptx_t *)param; RU_feptx_t *feptx = (RU_feptx_t *)param;
RU_t *ru; RU_t *ru;
int aa, slot, start, l, nb_antenna_ports, ret; int aa, slot, start, l, nb_antenna_ports, ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment