Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
canghaiwuhen
OpenXG-RAN
Commits
b76e487d
Commit
b76e487d
authored
Nov 15, 2019
by
tyhsu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
v1. move cpu function to gpu
parent
fdc78bfb
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
155 additions
and
317 deletions
+155
-317
cmake_targets/CMakeLists.txt
cmake_targets/CMakeLists.txt
+0
-2
executables/nr-ru.c
executables/nr-ru.c
+2
-4
openair1/CUDA/CUDA_phy_procedure.cu
openair1/CUDA/CUDA_phy_procedure.cu
+107
-138
openair1/CUDA/CUDA_phy_procedure_def.h
openair1/CUDA/CUDA_phy_procedure_def.h
+4
-7
openair1/CUDA/checkError.h
openair1/CUDA/checkError.h
+10
-4
openair1/CUDA/cuda_struct.h
openair1/CUDA/cuda_struct.h
+0
-33
openair1/CUDA/init_cuda.cu
openair1/CUDA/init_cuda.cu
+0
-76
openair1/CUDA/init_cuda_def.h
openair1/CUDA/init_cuda_def.h
+6
-10
openair1/CUDA/struct.h
openair1/CUDA/struct.h
+6
-2
openair1/PHY/INIT/nr_init.c
openair1/PHY/INIT/nr_init.c
+0
-1
openair1/PHY/INIT/nr_init_ru.c
openair1/PHY/INIT/nr_init_ru.c
+0
-2
openair1/PHY/MODULATION/beamforming.c
openair1/PHY/MODULATION/beamforming.c
+9
-1
openair1/SCHED_NR/nr_ru_procedures.c
openair1/SCHED_NR/nr_ru_procedures.c
+11
-37
No files found.
cmake_targets/CMakeLists.txt
View file @
b76e487d
...
@@ -52,9 +52,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -I${OPENAIR1_DIR}/ ")
...
@@ -52,9 +52,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -I${OPENAIR1_DIR}/ ")
### ADD CUDA LIBRARY
### ADD CUDA LIBRARY
CUDA_ADD_LIBRARY
(
PHY_CU
${
OPENAIR1_DIR
}
/CUDA/CUDA_phy_procedure.cu
CUDA_ADD_LIBRARY
(
PHY_CU
${
OPENAIR1_DIR
}
/CUDA/CUDA_phy_procedure.cu
${
OPENAIR1_DIR
}
/CUDA/init_cuda.cu
${
OPENAIR1_DIR
}
/CUDA/struct.h
${
OPENAIR1_DIR
}
/CUDA/struct.h
${
OPENAIR1_DIR
}
/CUDA/cuda_struct.h
${
OPENAIR1_DIR
}
/CUDA/checkError.h
${
OPENAIR1_DIR
}
/CUDA/checkError.h
)
)
...
...
executables/nr-ru.c
View file @
b76e487d
...
@@ -2033,10 +2033,8 @@ void set_function_spec_param(RU_t *ru) {
...
@@ -2033,10 +2033,8 @@ void set_function_spec_param(RU_t *ru) {
}
else
if
(
ru
->
function
==
gNodeB_3GPP
)
{
}
else
if
(
ru
->
function
==
gNodeB_3GPP
)
{
ru
->
do_prach
=
0
;
// no prach processing in RU
ru
->
do_prach
=
0
;
// no prach processing in RU
ru
->
feprx
=
(
get_thread_worker_conf
()
==
WORKER_ENABLE
)
?
ru_fep_full_2thread
:
fep_full
;
// RX DFTs
ru
->
feprx
=
(
get_thread_worker_conf
()
==
WORKER_ENABLE
)
?
ru_fep_full_2thread
:
fep_full
;
// RX DFTs
//ru->feptx_ofdm = (get_thread_worker_conf() == WORKER_ENABLE) ? nr_feptx_ofdm_2thread : nr_feptx_ofdm; // this is fep with idft and precoding
ru
->
feptx_ofdm
=
(
get_thread_worker_conf
()
==
WORKER_ENABLE
)
?
nr_feptx_ofdm_2thread
:
nr_feptx_ofdm
;
// this is fep with idft and precoding
ru
->
feptx_ofdm
=
CUDA_prec_ofdm
;
// this is fep with idft and precoding
ru
->
feptx_prec
=
nr_feptx_prec
;
// this is fep with idft and precoding
//ru->feptx_prec = nr_feptx_prec; // this is fep with idft and precoding
ru
->
feptx_prec
=
NULL
;
// this is fep with idft and precoding
ru
->
fh_north_in
=
NULL
;
// no incoming fronthaul from north
ru
->
fh_north_in
=
NULL
;
// no incoming fronthaul from north
ru
->
fh_north_out
=
NULL
;
// no outgoing fronthaul to north
ru
->
fh_north_out
=
NULL
;
// no outgoing fronthaul to north
ru
->
nr_start_if
=
NULL
;
// no if interface
ru
->
nr_start_if
=
NULL
;
// no if interface
...
...
openair1/CUDA/CUDA_phy_procedure.cu
View file @
b76e487d
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file CUDA_phy_procedure.cu
* \brief Create and Implementation of beamforming and ifft in gpu
* \author TY Hsu, CW Chang
* \date 2018
* \version 0.1
* \company ISIP@NCTU and Eurecom
* \email: tyhsu@cs.nctu.edu.tw, zhang0756107.cs07g@nctu.edu.tw
* \note
* \warning
*/
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <math.h>
#include <math.h>
...
@@ -37,7 +6,7 @@
...
@@ -37,7 +6,7 @@
#include <cufft.h>
#include <cufft.h>
#include "CUDA/checkError.h"
#include "CUDA/checkError.h"
#include "CUDA/struct.h"
#include "CUDA/struct.h"
#include "CUDA/cuda_struct.h"
__global__
void
cu_intToComplex
(
int
*
input
,
Complex
*
signal
){
__global__
void
cu_intToComplex
(
int
*
input
,
Complex
*
signal
){
int
id
=
blockIdx
.
x
*
1024
+
threadIdx
.
x
;
int
id
=
blockIdx
.
x
*
1024
+
threadIdx
.
x
;
...
@@ -51,141 +20,141 @@ __global__ void cu_ComplexToInt(int *output, Complex *signal){
...
@@ -51,141 +20,141 @@ __global__ void cu_ComplexToInt(int *output, Complex *signal){
((
short
*
)
&
output
[
id
])[
1
]
=
round
(
signal
[
id
].
y
);
((
short
*
)
&
output
[
id
])[
1
]
=
round
(
signal
[
id
].
y
);
}
}
__global__
void
cu_CP_fft_resultin
(
unsigned
char
nb_prefix_samples
,
int
*
input
,
int
*
output
,
int
fftsize
,
int
nb_symbols
){
extern
"C"
void
CUDA_PHY_ofdm_mod
(
int
*
input
,
int
id
=
blockIdx
.
x
*
1024
+
threadIdx
.
x
;
int
*
output
,
int
elementId
=
id
%
fftsize
;
int
symbolId
=
id
/
fftsize
;
int
slotId
=
symbolId
/
nb_symbols
;
int
symbIdinSlot
=
symbolId
%
nb_symbols
;
int
slotElmtNum
=
fftsize
*
(
nb_symbols
+
1
);
int
CPElmtNum
=
fftsize
+
nb_prefix_samples
;
int
offset
=
slotId
*
slotElmtNum
+
symbIdinSlot
*
CPElmtNum
;
output
[
offset
+
nb_prefix_samples
+
elementId
]
=
input
[
id
];
if
(
elementId
>=
fftsize
-
nb_prefix_samples
){
output
[
offset
+
(
fftsize
-
nb_prefix_samples
)]
=
input
[
id
];
}
}
/*
 * Copy per-symbol samples from `input` into `output`, inserting a cyclic
 * prefix in front of every symbol. Symbol 0 of each slot uses
 * nb_prefix_samples0; all other symbols use nb_prefix_samples.
 *
 * One thread handles one input sample. `input` is read as back-to-back
 * symbols of `fftsize` samples, `nb_symbols` symbols per slot. In `output`
 * each slot starts at a stride of fftsize*(nb_symbols+1) samples.
 *
 * Parameters:
 *   nb_prefix_samples0  prefix length of the first symbol of a slot
 *   nb_prefix_samples   prefix length of the remaining symbols
 *   input               source samples, no prefix
 *   output              destination samples, prefix + body per symbol
 *   fftsize             samples per symbol body
 *   nb_symbols          symbols per slot
 *
 * There is no bounds check on `id`: the launch must cover exactly the number
 * of samples held in `input`.
 * NOTE(review): both prefix lengths are `unsigned char`, so values above 255
 * are truncated before they reach this kernel -- confirm against the caller.
 */
__global__ void cu_CP0_fft_resultin(unsigned char nb_prefix_samples0, unsigned char nb_prefix_samples, int *input, int *output, int fftsize, int nb_symbols){
  // blockDim.x equals the 1024 that was hard-coded here for a 1024-thread launch.
  int id = blockIdx.x*blockDim.x + threadIdx.x;

  int elementId    = id % fftsize;          // sample index inside the symbol
  int symbolId     = id / fftsize;          // running symbol index
  int slotId       = symbolId / nb_symbols;
  int symbIdinSlot = symbolId % nb_symbols;
  int slotElmtNum  = fftsize*(nb_symbols+1); // output stride between slots

  // Start of this symbol (its prefix) in the output, and its prefix length.
  int offset = slotId*slotElmtNum;
  int prefixLen;
  if(symbIdinSlot == 0){
    prefixLen = nb_prefix_samples0;
  }
  else{
    prefixLen = nb_prefix_samples;
    // Skip symbol 0 (long prefix) and the preceding regular symbols.
    offset += (fftsize + nb_prefix_samples0) + (symbIdinSlot-1)*(fftsize + nb_prefix_samples);
  }

  int sample = input[id];

  // Symbol body sits right after the prefix.
  output[offset + prefixLen + elementId] = sample;

  // The last prefixLen samples of the body are duplicated into the prefix.
  // Each tail sample goes to its own prefix position; previously every tail
  // thread wrote to the single index offset+(fftsize-prefixLen), which left
  // the prefix unfilled and clobbered a body sample.
  int tailStart = fftsize - prefixLen;
  if(elementId >= tailStart){
    output[offset + (elementId - tailStart)] = sample;
  }
}
extern
"C"
void
CUDA_ifft_ofdm
(
int
**
output
,
int
fftsize
,
int
fftsize
,
unsigned
char
nb_symbols
,
unsigned
char
nb_symbols
,
unsigned
char
nb_prefix_samples
,
unsigned
short
nb_prefix_samples
,
unsigned
char
nb_prefix_samples0
,
int
nb_tx
,
int
Ncp
,
Extension_t
etype
){
Extension_t
etype
){
//for(int i=0; i<fftsize; i++) printf("%d+%di\n", ((short*)&input[0][i])[0], ((short*)&input[0][i])[1])
;
if
(
nb_symbols
==
0
)
return
;
int
*
d_txdataF_BF
=
cu_ru
.
d_txdataF_BF
;
int
*
d_data_wCP
=
cu_ru
.
d_data_wCP
;
Complex
*
d_signal
=
cu_ru
.
d_signal
;
cufftHandle
plan
=
cu_ru
.
plan
;
/*
Complex
*
d_signal
;
for(int aa=0; aa<nb_tx; aa++){
gpuErrchk
(
cudaMalloc
((
void
**
)
&
d_signal
,
fftsize
*
sizeof
(
Complex
)
*
nb_symbols
)
);
int elementNum = fftsize*nb_symbols;
gpuErrchk( cudaMemcpy(&d_data[aa*elementNum], input[aa], sizeof(int)*elementNum, cudaMemcpyHostToDevice) );
int
*
d_data
;
}*/
gpuErrchk
(
cudaMalloc
((
void
**
)
&
d_data
,
fftsize
*
sizeof
(
int
)
*
nb_symbols
)
);
gpuErrchk
(
cudaMemcpy
(
d_data
,
input
,
fftsize
*
sizeof
(
int
)
*
nb_symbols
,
cudaMemcpyHostToDevice
)
);
int
threadNum
=
1024
;
int
threadNum
=
1024
;
int
blockNum
=
fftsize
*
nb_symbols
*
nb_tx
/
threadNum
;
int
blockNum
=
fftsize
*
nb_symbols
/
threadNum
;
cu_intToComplex
<<<
blockNum
,
threadNum
>>>
(
d_txdataF_BF
,
d_signal
);
cu_intToComplex
<<<
blockNum
,
threadNum
>>>
(
d_data
,
d_signal
);
//CHECK_STATE("cu_intToComplex");
cufftErrchk
(
cufftExecC2C
(
plan
,
(
cufftComplex
*
)
d_signal
,
(
cufftComplex
*
)
d_signal
,
CUFFT_INVERSE
));
cu_ComplexToInt
<<<
blockNum
,
threadNum
>>>
(
d_txdataF_BF
,
d_signal
);
cufftHandle
plan
;
//CHECK_STATE("cu_ComplexToInt");
cufftErrchk
(
cufftPlan1d
(
&
plan
,
fftsize
,
CUFFT_C2C
,
nb_symbols
)
);
cufftErrchk
(
cufftExecC2C
(
plan
,
(
cufftComplex
*
)
d_signal
,
(
cufftComplex
*
)
d_signal
,
CUFFT_FORWARD
)
);
Complex
*
h_output
=
(
Complex
*
)
malloc
(
fftsize
*
sizeof
(
Complex
)
*
nb_symbols
)
;
gpuErrchk
(
cudaMemcpy
(
h_output
,
d_signal
,
fftsize
*
sizeof
(
Complex
)
*
nb_symbols
,
cudaMemcpyDeviceToHost
)
);
//only do cyclic_prefix, suffix/none not finish yet.
cu_ComplexToInt
<<<
blockNum
,
threadNum
>>>
(
d_data
,
d_signal
);
if
(
Ncp
==
1
){
cu_CP_fft_resultin
<<<
blockNum
,
threadNum
>>>
(
nb_prefix_samples
,
d_txdataF_BF
,
d_data_wCP
,
fftsize
,
nb_symbols
);
int
*
res
=
(
int
*
)
malloc
(
fftsize
*
sizeof
(
int
)
*
nb_symbols
);
//CHECK_STATE("cu_CP_fft_resultin");
gpuErrchk
(
cudaMemcpy
(
res
,
d_data
,
fftsize
*
sizeof
(
int
)
*
nb_symbols
,
cudaMemcpyDeviceToHost
)
);
}
else
{
cu_CP0_fft_resultin
<<<
blockNum
,
threadNum
>>>
(
nb_prefix_samples0
,
nb_prefix_samples
,
d_txdataF_BF
,
d_data_wCP
,
fftsize
,
nb_symbols
);
/*
//CHECK_STATE("cu_CP0_fft_resultin");
for(int i=0; i<fftsize*nb_symbols; i++){
printf("res(%d) %d+%di\n", i, ((short*)&res[i])[0], ((short*)&res[i])[1]);
}*/
for
(
int
symb_th
=
0
;
symb_th
<
nb_symbols
;
symb_th
++
){
int
*
output_ptr
;
switch
(
etype
){
case
CYCLIC_PREFIX
:{
output_ptr
=
&
output
[
symb_th
*
fftsize
+
(
1
+
symb_th
)
*
nb_prefix_samples
];
memcpy
(
output_ptr
,
res
,
fftsize
<<
2
);
int
j
=
fftsize
;
for
(
int
k
=-
1
;
k
>=-
nb_prefix_samples
;
k
--
){
output_ptr
[
k
]
=
output_ptr
[
--
j
];
}
break
;
}
}
case
CYCLIC_SUFFIX
:{
output_ptr
=
&
output
[
symb_th
*
fftsize
+
(
symb_th
)
*
nb_prefix_samples
];
memcpy
(
output_ptr
,
res
,
fftsize
<<
2
);
//write back gpu->cpu
for
(
int
k
=
0
;
k
<
nb_prefix_samples
;
k
++
){
for
(
int
aa
=
0
;
aa
<
nb_tx
;
aa
++
){
output_ptr
[
fftsize
+
k
]
=
output_ptr
[
k
];
gpuErrchk
(
cudaMemcpy
(
output
[
aa
],
&
d_data_wCP
[
aa
*
(
nb_symbols
+
1
)
*
fftsize
],
fftsize
*
(
nb_symbols
+
1
)
*
sizeof
(
int
),
cudaMemcpyDeviceToHost
)
);
}
break
;
}
case
ZEROS
:{
break
;
}
case
NONE
:{
output_ptr
=
&
output
[
fftsize
];
memcpy
(
output_ptr
,
res
,
fftsize
<<
2
);
break
;
}
}
cudaDeviceSynchronize
();
default:
{
break
;
}
}
cufftDestroy
(
plan
);
free
(
h_output
);
free
(
res
);
cudaFree
(
d_signal
);
}
}
}
__global__
void
conjMul
(
int
*
d_x1
,
int
*
d_x2
,
int
*
d_y
,
int
aa
,
int
div
,
int
fftsize
,
int
nb_symbols
){
__global__
void
conjMul
(
int
*
d_x1
,
int
*
d_x2
,
int
*
d_y
,
short
zero_flag
,
unsigned
int
div
){
int
id
=
blockIdx
.
x
*
1024
+
threadIdx
.
x
;
int
id
=
blockIdx
.
x
*
1024
+
threadIdx
.
x
;
int
*
x1
=
&
d_x1
[
id
];
int
*
x1
=
&
d_x1
[
id
];
int
*
x2
=
&
d_x2
[
id
%
fftsize
];
int
*
x2
=
&
d_x2
[
id
];
int
*
y
=
&
d_y
[
aa
*
fftsize
*
nb_symbols
+
id
];
int
*
y
=
&
d_y
[
id
];
int
re
,
im
;
int
re
,
im
;
//conj(x1) * x2
re
=
((
short
*
)
x1
)[
0
]
*
((
short
*
)
x2
)[
0
]
+
((
short
*
)
x1
)[
1
]
*
((
short
*
)
x2
)[
1
];
((
short
*
)
x1
)[
1
]
*=
-
1
;
im
=
((
short
*
)
x1
)[
0
]
*
((
short
*
)
x2
)[
1
]
-
((
short
*
)
x1
)[
1
]
*
((
short
*
)
x2
)[
0
];
re
=
((
short
*
)
x1
)[
0
]
*
((
short
*
)
x2
)[
0
]
-
((
short
*
)
x1
)[
1
]
*
((
short
*
)
x2
)[
1
];
im
=
((
short
*
)
x1
)[
1
]
*
((
short
*
)
x2
)[
0
]
+
((
short
*
)
x1
)[
0
]
*
((
short
*
)
x2
)[
1
];
re
=
re
/
div
;
re
=
re
/
div
;
im
=
im
/
div
;
im
=
im
/
div
;
if
(
zero_flag
){
((
short
*
)
y
)[
0
]
=
re
;
((
short
*
)
y
)[
1
]
=
im
;
}
else
{
((
short
*
)
y
)[
0
]
+=
re
;
((
short
*
)
y
)[
0
]
+=
re
;
((
short
*
)
y
)[
1
]
+=
im
;
((
short
*
)
y
)[
1
]
+=
im
;
}
}
}
extern
"C"
void
CUDA_multadd_cpx_vector
(
int
*
x1
,
int
*
x2
,
int
*
y
,
short
zero_flag
,
unsigned
int
N
,
int
output_shift
){
int
*
d_x1
,
*
d_x2
,
*
d_y
;
gpuErrchk
(
cudaMalloc
((
void
**
)
&
d_x1
,
N
*
sizeof
(
int
))
);
gpuErrchk
(
cudaMalloc
((
void
**
)
&
d_x2
,
N
*
sizeof
(
int
))
);
gpuErrchk
(
cudaMalloc
((
void
**
)
&
d_y
,
N
*
sizeof
(
int
))
);
extern
"C"
void
CUDA_beam_precoding
(
int
**
txdataF
,
int
***
weight
,
int
L_ssb
,
int
shift
,
int
fftsize
,
int
nb_symbols
,
int
nb_antenna_ports
,
int
nb_tx
){
gpuErrchk
(
cudaMemcpy
(
d_x1
,
x1
,
N
*
sizeof
(
int
),
cudaMemcpyHostToDevice
)
);
gpuErrchk
(
cudaMemcpy
(
d_x2
,
x2
,
N
*
sizeof
(
int
),
cudaMemcpyHostToDevice
)
);
//initial BF data;
if
(
zero_flag
==
0
){
gpuErrchk
(
cudaMemset
(
cu_ru
.
d_txdataF_BF
,
0
,
fftsize
*
nb_symbols
*
sizeof
(
int
)
*
nb_tx
)
);
gpuErrchk
(
cudaMemcpy
(
d_y
,
y
,
N
*
sizeof
(
int
),
cudaMemcpyHostToDevice
)
);
//move data to gpu
}
else
{
for
(
int
p
=
0
;
p
<
nb_antenna_ports
;
p
++
){
gpuErrchk
(
cudaMemset
(
d_y
,
0
,
N
*
sizeof
(
int
))
);
gpuErrchk
(
cudaMemcpy
(
cu_ru
.
d_txdataF
[
p
],
txdataF
[
p
],
fftsize
*
sizeof
(
int
)
*
nb_symbols
,
cudaMemcpyHostToDevice
)
);
}
}
unsigned
int
div
=
1
;
div
=
div
<<
output_shift
;
int
threadNum
=
1024
;
int
threadNum
=
1024
;
int
blockNum
=
fftsize
*
nb_symbols
/
threadNum
;
int
blockNum
=
N
/
threadNum
;
int
div
=
1
<<
shift
;
conjMul
<<<
blockNum
,
threadNum
>>>
(
d_x1
,
d_x2
,
d_y
,
zero_flag
,
div
);
for
(
int
aa
=
0
;
aa
<
nb_tx
;
aa
++
){
cudaDeviceSynchronize
();
for
(
int
p
=
0
;
p
<
nb_antenna_ports
;
p
++
){
CHECK_STATE
(
"conjMul"
);
if
((
L_ssb
>>
p
)
&
0x01
){
gpuErrchk
(
cudaMemcpy
(
cu_ru
.
d_weight
[
p
][
aa
],
weight
[
p
][
aa
],
fftsize
*
sizeof
(
int
),
cudaMemcpyHostToDevice
)
);
conjMul
<<<
blockNum
,
threadNum
>>>
(
cu_ru
.
d_txdataF
[
p
],
cu_ru
.
d_weight
[
p
][
aa
],
cu_ru
.
d_txdataF_BF
,
aa
,
div
,
fftsize
,
nb_symbols
);
}
}
}
gpuErrchk
(
cudaMemcpy
(
y
,
d_y
,
N
*
sizeof
(
int
),
cudaMemcpyDeviceToHost
)
);
}
cudaFree
(
d_x1
);
cudaFree
(
d_x2
);
cudaFree
(
d_y
);
}
openair1/CUDA/CUDA_phy_procedure_def.h
View file @
b76e487d
#ifndef CUDA
#ifndef CUDA
#define CUDA
#define CUDA
#include "cuda_struct.h"
#if __cplusplus
#if __cplusplus
extern
"C"
{
extern
"C"
{
#endif
#endif
void
CUDA_hello
(
void
);
void
CUDA_hello
(
void
);
void
CUDA_ifft_ofdm
(
int
**
output
,
void
CUDA_PHY_ofdm_mod
(
int
*
input
,
int
*
output
,
int
fftsize
,
int
fftsize
,
unsigned
char
nb_symbols
,
unsigned
char
nb_symbols
,
unsigned
char
nb_prefix_samples
,
unsigned
short
nb_prefix_samples
,
unsigned
char
nb_prefix_samples0
,
int
nb_tx
,
int
Ncp
,
Extension_t
etype
);
Extension_t
etype
);
void
CUDA_
beam_precoding
(
int
**
txdataF
,
int
***
weight
,
int
L_ssb
,
int
shift
,
int
fftsize
,
int
nb_symbols
,
int
nb_antenna_ports
,
int
nb_tx
);
void
CUDA_
multadd_cpx_vector
(
int
*
x1
,
int
*
x2
,
int
*
y
,
short
zero_flag
,
unsigned
int
N
,
int
output_shift
);
#if __cplusplus
#if __cplusplus
}
}
...
...
openair1/CUDA/checkError.h
View file @
b76e487d
#ifndef CHECKERROR_H
__global__
void
gpu_hello
(
void
){
#define CHECKERROR_H
printf
(
"Hello world from GPU!
\n
"
);
}
extern
"C"
void
CUDA_hello
(
void
){
printf
(
"ready to gpu_hello
\n
"
);
gpu_hello
<<<
1
,
1
>>>
();
cudaDeviceSynchronize
();
}
static
const
char
*
_cudaGetErrorEnum
(
cufftResult
error
){
static
const
char
*
_cudaGetErrorEnum
(
cufftResult
error
){
switch
(
error
){
switch
(
error
){
...
@@ -36,7 +43,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
...
@@ -36,7 +43,6 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
#define CHECK_STATE(msg) {checkCudaState((msg), __FILE__, __LINE__);}
#define CHECK_STATE(msg) {checkCudaState((msg), __FILE__, __LINE__);}
inline
void
checkCudaState
(
const
char
*
msg
,
const
char
*
file
,
const
int
line
){
inline
void
checkCudaState
(
const
char
*
msg
,
const
char
*
file
,
const
int
line
){
cudaDeviceSynchronize
();
cudaError_t
err
=
cudaGetLastError
();
cudaError_t
err
=
cudaGetLastError
();
if
(
err
!=
cudaSuccess
)
{
if
(
err
!=
cudaSuccess
)
{
fprintf
(
stderr
,
"[%s]gpu error: %s %s %d
\n
"
,
msg
,
cudaGetErrorString
(
err
),
file
,
line
);
fprintf
(
stderr
,
"[%s]gpu error: %s %s %d
\n
"
,
msg
,
cudaGetErrorString
(
err
),
file
,
line
);
...
@@ -49,4 +55,4 @@ inline void checkCudaState(const char *msg, const char *file, const int line){
...
@@ -49,4 +55,4 @@ inline void checkCudaState(const char *msg, const char *file, const int line){
#endif
openair1/CUDA/cuda_struct.h
deleted
100644 → 0
View file @
fdc78bfb
#ifndef CUDA_STRUCT_H
#define CUDA_STRUCT_H
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#if __cplusplus
extern
"C"
{
#endif
typedef
float2
Complex
;
/*
 * GPU-side working set for the RU transmit path (beamforming precoding
 * followed by the inverse FFT / cyclic-prefix stage). A single global
 * instance, cu_ru, is declared below; buffer sizes quoted here are the ones
 * init_cuda() allocates.
 */
typedef struct cuda_cu_ru_t {
  /* ---- beamforming precoding ---- */

  // Host array of device pointers, one per antenna port; each device buffer
  // holds one slot (14symb-port0, 14symb-port1, ......).
  int **d_txdataF;

  // Host table indexed [p][tx]; each entry is a device buffer of weights
  // (init_cuda allocates fftsize ints per entry).
  int ***d_weight;

  // Host array of CUDA streams, one per TX antenna.
  cudaStream_t *d_beam_stream;

  /* ---- ifft ---- */

  // Beamformed frequency-domain samples, laid out tx after tx
  // (14symb-tx0, 14symb-tx1, ......).
  int *d_txdataF_BF;

  // Complex scratch buffer the FFT is executed on.
  Complex *d_signal;

  // Time-domain samples with cyclic prefix inserted.
  int *d_data_wCP;

  // cuFFT plan used for the transform.
  cufftHandle plan;
} cuda_cu_ru;
extern
cuda_cu_ru
cu_ru
;
#if __cplusplus
}
#endif
#endif
openair1/CUDA/init_cuda.cu
deleted
100644 → 0
View file @
fdc78bfb
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
/*! \file init_cuda.cu
* \brief Create and Implementation of beamforming and ifft in gpu
* \author TY Hsu, CW Chang
* \date 2018
* \version 0.1
* \company ISIP@NCTU and Eurecom
* \email: tyhsu@cs.nctu.edu.tw, zhang0756107.cs07g@nctu.edu.tw
* \note
* \warning
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include "CUDA/checkError.h"
#include "CUDA/struct.h"
#include "CUDA/cuda_struct.h"
cuda_cu_ru
cu_ru
;
/*
 * Host allocation helper for init_cuda: returns `bytes` bytes of host memory
 * or terminates the process with a diagnostic naming `what`.
 * A NULL result for a zero-byte request is not treated as a failure.
 */
static void *init_cuda_host_alloc(size_t bytes, const char *what){
  void *ptr = malloc(bytes);
  if(ptr == NULL && bytes != 0){
    fprintf(stderr, "init_cuda: host allocation of %zu bytes for %s failed\n", bytes, what);
    exit(EXIT_FAILURE);
  }
  return ptr;
}

/*
 * Populate the global cu_ru context with every host table, device buffer,
 * CUDA stream and cuFFT plan used by the GPU transmit path.
 *
 * Parameters:
 *   nb_tx       number of TX antennas
 *   nb_symbols  symbols per slot
 *   fftsize     samples per symbol
 *
 * Device allocations, stream creation and plan creation are wrapped in
 * gpuErrchk / cufftErrchk. Host allocations are now checked as well (they
 * were previously dereferenced unchecked) and terminate the process on
 * failure. Nothing allocated here is released by this function.
 */
extern "C" void init_cuda(int nb_tx, int nb_symbols, int fftsize){
  printf("init_cuda %d %d %d\n\n\n", nb_tx, nb_symbols, fftsize);

  // NOTE(review): fixed port count; presumably must match the value the
  // callers of the precoding path use -- confirm.
  const int nb_antenna_ports = 8;

  // Byte counts shared by several allocations below.
  const size_t symbolBytes = (size_t)fftsize * sizeof(int);
  const size_t slotBytes   = symbolBytes * (size_t)nb_symbols;

  //beamforming precoding
  // One device buffer of a full slot per antenna port.
  cu_ru.d_txdataF = (int**)init_cuda_host_alloc(sizeof(int*) * nb_antenna_ports, "d_txdataF");
  for(int p = 0; p < nb_antenna_ports; p++){
    gpuErrchk( cudaMalloc((void**)&cu_ru.d_txdataF[p], slotBytes) );
  }

  // One stream per TX antenna.
  cu_ru.d_beam_stream = (cudaStream_t*)init_cuda_host_alloc(sizeof(cudaStream_t) * nb_tx, "d_beam_stream");
  for(int aa = 0; aa < nb_tx; aa++){
    gpuErrchk( cudaStreamCreate(&cu_ru.d_beam_stream[aa]) );
  }

  // Weight table [port][tx], each entry a device buffer of fftsize ints.
  cu_ru.d_weight = (int***)init_cuda_host_alloc(sizeof(int**) * nb_antenna_ports, "d_weight");
  for(int p = 0; p < nb_antenna_ports; p++){
    cu_ru.d_weight[p] = (int**)init_cuda_host_alloc(sizeof(int*) * nb_tx, "d_weight[p]");
    for(int aa = 0; aa < nb_tx; aa++){
      gpuErrchk( cudaMalloc((void**)&cu_ru.d_weight[p][aa], symbolBytes) );
    }
  }

  //ifft
  gpuErrchk( cudaMalloc((void**)&cu_ru.d_txdataF_BF, slotBytes * (size_t)nb_tx) );
  gpuErrchk( cudaMalloc((void**)&cu_ru.d_signal,
                        (size_t)fftsize * sizeof(Complex) * (size_t)nb_symbols * (size_t)nb_tx) );
  // One extra symbol's worth of samples per slot leaves room for the prefixes.
  gpuErrchk( cudaMalloc((void**)&cu_ru.d_data_wCP,
                        (size_t)fftsize * (size_t)(nb_symbols + 1) * (size_t)nb_tx * sizeof(int)) );

  // Batched 1-D complex-to-complex plan: one transform per symbol per antenna.
  cufftErrchk( cufftPlan1d(&cu_ru.plan, fftsize, CUFFT_C2C, nb_symbols*nb_tx) );
}
openair1/CUDA/init_cuda_def.h
View file @
b76e487d
#ifndef INIT_CUDA_DEF
#ifndef INIT_CUDA_DEF
#define INIT_CUDA_DEF
#define INIT_CUDA_DEF
#include "cuda_struct.h"
#include "struct.h"
#if __cplusplus
extern
"C"
{
#endif
typedef
cuda_ifft_t
{
Complex
*
d_signal
;
Complex
*
d_output
;
int
*
d_data
;
}
cuda_ifft
void
init_cuda
(
int
nb_tx
,
int
nb_symbols
,
int
fftsize
);
#if __cplusplus
}
#endif
#endif
#endif
openair1/CUDA/struct.h
View file @
b76e487d
#ifndef
OAI
_STRUCT
#ifndef
CUDA
_STRUCT
#define
OAI
_STRUCT
#define
CUDA
_STRUCT
#include <cuda.h>
#include <cuda_runtime.h>
typedef
float2
Complex
;
typedef
enum
{
typedef
enum
{
CYCLIC_PREFIX
,
CYCLIC_PREFIX
,
...
...
openair1/PHY/INIT/nr_init.c
View file @
b76e487d
...
@@ -33,7 +33,6 @@
...
@@ -33,7 +33,6 @@
#include "LAYER2/MAC/mac_extern.h"
#include "LAYER2/MAC/mac_extern.h"
#include "assertions.h"
#include "assertions.h"
#include <math.h>
#include <math.h>
#include "openair1/CUDA/init_cuda_def.h"
#include "PHY/NR_TRANSPORT/nr_ulsch.h"
#include "PHY/NR_TRANSPORT/nr_ulsch.h"
#include "PHY/NR_REFSIG/nr_refsig.h"
#include "PHY/NR_REFSIG/nr_refsig.h"
...
...
openair1/PHY/INIT/nr_init_ru.c
View file @
b76e487d
...
@@ -31,7 +31,6 @@
...
@@ -31,7 +31,6 @@
#include "assertions.h"
#include "assertions.h"
#include <math.h>
#include <math.h>
#include "openair1/PHY/defs_RU.h"
#include "openair1/PHY/defs_RU.h"
#include "openair1/CUDA/init_cuda_def.h"
int
nr_phy_init_RU
(
RU_t
*
ru
)
{
int
nr_phy_init_RU
(
RU_t
*
ru
)
{
...
@@ -42,7 +41,6 @@ int nr_phy_init_RU(RU_t *ru) {
...
@@ -42,7 +41,6 @@ int nr_phy_init_RU(RU_t *ru) {
LOG_I
(
PHY
,
"Initializing RU signal buffers (if_south %s) nb_tx %d
\n
"
,
ru_if_types
[
ru
->
if_south
],
ru
->
nb_tx
);
LOG_I
(
PHY
,
"Initializing RU signal buffers (if_south %s) nb_tx %d
\n
"
,
ru_if_types
[
ru
->
if_south
],
ru
->
nb_tx
);
init_cuda
(
ru
->
nb_tx
,
fp
->
symbols_per_slot
,
fp
->
ofdm_symbol_size
);
if
(
ru
->
if_south
<=
REMOTE_IF5
)
{
// this means REMOTE_IF5 or LOCAL_RF, so allocate memory for time-domain signals
if
(
ru
->
if_south
<=
REMOTE_IF5
)
{
// this means REMOTE_IF5 or LOCAL_RF, so allocate memory for time-domain signals
// Time-domain signals
// Time-domain signals
...
...
openair1/PHY/MODULATION/beamforming.c
View file @
b76e487d
...
@@ -52,6 +52,7 @@
...
@@ -52,6 +52,7 @@
#include "modulation_eNB.h"
#include "modulation_eNB.h"
#include "nr_modulation.h"
#include "nr_modulation.h"
#include "common/utils/LOG/vcd_signal_dumper.h"
#include "common/utils/LOG/vcd_signal_dumper.h"
#include "CUDA/CUDA_phy_procedure_def.h"
int
beam_precoding
(
int32_t
**
txdataF
,
int
beam_precoding
(
int32_t
**
txdataF
,
...
@@ -165,9 +166,16 @@ int nr_beam_precoding(int32_t **txdataF,
...
@@ -165,9 +166,16 @@ int nr_beam_precoding(int32_t **txdataF,
}
}
}
}
void
(
*
multadd_cpx_vector_ptr
)(
int
*
,
int
*
,
int
*
,
short
,
unsigned
int
,
int
);
#ifdef CUDA
multadd_cpx_vector_ptr
=
CUDA_multadd_cpx_vector
;
#else
multadd_cpx_vector_ptr
=
multadd_cpx_vector
;
#endif
for
(
p
=
0
;
p
<
nb_antenna_ports
;
p
++
)
{
for
(
p
=
0
;
p
<
nb_antenna_ports
;
p
++
)
{
if
((
frame_parms
->
L_ssb
>>
p
)
&
0x01
)
{
if
((
frame_parms
->
L_ssb
>>
p
)
&
0x01
)
{
multadd_cpx_vector
((
int16_t
*
)
&
txdataF
[
p
][
symbol
*
frame_parms
->
ofdm_symbol_size
],
multadd_cpx_vector
_ptr
((
int16_t
*
)
&
txdataF
[
p
][
symbol
*
frame_parms
->
ofdm_symbol_size
],
(
int16_t
*
)
beam_weights
[
p
][
aa
],
(
int16_t
*
)
beam_weights
[
p
][
aa
],
(
int16_t
*
)
&
txdataF_BF
[
aa
][
symbol
*
frame_parms
->
ofdm_symbol_size
],
(
int16_t
*
)
&
txdataF_BF
[
aa
][
symbol
*
frame_parms
->
ofdm_symbol_size
],
0
,
0
,
...
...
openair1/SCHED_NR/nr_ru_procedures.c
View file @
b76e487d
...
@@ -77,8 +77,15 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
...
@@ -77,8 +77,15 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
LOG_D
(
PHY
,
"SFN/SF:RU:TX:%d/%d Generating slot %d (first_symbol %d num_symbols %d)
\n
"
,
ru
->
proc
.
frame_tx
,
ru
->
proc
.
tti_tx
,
slot
,
first_symbol
,
num_symbols
);
LOG_D
(
PHY
,
"SFN/SF:RU:TX:%d/%d Generating slot %d (first_symbol %d num_symbols %d)
\n
"
,
ru
->
proc
.
frame_tx
,
ru
->
proc
.
tti_tx
,
slot
,
first_symbol
,
num_symbols
);
void
(
*
PHY_ofdm_mod_ptr
)(
int
*
,
int
*
,
int
,
unsigned
char
,
unsigned
short
,
Extension_t
);
#ifdef CUDA
PHY_ofdm_mod_ptr
=
CUDA_PHY_ofdm_mod
;
#else
PHY_ofdm_mod_ptr
=
PHY_ofdm_mod
;
#endif
if
(
fp
->
Ncp
==
1
)
{
if
(
fp
->
Ncp
==
1
)
{
PHY_ofdm_mod
(
&
ru
->
common
.
txdataF_BF
[
aa
][
slot_offsetF
],
PHY_ofdm_mod
_ptr
(
&
ru
->
common
.
txdataF_BF
[
aa
][
slot_offsetF
],
(
int
*
)
&
ru
->
common
.
txdata
[
aa
][
slot_offset
],
(
int
*
)
&
ru
->
common
.
txdata
[
aa
][
slot_offset
],
fp
->
ofdm_symbol_size
,
fp
->
ofdm_symbol_size
,
num_symbols
,
num_symbols
,
...
@@ -87,13 +94,13 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
...
@@ -87,13 +94,13 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
}
}
else
{
else
{
if
(
first_symbol
==
0
)
{
if
(
first_symbol
==
0
)
{
PHY_ofdm_mod
(
&
ru
->
common
.
txdataF_BF
[
aa
][
slot_offsetF
],
PHY_ofdm_mod
_ptr
(
&
ru
->
common
.
txdataF_BF
[
aa
][
slot_offsetF
],
(
int
*
)
&
ru
->
common
.
txdata
[
aa
][
slot_offset
],
(
int
*
)
&
ru
->
common
.
txdata
[
aa
][
slot_offset
],
fp
->
ofdm_symbol_size
,
fp
->
ofdm_symbol_size
,
1
,
1
,
fp
->
nb_prefix_samples0
,
fp
->
nb_prefix_samples0
,
CYCLIC_PREFIX
);
CYCLIC_PREFIX
);
PHY_ofdm_mod
(
&
ru
->
common
.
txdataF_BF
[
aa
][
slot_offsetF
+
fp
->
ofdm_symbol_size
],
PHY_ofdm_mod
_ptr
(
&
ru
->
common
.
txdataF_BF
[
aa
][
slot_offsetF
+
fp
->
ofdm_symbol_size
],
(
int
*
)
&
ru
->
common
.
txdata
[
aa
][
slot_offset
+
fp
->
nb_prefix_samples0
+
fp
->
ofdm_symbol_size
],
(
int
*
)
&
ru
->
common
.
txdata
[
aa
][
slot_offset
+
fp
->
nb_prefix_samples0
+
fp
->
ofdm_symbol_size
],
fp
->
ofdm_symbol_size
,
fp
->
ofdm_symbol_size
,
num_symbols
-
1
,
num_symbols
-
1
,
...
@@ -101,7 +108,7 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
...
@@ -101,7 +108,7 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
CYCLIC_PREFIX
);
CYCLIC_PREFIX
);
}
}
else
{
else
{
PHY_ofdm_mod
(
&
ru
->
common
.
txdataF_BF
[
aa
][
slot_offsetF
],
PHY_ofdm_mod
_ptr
(
&
ru
->
common
.
txdataF_BF
[
aa
][
slot_offsetF
],
(
int
*
)
&
ru
->
common
.
txdata
[
aa
][
slot_offset
],
(
int
*
)
&
ru
->
common
.
txdata
[
aa
][
slot_offset
],
fp
->
ofdm_symbol_size
,
fp
->
ofdm_symbol_size
,
num_symbols
,
num_symbols
,
...
@@ -113,40 +120,8 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
...
@@ -113,40 +120,8 @@ void nr_feptx0(RU_t *ru,int tti_tx,int first_symbol, int num_symbols, int aa) {
//VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPTX_OFDM+(first_symbol!=0?1:0), 0);
//VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPTX_OFDM+(first_symbol!=0?1:0), 0);
}
}
/*
 * GPU transmit front-end for one slot: stage the per-port frequency-domain
 * data in ru->common.txdataF, then run beamforming precoding
 * (CUDA_beam_precoding) and the IFFT / cyclic-prefix stage (CUDA_ifft_ofdm),
 * whose output is written to ru->common.txdata.
 *
 * Parameters:
 *   ru        radio unit whose buffers and frame parameters are used
 *   frame_tx  not used by this function
 *   tti_tx    slot index; the function returns immediately when
 *             nr_slot_select() reports SF_UL for it
 */
void CUDA_prec_ofdm(RU_t *ru, int frame_tx, int tti_tx){
  nfapi_nr_config_request_t *cfg = &ru->gNB_list[0]->gNB_config;
  if (nr_slot_select(cfg,tti_tx) == SF_UL) return;

  NR_DL_FRAME_PARMS *fp = ru->nr_frame_parms;
  PHY_VARS_gNB *gNB = ru->gNB_list[0];

  // NOTE(review): fixed port count and precoding shift -- presumably these
  // should come from configuration; confirm.
  const int nb_antenna_ports = 8;
  const int precoding_shift = 3;

  //data L1 to ru
  for(int p = 0; p < nb_antenna_ports; ++p){
    // NOTE(review): the source is the address of txdataF[p], not txdataF[p]
    // itself. If txdataF is an array of pointers this copies pointer storage
    // rather than samples -- verify against the declaration. Left unchanged
    // because the result is overwritten just below.
    memcpy((void*)ru->common.txdataF[p],
           (void*)&gNB->common_vars.txdataF[p],
           fp->ofdm_symbol_size*sizeof(int32_t)*fp->symbols_per_slot);

    //fake data
    // NOTE(review): every sample of the slot is forced to 1+1j, discarding
    // the data copied above. Looks like bring-up scaffolding -- confirm
    // before relying on this path.
    for(int j = 0; j < fp->ofdm_symbol_size*fp->symbols_per_slot; j++){
      short *iq = (short*)&ru->common.txdataF[p][j];
      iq[0] = 1;
      iq[1] = 1;
    }
  }

  CUDA_beam_precoding((int**)ru->common.txdataF,
                      (int***)ru->beam_weights[0],
                      fp->L_ssb,
                      precoding_shift,
                      fp->ofdm_symbol_size,
                      fp->symbols_per_slot,
                      nb_antenna_ports,
                      ru->nb_tx);

  CUDA_ifft_ofdm((int**)ru->common.txdata,
                 fp->ofdm_symbol_size,
                 fp->symbols_per_slot,
                 fp->nb_prefix_samples,
                 fp->nb_prefix_samples0,
                 ru->nb_tx,
                 fp->Ncp,
                 CYCLIC_PREFIX);
}
void
nr_feptx_ofdm_2thread
(
RU_t
*
ru
,
int
frame_tx
,
int
tti_tx
)
{
void
nr_feptx_ofdm_2thread
(
RU_t
*
ru
,
int
frame_tx
,
int
tti_tx
)
{
printf
(
"nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d
\n
"
,
frame_tx
,
tti_tx
);
return
;
nfapi_nr_config_request_t
*
cfg
=
&
ru
->
gNB_list
[
0
]
->
gNB_config
;
nfapi_nr_config_request_t
*
cfg
=
&
ru
->
gNB_list
[
0
]
->
gNB_config
;
RU_proc_t
*
proc
=
&
ru
->
proc
;
RU_proc_t
*
proc
=
&
ru
->
proc
;
...
@@ -245,7 +220,6 @@ printf("nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d\n", frame_tx, tti_tx);retu
...
@@ -245,7 +220,6 @@ printf("nr_feptx_ofdm_2thread : frame_tx:%d tti_tx:%d\n", frame_tx, tti_tx);retu
}
}
static
void
*
nr_feptx_thread
(
void
*
param
)
{
static
void
*
nr_feptx_thread
(
void
*
param
)
{
RU_feptx_t
*
feptx
=
(
RU_feptx_t
*
)
param
;
RU_feptx_t
*
feptx
=
(
RU_feptx_t
*
)
param
;
RU_t
*
ru
;
RU_t
*
ru
;
int
aa
,
slot
,
start
,
l
,
nb_antenna_ports
,
ret
;
int
aa
,
slot
,
start
,
l
,
nb_antenna_ports
,
ret
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment