Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wangjie
OpenXG-RAN
Commits
6a271962
Commit
6a271962
authored
Nov 15, 2019
by
tyhsu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
v2. gpu resource allocate
parent
fdc78bfb
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
99 additions
and
43 deletions
+99
-43
openair1/CUDA/CUDA_phy_procedure.cu
openair1/CUDA/CUDA_phy_procedure.cu
+87
-20
openair1/CUDA/cuda_struct.h
openair1/CUDA/cuda_struct.h
+3
-3
openair1/CUDA/init_cuda.cu
openair1/CUDA/init_cuda.cu
+4
-18
openair1/SCHED_NR/nr_ru_procedures.c
openair1/SCHED_NR/nr_ru_procedures.c
+5
-2
No files found.
openair1/CUDA/CUDA_phy_procedure.cu
View file @
6a271962
...
...
@@ -20,10 +20,10 @@
*/
/*! \file CUDA_phy_procedure.cu
* \brief Create and Implementation of beamforming and ifft in gpu
* \brief Create and Implementation of beamforming and ifft in gpu
(resource allocate)
* \author TY Hsu, CW Chang
* \date 2018
* \version 0.
1
* \version 0.
2
* \company ISIP@NCTU and Eurecom
* \email: tyhsu@cs.nctu.edu.tw, zhang0756107.cs07g@nctu.edu.tw
* \note
...
...
@@ -143,48 +143,115 @@ extern "C" void CUDA_ifft_ofdm( int **output,
}
__global__
void
conjMul
(
int
*
d_x1
,
int
*
d_x2
,
int
*
d_y
,
int
aa
,
int
div
,
int
fftsize
,
int
nb_symbols
){
int
id
=
blockIdx
.
x
*
1024
+
threadIdx
.
x
;
int
*
x1
=
&
d_x1
[
id
];
int
*
x2
=
&
d_x2
[
id
%
fftsize
];
int
*
y
=
&
d_y
[
aa
*
fftsize
*
nb_symbols
+
id
];
__device__
inline
void
beamComp
(
int
*
res
,
int
*
x1
,
int
*
x2
){
((
short
*
)
res
)[
0
]
=
((
short
*
)
x1
)[
0
]
*
((
short
*
)
x2
)[
0
]
+
((
short
*
)
x1
)[
1
]
*
((
short
*
)
x2
)[
1
];
((
short
*
)
res
)[
1
]
=
((
short
*
)
x1
)[
0
]
*
((
short
*
)
x2
)[
1
]
-
((
short
*
)
x1
)[
1
]
*
((
short
*
)
x2
)[
0
];
}
int
re
,
im
;
//conj(x1) * x2
re
=
((
short
*
)
x1
)[
0
]
*
((
short
*
)
x2
)[
0
]
+
((
short
*
)
x1
)[
1
]
*
((
short
*
)
x2
)[
1
];
im
=
((
short
*
)
x1
)[
0
]
*
((
short
*
)
x2
)[
1
]
-
((
short
*
)
x1
)[
1
]
*
((
short
*
)
x2
)[
0
];
__global__
void
conjMulAll
(
int
*
txdataF
,
int
*
weight
,
int
*
res
,
int
fftsize
,
int
nb_symbols
,
int
nb_tx
,
int
nb_antenna_ports
){
__shared__
int
x1
[
2048
*
5
];
int
symbSart
=
blockIdx
.
x
*
5
;
int
portId
=
blockIdx
.
y
;
int
id
=
threadIdx
.
x
;
int
id2
=
id
+
1024
;
int
aaSize
=
nb_antenna_ports
*
nb_symbols
*
fftsize
;
int
portSize
=
nb_symbols
*
fftsize
;
re
=
re
/
div
;
im
=
im
/
div
;
int
s1
=
0
;
for
(
int
symbId
=
symbSart
;
symbId
<
(
symbSart
+
5
)
&&
symbId
<
nb_symbols
;
symbId
++
){
x1
[
s1
*
fftsize
+
id
]
=
txdataF
[
symbId
*
fftsize
+
id
];
x1
[
s1
*
fftsize
+
id2
]
=
txdataF
[
symbId
*
fftsize
+
id2
];
s1
++
;
}
for
(
int
aa
=
0
;
aa
<
nb_tx
;
aa
++
){
for
(
int
symbId
=
symbSart
;
symbId
<
(
symbSart
+
5
)
&&
symbId
<
nb_symbols
;
symbId
++
){
int
resId
=
aa
*
aaSize
+
portId
*
portSize
+
symbId
*
fftsize
;
s1
=
symbId
%
5
;
beamComp
(
&
res
[
resId
+
id
],
&
x1
[
s1
*
fftsize
+
id
],
&
weight
[
portId
*
(
nb_tx
*
fftsize
)
+
aa
*
fftsize
+
id
]);
beamComp
(
&
res
[
resId
+
id2
],
&
x1
[
s1
*
fftsize
+
id2
],
&
weight
[
portId
*
(
nb_tx
*
fftsize
)
+
aa
*
fftsize
+
id2
]);
}
}
((
short
*
)
y
)[
0
]
+=
re
;
((
short
*
)
y
)[
1
]
+=
im
;
}
__device__
inline
void
partAdd
(
int
*
res
,
int
*
x
){
((
short
*
)
res
)[
0
]
+=
((
short
*
)
x
)[
0
];
((
short
*
)
res
)[
1
]
+=
((
short
*
)
x
)[
1
];
}
__global__
void
combine
(
int
*
res
,
int
*
txdataF_BF
,
int
fftsize
,
int
nb_symbols
,
int
nb_tx
,
int
nb_antenna_ports
){
__shared__
int
buf
[
2048
*
5
];
int
symbStart
=
blockIdx
.
x
;
int
txId
=
blockIdx
.
y
;
int
id
=
threadIdx
.
x
;
int
id2
=
id
+
1024
;
int
txSize
=
nb_antenna_ports
*
nb_symbols
*
fftsize
;
int
portSize
=
nb_symbols
*
fftsize
;
int
s1
=
0
;
for
(
int
p
=
0
;
p
<
nb_antenna_ports
;
p
++
){
for
(
int
symbId
=
symbStart
;
symbId
<
symbId
+
5
&&
symbId
<
nb_symbols
;
symbId
++
){
s1
=
symbId
%
5
;
partAdd
(
&
buf
[
s1
+
id
],
&
res
[
txId
*
txSize
+
p
*
portSize
+
symbId
*
fftsize
+
id
]);
partAdd
(
&
buf
[
s1
+
id2
],
&
res
[
txId
*
txSize
+
p
*
portSize
+
symbId
*
fftsize
+
id2
]);
}
}
for
(
int
symbId
=
symbStart
;
symbId
<
symbId
+
5
&&
symbId
<
nb_symbols
;
symbId
++
){
s1
=
symbId
%
5
;
res
[
txId
*
nb_symbols
*
fftsize
+
symbId
*
fftsize
+
id
]
=
buf
[
s1
*
fftsize
+
id
];
res
[
txId
*
nb_symbols
*
fftsize
+
symbId
*
fftsize
+
id2
]
=
buf
[
s1
*
fftsize
+
id2
];
}
}
extern
"C"
void
CUDA_beam_precoding
(
int
**
txdataF
,
int
***
weight
,
int
L_ssb
,
int
shift
,
int
fftsize
,
int
nb_symbols
,
int
nb_antenna_ports
,
int
nb_tx
){
cudaEvent_t
start
,
stop
;
float
time
;
cudaEventCreate
(
&
start
);
cudaEventCreate
(
&
stop
);
//initial BF data;
gpuErrchk
(
cudaMemset
(
cu_ru
.
d_txdataF_BF
,
0
,
fftsize
*
nb_symbols
*
sizeof
(
int
)
*
nb_tx
)
);
//move data to gpu
int
slotsize
=
fftsize
*
nb_symbols
;
for
(
int
p
=
0
;
p
<
nb_antenna_ports
;
p
++
){
gpuErrchk
(
cudaMemcpy
(
cu_ru
.
d_txdataF
[
p
],
txdataF
[
p
],
fftsize
*
sizeof
(
int
)
*
nb_symbols
,
cudaMemcpyHostToDevice
)
);
gpuErrchk
(
cudaMemcpy
(
&
cu_ru
.
d_txdataF
[
p
*
slotsize
],
txdataF
[
p
],
slotsize
*
sizeof
(
int
)
,
cudaMemcpyHostToDevice
)
);
}
cudaEventRecord
(
start
);
int
threadNum
=
1024
;
int
blockNum
=
fftsize
*
nb_symbols
/
threadNum
;
int
div
=
1
<<
shift
;
for
(
int
aa
=
0
;
aa
<
nb_tx
;
aa
++
){
for
(
int
p
=
0
;
p
<
nb_antenna_ports
;
p
++
){
if
((
L_ssb
>>
p
)
&
0x01
){
gpuErrchk
(
cudaMemcpy
(
cu_ru
.
d_weight
[
p
][
aa
],
weight
[
p
][
aa
],
fftsize
*
sizeof
(
int
),
cudaMemcpyHostToDevice
)
);
conjMul
<<<
blockNum
,
threadNum
>>>
(
cu_ru
.
d_txdataF
[
p
],
cu_ru
.
d_weight
[
p
][
aa
],
cu_ru
.
d_txdataF_BF
,
aa
,
div
,
fftsize
,
nb_symbols
);
gpuErrchk
(
cudaMemcpy
(
&
cu_ru
.
d_weight
[
p
*
(
nb_tx
*
fftsize
)
+
aa
*
fftsize
],
weight
[
p
][
aa
],
fftsize
*
sizeof
(
int
),
cudaMemcpyHostToDevice
)
);
}
}
}
cudaEventRecord
(
stop
);
cudaEventSynchronize
(
stop
);
cudaEventElapsedTime
(
&
time
,
start
,
stop
);
printf
(
"HTD: %f
\n
"
,
time
*
1000.0
);
cudaEventRecord
(
start
);
dim3
block
(
3
,
8
);
dim3
thread
(
1024
);
conjMulAll
<<<
block
,
thread
>>>
(
cu_ru
.
d_txdataF
,
cu_ru
.
d_weight
,
cu_ru
.
d_res
,
fftsize
,
nb_symbols
,
nb_tx
,
nb_antenna_ports
);
combine
<<<
block
,
thread
>>>
(
cu_ru
.
d_res
,
cu_ru
.
d_txdataF_BF
,
fftsize
,
nb_symbols
,
nb_tx
,
nb_antenna_ports
);
cudaEventRecord
(
stop
);
cudaEventSynchronize
(
stop
);
cudaEventElapsedTime
(
&
time
,
start
,
stop
);
printf
(
"conjMul+comb: %f
\n
"
,
time
*
1000.0
);
}
...
...
openair1/CUDA/cuda_struct.h
View file @
6a271962
...
...
@@ -13,9 +13,9 @@ typedef float2 Complex;
typedef
struct
cuda_cu_ru_t
{
//beamforming precoding
int
*
*
d_txdataF
;
//14symb-port0, 14symb-port1, ......
int
*
**
d_weight
;
//[p][tx][symb
]
cudaStream_t
*
d_beam_stream
;
int
*
d_txdataF
;
//14symb-port0, 14symb-port1, ......
int
*
d_weight
;
//[p * tx * fftsize
]
int
*
d_res
;
//ifft
int
*
d_txdataF_BF
;
//14symb-tx0, 14symb-tx1, ......
...
...
openair1/CUDA/init_cuda.cu
View file @
6a271962
...
...
@@ -23,7 +23,7 @@
* \brief Create and Implementation of beamforming and ifft in gpu
* \author TY Hsu, CW Chang
* \date 2018
* \version 0.
1
* \version 0.
2
* \company ISIP@NCTU and Eurecom
* \email: tyhsu@cs.nctu.edu.tw, zhang0756107.cs07g@nctu.edu.tw
* \note
...
...
@@ -47,23 +47,9 @@ extern "C" void init_cuda(int nb_tx, int nb_symbols, int fftsize){
int
nb_antenna_ports
=
8
;
//beamforming precoding
cu_ru
.
d_txdataF
=
(
int
**
)
malloc
(
sizeof
(
int
*
)
*
nb_antenna_ports
);
for
(
int
p
=
0
;
p
<
nb_antenna_ports
;
p
++
){
gpuErrchk
(
cudaMalloc
((
void
**
)
&
cu_ru
.
d_txdataF
[
p
],
fftsize
*
sizeof
(
int
)
*
nb_symbols
)
);
}
cu_ru
.
d_beam_stream
=
(
cudaStream_t
*
)
malloc
(
sizeof
(
cudaStream_t
)
*
nb_tx
);
for
(
int
aa
=
0
;
aa
<
nb_tx
;
aa
++
){
gpuErrchk
(
cudaStreamCreate
(
&
cu_ru
.
d_beam_stream
[
aa
])
);
}
cu_ru
.
d_weight
=
(
int
***
)
malloc
(
sizeof
(
int
**
)
*
nb_antenna_ports
);
for
(
int
p
=
0
;
p
<
nb_antenna_ports
;
p
++
){
cu_ru
.
d_weight
[
p
]
=
(
int
**
)
malloc
(
sizeof
(
int
*
)
*
nb_tx
);
for
(
int
aa
=
0
;
aa
<
nb_tx
;
aa
++
){
gpuErrchk
(
cudaMalloc
((
void
**
)
&
cu_ru
.
d_weight
[
p
][
aa
],
fftsize
*
sizeof
(
int
))
);
}
}
gpuErrchk
(
cudaMalloc
((
void
**
)
&
cu_ru
.
d_txdataF
,
sizeof
(
int
)
*
nb_tx
*
nb_antenna_ports
*
nb_symbols
*
fftsize
)
);
gpuErrchk
(
cudaMalloc
((
void
**
)
&
cu_ru
.
d_weight
,
sizeof
(
int
)
*
nb_tx
*
nb_antenna_ports
*
fftsize
)
);
gpuErrchk
(
cudaMalloc
((
void
**
)
&
cu_ru
.
d_res
,
sizeof
(
int
)
*
nb_tx
*
nb_antenna_ports
*
fftsize
*
nb_symbols
)
);
//ifft
gpuErrchk
(
cudaMalloc
((
void
**
)
&
cu_ru
.
d_txdataF_BF
,
fftsize
*
sizeof
(
int
)
*
nb_symbols
*
nb_tx
)
);
...
...
openair1/SCHED_NR/nr_ru_procedures.c
View file @
6a271962
...
...
@@ -132,7 +132,8 @@ void CUDA_prec_ofdm(RU_t *ru,int frame_tx,int tti_tx){
((
short
*
)
&
ru
->
common
.
txdataF
[
p
][
j
])[
1
]
=
1
;
}
}
clock_t
start
,
end
;
start
=
clock
();
CUDA_beam_precoding
((
int
**
)
ru
->
common
.
txdataF
,
(
int
***
)
ru
->
beam_weights
[
0
],
fp
->
L_ssb
,
3
,
fp
->
ofdm_symbol_size
,
fp
->
symbols_per_slot
,
nb_antenna_ports
,
ru
->
nb_tx
);
...
...
@@ -140,7 +141,9 @@ void CUDA_prec_ofdm(RU_t *ru,int frame_tx,int tti_tx){
fp
->
ofdm_symbol_size
,
fp
->
symbols_per_slot
,
fp
->
nb_prefix_samples
,
fp
->
nb_prefix_samples0
,
ru
->
nb_tx
,
fp
->
Ncp
,
CYCLIC_PREFIX
);
end
=
clock
();
double
time
=
((
double
)(
end
-
start
))
/
CLOCKS_PER_SEC
;
printf
(
"CUDA_prec_ofdm ------------------------- >%lf
\n
"
,
time
*
1000000
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment