Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
常顺宇
OpenXG-RAN
Commits
a678c7eb
Commit
a678c7eb
authored
Jun 07, 2021
by
Fang-WANG
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
perf cufft(symbol)
parent
13752beb
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
284 additions
and
1 deletion
+284
-1
hs/CMakeLists.txt
hs/CMakeLists.txt
+16
-0
hs/cuFFT1.cu
hs/cuFFT1.cu
+67
-0
hs/cuFFT2.cu
hs/cuFFT2.cu
+90
-0
hs/cuFFT3.cu
hs/cuFFT3.cu
+110
-0
targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb.sa.band78.fr1.106PRB.usrpb210.conf
...ENERIC-NR-5GC/CONF/gnb.sa.band78.fr1.106PRB.usrpb210.conf
+1
-1
No files found.
hs/CMakeLists.txt
0 → 100644
View file @
a678c7eb
cmake_minimum_required(VERSION 2.8)
project(run)

# The CUDA toolkit is mandatory for this target.
find_package(CUDA REQUIRED)

# Pass options to NVCC.
# CUDA sources are compiled by NVCC rather than GCC, so the options have to be
# handed to NVCC explicitly.
list(APPEND CUDA_NVCC_FLAGS -lcufft)

# Compilation: build the executable "run" from cuFFT1.cu.
cuda_add_executable(run cuFFT1.cu)

# Linking: add the cuFFT library to the "run" target.
cuda_add_cufft_to_target(run)

# Add gdb support
# SET(CMAKE_BUILD_TYPE "Debug")
# SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb")
# SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
hs/cuFFT1.cu
0 → 100644
View file @
a678c7eb
#include <stdio.h>
#include <cufft.h>
//gdb debug
// void testtest(int16_t *x,int16_t *y,unsigned char scale)
// {
// printf("testtest \n");
// }
#define LEN 2048 //signal sampling points
/*
 * 2048-point forward complex FFT computed on the GPU with cuFFT.
 *
 * x     : input, read as LEN interleaved (re, im) int16_t pairs
 *         (x[2*i] is the real part, x[2*i+1] the imaginary part).
 * y     : output, written as LEN interleaved (re, im) int16_t pairs; every
 *         component is the raw cuFFT result divided by 45.2 and converted to
 *         int16_t (no clamping is applied).
 * scale : not used by this implementation.
 *
 * If a host allocation, CUDA call or cuFFT call fails, a message is printed to
 * stderr and y is left unmodified. All host and device resources acquired by
 * the call are released before returning.
 */
void cudft2048(int16_t *x, int16_t *y, unsigned char scale)
{
    (void)scale;

    const size_t bytes = LEN * sizeof(cufftComplex);

    cufftComplex *hostIn = (cufftComplex *)malloc(bytes);   // staging buffer, host -> device
    cufftComplex *hostOut = (cufftComplex *)malloc(bytes);  // staging buffer, device -> host
    cufftComplex *d_fftData = NULL;                         // device buffer, transformed in place
    cufftHandle plan = 0;
    int havePlan = 0;

    // Single-pass block: any failure breaks out to the common cleanup below.
    do {
        if (hostIn == NULL || hostOut == NULL) {
            fprintf(stderr, "cudft2048: host allocation failed\n");
            break;
        }

        // Widen the interleaved int16 samples to cufftComplex.
        for (int i = 0; i < LEN; i++) {
            hostIn[i].x = x[i * 2];
            hostIn[i].y = x[i * 2 + 1];
        }

        cudaError_t err = cudaMalloc((void **)&d_fftData, bytes);
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: cudaMalloc failed: %s\n", cudaGetErrorString(err));
            d_fftData = NULL;
            break;
        }

        err = cudaMemcpy(d_fftData, hostIn, bytes, cudaMemcpyHostToDevice);
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: host-to-device copy failed: %s\n", cudaGetErrorString(err));
            break;
        }

        cufftResult fftErr = cufftPlan1d(&plan, LEN, CUFFT_C2C, 1);
        if (fftErr != CUFFT_SUCCESS) {
            fprintf(stderr, "cudft2048: cufftPlan1d failed (code %d)\n", (int)fftErr);
            break;
        }
        havePlan = 1;

        // In-place forward transform.
        fftErr = cufftExecC2C(plan, d_fftData, d_fftData, CUFFT_FORWARD);
        if (fftErr != CUFFT_SUCCESS) {
            fprintf(stderr, "cudft2048: cufftExecC2C failed (code %d)\n", (int)fftErr);
            break;
        }

        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: device synchronize failed: %s\n", cudaGetErrorString(err));
            break;
        }

        err = cudaMemcpy(hostOut, d_fftData, bytes, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: device-to-host copy failed: %s\n", cudaGetErrorString(err));
            break;
        }

        // Scale and narrow back to interleaved int16.
        for (int i = 0; i < LEN; i++) {
            y[i * 2] = hostOut[i].x / 45.2;
            y[i * 2 + 1] = hostOut[i].y / 45.2;
        }
    } while (0);

    if (havePlan) {
        cufftDestroy(plan);
    }
    cudaFree(d_fftData);   // cudaFree(NULL) is a no-op
    free(hostOut);         // the original never released this buffer
    free(hostIn);
}
// Initialization hook. It is empty in this file: cudft2048() here allocates
// its buffers and creates its cuFFT plan on every call.
void initcudft()
{
}
/*
 * Test driver: builds a LEN-sample interleaved (re, im) int16 ramp
 * (re = i, im = LEN - i) and runs cudft2048() on it three times, each time
 * into a freshly allocated output buffer.
 *
 * Returns 0 on success, 1 if a host allocation fails.
 */
int main()
{
    // LEN complex samples, two int16_t per sample == LEN * sizeof(int32_t) bytes.
    int16_t *a = (int16_t *)malloc(LEN * sizeof(int32_t));
    if (a == NULL) {
        fprintf(stderr, "main: failed to allocate input buffer\n");
        return 1;
    }

    for (int i = 0; i < LEN; i++) {
        a[2 * i] = i;
        a[2 * i + 1] = LEN - i;
    }

    int status = 0;
    for (int run = 0; run < 3; run++) {
        int32_t *b = (int32_t *)malloc(LEN * sizeof(int32_t));
        if (b == NULL) {
            fprintf(stderr, "main: failed to allocate output buffer\n");
            status = 1;
            break;
        }
        cudft2048(a, (int16_t *)b, 1);
        free(b);
    }

    free(a);   // the original leaked the input buffer
    return status;
}
\ No newline at end of file
hs/cuFFT2.cu
0 → 100644
View file @
a678c7eb
#include <stdio.h>
#include <cufft.h>
#include<cuda_runtime.h>
#define LEN 2048
/*
 * Kernel: widen interleaved int16 (re, im) pairs into cufftComplex values.
 *
 * a      : input, read at a[2*k] and a[2*k+1] for element k.
 * b      : output, element k receives (a[2*k], a[2*k+1]).
 * length : number of complex elements; threads with a global index at or
 *          beyond it do nothing.
 *
 * Uses a 1-D launch; each thread handles the element at its global x index.
 */
__global__ void int_cufftComplex(int16_t *a, cufftComplex *b, int length)
{
    const int idx = (blockIdx.x * blockDim.x + threadIdx.x);
    if (idx < length) {
        const int re = idx * 2;
        const int im = re + 1;
        b[idx].x = a[re];
        b[idx].y = a[im];
    }
}
/*
 * Kernel: scale cufftComplex values by 1/45.2 and narrow them to interleaved
 * int16 (re, im) pairs.
 *
 * a      : input complex values.
 * b      : output, element k is written to b[2*k] (real) and b[2*k+1] (imag).
 * length : number of complex elements; threads with a global index at or
 *          beyond it do nothing.
 *
 * The divisor is a double literal, so each division is evaluated in double
 * precision before the implicit conversion to int16_t. No range clamping is
 * performed.
 */
__global__ void cufftComplex_int(cufftComplex *a, int16_t *b, int length)
{
    const int idx = (blockIdx.x * blockDim.x + threadIdx.x);
    if (idx < length) {
        const int re = idx * 2;
        const int im = re + 1;
        b[re] = a[idx].x / 45.2;
        b[im] = a[idx].y / 45.2;
    }
}
// Initialization hook. It is empty in this file: cudft2048() here allocates
// its device buffers and creates its cuFFT plan on every call.
void initcudft()
{
}
/*
 * 2048-point forward complex FFT on the GPU, with the int16 <-> complex
 * conversions also done on the device.
 *
 * x     : host input, LEN interleaved (re, im) int16_t pairs. May be NULL
 *         (the warm-up call in load_cuFFT passes 0); the upload is then
 *         skipped and the transform runs on whatever the device buffer holds.
 * y     : host output, LEN interleaved (re, im) int16_t pairs. May be NULL,
 *         in which case the download is skipped.
 * scale : not used by this implementation.
 *
 * Every CUDA / cuFFT status is checked; on failure a message is printed to
 * stderr, the remaining steps are skipped, and all resources acquired by this
 * call are released.
 */
void cudft2048(int16_t *x, int16_t *y, unsigned char scale)
{
    (void)scale;

    const size_t sampleBytes = LEN * sizeof(int32_t);   // LEN pairs of int16_t
    const int threadNum = 512;
    const int blockNum = (LEN + threadNum - 1) / threadNum;   // 4 blocks for LEN == 2048

    int16_t *x1 = NULL;             // device buffer for interleaved int16 samples (input, then output)
    cufftComplex *CompData = NULL;  // device buffer transformed in place by cuFFT
    cufftHandle plan = 0;
    int havePlan = 0;

    // Single-pass block: any failure breaks out to the common cleanup below.
    do {
        cudaError_t err = cudaMalloc((void **)&x1, sampleBytes);
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: cudaMalloc(x1) failed: %s\n", cudaGetErrorString(err));
            x1 = NULL;
            break;
        }

        err = cudaMalloc((void **)&CompData, LEN * sizeof(cufftComplex));
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: cudaMalloc(CompData) failed: %s\n", cudaGetErrorString(err));
            CompData = NULL;
            break;
        }

        if (x != NULL) {
            err = cudaMemcpy(x1, x, sampleBytes, cudaMemcpyHostToDevice);
            if (err != cudaSuccess) {
                fprintf(stderr, "cudft2048: host-to-device copy failed: %s\n", cudaGetErrorString(err));
                break;
            }
        }

        int_cufftComplex<<<blockNum, threadNum>>>(x1, CompData, LEN);
        err = cudaGetLastError();
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: int_cufftComplex launch failed: %s\n", cudaGetErrorString(err));
            break;
        }

        // Plan creation; the first call is slow, which is why a warm-up run exists.
        cufftResult fftErr = cufftPlan1d(&plan, LEN, CUFFT_C2C, 1);
        if (fftErr != CUFFT_SUCCESS) {
            fprintf(stderr, "cudft2048: cufftPlan1d failed (code %d)\n", (int)fftErr);
            break;
        }
        havePlan = 1;

        fftErr = cufftExecC2C(plan, CompData, CompData, CUFFT_FORWARD);
        if (fftErr != CUFFT_SUCCESS) {
            fprintf(stderr, "cudft2048: cufftExecC2C failed (code %d)\n", (int)fftErr);
            break;
        }

        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: device synchronize failed: %s\n", cudaGetErrorString(err));
            break;
        }

        // Reuse x1 as the int16 output buffer.
        cufftComplex_int<<<blockNum, threadNum>>>(CompData, x1, LEN);
        err = cudaGetLastError();
        if (err != cudaSuccess) {
            fprintf(stderr, "cudft2048: cufftComplex_int launch failed: %s\n", cudaGetErrorString(err));
            break;
        }

        if (y != NULL) {
            // Blocking copy: also waits for the kernel above to finish.
            err = cudaMemcpy(y, x1, sampleBytes, cudaMemcpyDeviceToHost);
            if (err != cudaSuccess) {
                fprintf(stderr, "cudft2048: device-to-host copy failed: %s\n", cudaGetErrorString(err));
                break;
            }
        }
    } while (0);

    if (havePlan) {
        cufftDestroy(plan);
    }
    cudaFree(CompData);   // cudaFree(NULL) is a no-op
    cudaFree(x1);
}
/*
 * Warm-up entry point: calls initcudft() and then runs one throw-away
 * cudft2048() transform.
 *
 * The original passed null host pointers (0, 0) to cudft2048(), which hands
 * them straight to cudaMemcpy. Zero-filled host buffers are used instead, so
 * the warm-up goes through the same copy/transform path as a real call.
 *
 * Returns 0 on success, -1 if the warm-up buffers cannot be allocated.
 */
int load_cuFFT(void)
{
    initcudft();

    // LEN complex samples, two int16_t each == LEN * sizeof(int32_t) bytes.
    int16_t *warmIn = (int16_t *)calloc(LEN, sizeof(int32_t));
    int16_t *warmOut = (int16_t *)calloc(LEN, sizeof(int32_t));
    int status = 0;

    if (warmIn != NULL && warmOut != NULL) {
        cudft2048(warmIn, warmOut, 1);
    } else {
        fprintf(stderr, "load_cuFFT: failed to allocate warm-up buffers\n");
        status = -1;
    }

    free(warmOut);   // free(NULL) is a no-op
    free(warmIn);
    return status;
}
/*
 * Test driver: warms up via load_cuFFT(), builds a LEN-sample interleaved
 * (re, im) int16 ramp (re = i, im = LEN - i) and runs cudft2048() on it three
 * times into the same output buffer.
 *
 * Returns 0 on success, 1 if a host allocation fails.
 */
int main()
{
    load_cuFFT();

    // LEN complex samples, two int16_t each == LEN * sizeof(int32_t) bytes.
    int16_t *a = (int16_t *)malloc(LEN * sizeof(int32_t));
    int16_t *b = (int16_t *)malloc(LEN * sizeof(int32_t));
    if (a == NULL || b == NULL) {
        fprintf(stderr, "main: failed to allocate host buffers\n");
        free(b);
        free(a);
        return 1;
    }

    for (int i = 0; i < LEN; i++) {
        a[2 * i] = i;
        a[2 * i + 1] = LEN - i;
    }

    for (int run = 0; run < 3; run++) {
        cudft2048(a, b, 0);
    }

    // The original leaked both buffers.
    free(b);
    free(a);
    return 0;
}
\ No newline at end of file
hs/cuFFT3.cu
0 → 100644
View file @
a678c7eb
#include <stdio.h>
#include <cufft.h>
#include<cuda_runtime.h>
#define LEN 2048
#define SQRT2048_real 45.2876
#define SQRT2048_imag 45.3065
/*
 * Kernel: widen interleaved int16 (re, im) pairs into cufftComplex values.
 *
 * a      : input, read at a[2*k] and a[2*k+1] for element k.
 * b      : output, element k receives (a[2*k], a[2*k+1]).
 * length : number of complex elements; threads with a global index at or
 *          beyond it do nothing.
 *
 * Uses a 1-D launch; each thread handles the element at its global x index.
 */
__global__ void int_cufftComplex(int16_t *a, cufftComplex *b, int length)
{
    const int idx = (blockIdx.x * blockDim.x + threadIdx.x);
    if (idx < length) {
        const int re = idx * 2;
        const int im = re + 1;
        b[idx].x = a[re];
        b[idx].y = a[im];
    }
}
/*
 * Kernel: scale cufftComplex values and narrow them to interleaved int16
 * (re, im) pairs.
 *
 * a      : input complex values.
 * b      : output, element k is written to b[2*k] (real part divided by
 *          SQRT2048_real) and b[2*k+1] (imaginary part divided by
 *          SQRT2048_imag).
 * length : number of complex elements; threads with a global index at or
 *          beyond it do nothing.
 *
 * Both divisors are double-valued macros, so the divisions are evaluated in
 * double precision before the implicit conversion to int16_t. No range
 * clamping is performed.
 */
__global__ void cufftComplex_int(cufftComplex *a, int16_t *b, int length)
{
    const int idx = (blockIdx.x * blockDim.x + threadIdx.x);
    if (idx < length) {
        const int re = idx * 2;
        const int im = re + 1;
        b[re] = a[idx].x / SQRT2048_real;
        b[im] = a[idx].y / SQRT2048_imag;
    }
}
// Device buffer for the interleaved int16 samples. initcudft() allocates it
// (LEN * sizeof(int32_t) bytes); cudft2048() uses it for both the uploaded
// input and the converted output.
int16_t *x1;
// Device buffer of LEN cufftComplex values, allocated in initcudft() and
// transformed in place by cudft2048().
cufftComplex *CompData;
// cuFFT plan created once in initcudft() and reused by every cudft2048() call.
cufftHandle plan;
/*
 * One-time setup for cudft2048(): allocates the global device buffers x1 and
 * CompData and creates the global 1-D complex-to-complex cuFFT plan of size
 * LEN (batch 1).
 *
 * Each step is checked. On failure a message is printed to stderr, anything
 * allocated so far is released, and x1 / CompData are left NULL.
 */
void initcudft()
{
    cudaError_t err = cudaMalloc((void **)&x1, LEN * sizeof(int32_t));
    if (err != cudaSuccess) {
        fprintf(stderr, "initcudft: cudaMalloc(x1) failed: %s\n", cudaGetErrorString(err));
        x1 = NULL;
        return;
    }

    err = cudaMalloc((void **)&CompData, LEN * sizeof(cufftComplex));
    if (err != cudaSuccess) {
        fprintf(stderr, "initcudft: cudaMalloc(CompData) failed: %s\n", cudaGetErrorString(err));
        cudaFree(x1);
        x1 = NULL;
        CompData = NULL;
        return;
    }

    // Plan creation is the slow step that motivates doing this once up front.
    cufftResult fftErr = cufftPlan1d(&plan, LEN, CUFFT_C2C, 1);
    if (fftErr != CUFFT_SUCCESS) {
        fprintf(stderr, "initcudft: cufftPlan1d failed (code %d)\n", (int)fftErr);
        cudaFree(CompData);
        cudaFree(x1);
        CompData = NULL;
        x1 = NULL;
    }
}
/*
 * Releases what initcudft() set up: the cuFFT plan and the global device
 * buffers CompData and x1.
 *
 * The original body had all three release calls commented out, so nothing
 * was ever freed. The buffer pointers are reset to NULL afterwards, so
 * calling this function again does nothing.
 */
void destroycudft()
{
    // Only touch the plan if setup got far enough to allocate the buffers.
    if (CompData != NULL || x1 != NULL) {
        cufftDestroy(plan);
    }

    cudaFree(CompData);   // cudaFree(NULL) is a no-op
    CompData = NULL;

    cudaFree(x1);
    x1 = NULL;
}
/*
 * 2048-point forward complex FFT on the GPU using the buffers and plan that
 * initcudft() prepared (globals x1, CompData, plan).
 *
 * x     : host input, LEN interleaved (re, im) int16_t pairs.
 * y     : host output, LEN interleaved (re, im) int16_t pairs, scaled by the
 *         cufftComplex_int kernel.
 * scale : not used by this implementation.
 *
 * Every CUDA / cuFFT status is checked; on the first failure a message is
 * printed to stderr and the function returns without completing the
 * transform. Nothing is allocated or freed here.
 */
void cudft2048(int16_t *x, int16_t *y, unsigned char scale)
{
    (void)scale;

    if (x1 == NULL || CompData == NULL) {
        fprintf(stderr, "cudft2048: device buffers not allocated; call initcudft() first\n");
        return;
    }
    if (x == NULL || y == NULL) {
        fprintf(stderr, "cudft2048: null host buffer\n");
        return;
    }

    const size_t sampleBytes = LEN * sizeof(int32_t);   // LEN pairs of int16_t
    const int threadNum = 512;
    const int blockNum = (LEN + threadNum - 1) / threadNum;   // 4 blocks for LEN == 2048

    cudaError_t err = cudaMemcpy(x1, x, sampleBytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudft2048: host-to-device copy failed: %s\n", cudaGetErrorString(err));
        return;
    }

    int_cufftComplex<<<blockNum, threadNum>>>(x1, CompData, LEN);
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudft2048: int_cufftComplex launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // In-place forward transform with the pre-built plan.
    cufftResult fftErr = cufftExecC2C(plan, CompData, CompData, CUFFT_FORWARD);
    if (fftErr != CUFFT_SUCCESS) {
        fprintf(stderr, "cudft2048: cufftExecC2C failed (code %d)\n", (int)fftErr);
        return;
    }

    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudft2048: device synchronize failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // Reuse x1 as the int16 output buffer.
    cufftComplex_int<<<blockNum, threadNum>>>(CompData, x1, LEN);
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudft2048: cufftComplex_int launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // Blocking copy: also waits for the kernel above to finish.
    err = cudaMemcpy(y, x1, sampleBytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudft2048: device-to-host copy failed: %s\n", cudaGetErrorString(err));
    }
}
/*
 * Warm-up entry point: calls initcudft() and then runs one throw-away
 * cudft2048() transform.
 *
 * The original leaked both scratch buffers and fed uninitialized memory into
 * the transform. The buffers are now zero-filled and freed before returning.
 */
void load_cuFFT(void)
{
    initcudft();

    // LEN complex samples, two int16_t each == LEN * sizeof(int32_t) bytes.
    int16_t *warmIn = (int16_t *)calloc(LEN, sizeof(int32_t));
    int16_t *warmOut = (int16_t *)calloc(LEN, sizeof(int32_t));

    if (warmIn != NULL && warmOut != NULL) {
        cudft2048(warmIn, warmOut, 1);
    } else {
        fprintf(stderr, "load_cuFFT: failed to allocate warm-up buffers\n");
    }

    free(warmOut);   // free(NULL) is a no-op
    free(warmIn);
}
/*
 * Test driver: warms up via load_cuFFT(), builds a LEN-sample interleaved
 * (re, im) int16 ramp (re = i, im = LEN - i), runs cudft2048() three times and
 * prints every input/output pair after each run, then calls destroycudft().
 *
 * Returns 0 on success, 1 if a host allocation fails.
 */
int main()
{
    load_cuFFT();

    // LEN complex samples, two int16_t each == LEN * sizeof(int32_t) bytes.
    int16_t *a = (int16_t *)malloc(LEN * sizeof(int32_t));
    int16_t *b = (int16_t *)malloc(LEN * sizeof(int32_t));
    if (a == NULL || b == NULL) {
        fprintf(stderr, "main: failed to allocate host buffers\n");
        free(b);
        free(a);
        destroycudft();
        return 1;
    }

    for (int i = 0; i < LEN; i++) {
        a[2 * i] = i;
        a[2 * i + 1] = LEN - i;
    }

    for (int run = 0; run < 3; run++) {
        cudft2048(a, b, 0);

        printf("hs1111111111111111:\n");
        for (int j = 0; j < LEN; j++) {
            printf("a=%d + %dj\tb=%d + %dj\n", a[j * 2], a[j * 2 + 1], b[j * 2], b[j * 2 + 1]);
        }
    }

    destroycudft();

    // The original leaked both buffers.
    free(b);
    free(a);
    return 0;
}
\ No newline at end of file
targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb.sa.band78.fr1.106PRB.usrpb210.conf
View file @
a678c7eb
...
...
@@ -262,7 +262,7 @@ THREAD_STRUCT = (
#three config for level of parallelism "PARALLEL_SINGLE_THREAD", "PARALLEL_RU_L1_SPLIT", or "PARALLEL_RU_L1_TRX_SPLIT"
parallel_config
=
"PARALLEL_SINGLE_THREAD"
;
#two option for worker "WORKER_DISABLE" or "WORKER_ENABLE"
worker_config
=
"WORKER_
EN
ABLE"
;
worker_config
=
"WORKER_
DIS
ABLE"
;
}
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment