Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
O
OpenXG-RAN
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lizhongxiao
OpenXG-RAN
Commits
82b32199
Commit
82b32199
authored
Dec 20, 2017
by
ISIP CS/NCTU
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
GPU part uploading
parent
ae0494b0
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
1822 additions
and
0 deletions
+1822
-0
openair1/PHY/CUDA/CUDA_debug.h
openair1/PHY/CUDA/CUDA_debug.h
+3
-0
openair1/PHY/CUDA/CUDA_define.h
openair1/PHY/CUDA/CUDA_define.h
+3
-0
openair1/PHY/CUDA/INIT/defs.h
openair1/PHY/CUDA/INIT/defs.h
+18
-0
openair1/PHY/CUDA/INIT/init_cuda.cu
openair1/PHY/CUDA/INIT/init_cuda.cu
+307
-0
openair1/PHY/CUDA/LTE_TRANSPORT/defs.h
openair1/PHY/CUDA/LTE_TRANSPORT/defs.h
+60
-0
openair1/PHY/CUDA/LTE_TRANSPORT/ulsch_demodulation.cu
openair1/PHY/CUDA/LTE_TRANSPORT/ulsch_demodulation.cu
+1131
-0
openair1/PHY/CUDA/MODULATION/defs.h
openair1/PHY/CUDA/MODULATION/defs.h
+23
-0
openair1/PHY/CUDA/MODULATION/lte_dfts.cu
openair1/PHY/CUDA/MODULATION/lte_dfts.cu
+148
-0
openair1/PHY/CUDA/defs.h
openair1/PHY/CUDA/defs.h
+9
-0
openair1/PHY/CUDA/defs_struct.h
openair1/PHY/CUDA/defs_struct.h
+101
-0
openair1/PHY/CUDA/extern.h
openair1/PHY/CUDA/extern.h
+19
-0
No files found.
openair1/PHY/CUDA/CUDA_debug.h
0 → 100644
View file @
82b32199
#ifndef CUDA_debug
//#define CUDA_debug
#endif
openair1/PHY/CUDA/CUDA_define.h
0 → 100644
View file @
82b32199
#ifndef CUDA
#define CUDA
#endif
\ No newline at end of file
openair1/PHY/CUDA/INIT/defs.h
0 → 100644
View file @
82b32199
#ifndef __INIT__DEFS_CU__H__
#define __INIT__DEFS_CU__H__
#include <stdint.h>
#include <stdio.h>
#include "PHY/impl_defs_lte.h"
#include "PHY/defs.h"
#ifdef __cplusplus
extern
"C"
#endif
void
init_cuda
(
PHY_VARS_eNB
*
phy_vars_eNB
,
LTE_DL_FRAME_PARMS
frame_parms
);
#ifdef __cplusplus
extern
"C"
#endif
void
free_cufft
();
#endif
openair1/PHY/CUDA/INIT/init_cuda.cu
0 → 100644
View file @
82b32199
#include "defs.h"
#include "PHY/CUDA/extern.h"
#include "PHY/LTE_TRANSPORT/extern.h"
#include <stdio.h>
int
device_count
;
dl_cu_t
dl_cu
[
10
];
ul_cu_t
ul_cu
[
10
];
estimation_const_t
esti_const
;
para_ulsch
ulsch_para
[
10
];
ext_rbs
ext_rbs_para
[
10
];
void
init_cufft
(
void
);
void
free_cufft
(
void
);
__global__
void
generate_ul_ref_sigs_rx_cu
(
float2
*
x
,
unsigned
int
Msc_RS
,
unsigned
int
u
,
unsigned
int
v
)
{
unsigned
short
ref_primes
[
33
]
=
{
11
,
23
,
31
,
47
,
50
,
71
,
89
,
107
,
113
,
139
,
179
,
191
,
211
,
239
,
283
,
293
,
317
,
359
,
383
,
431
,
479
,
523
,
571
,
599
,
647
,
719
,
863
,
887
,
953
,
971
,
1069
,
1151
,
1193
};
float
qbar
;
float
phase
;
unsigned
short
dftsizes
[
33
]
=
{
12
,
24
,
36
,
48
,
60
,
72
,
96
,
108
,
120
,
144
,
180
,
192
,
216
,
240
,
288
,
300
,
324
,
360
,
384
,
432
,
480
,
540
,
576
,
600
,
648
,
720
,
864
,
900
,
960
,
972
,
1080
,
1152
,
1200
};
char
ref24
[
720
]
=
{
-
1
,
3
,
1
,
-
3
,
3
,
-
1
,
1
,
3
,
-
3
,
3
,
1
,
3
,
-
3
,
3
,
1
,
1
,
-
1
,
1
,
3
,
-
3
,
3
,
-
3
,
-
1
,
-
3
,
-
3
,
3
,
-
3
,
-
3
,
-
3
,
1
,
-
3
,
-
3
,
3
,
-
1
,
1
,
1
,
1
,
3
,
1
,
-
1
,
3
,
-
3
,
-
3
,
1
,
3
,
1
,
1
,
-
3
,
3
,
-
1
,
3
,
3
,
1
,
1
,
-
3
,
3
,
3
,
3
,
3
,
1
,
-
1
,
3
,
-
1
,
1
,
1
,
-
1
,
-
3
,
-
1
,
-
1
,
1
,
3
,
3
,
-
1
,
-
3
,
1
,
1
,
3
,
-
3
,
1
,
1
,
-
3
,
-
1
,
-
1
,
1
,
3
,
1
,
3
,
1
,
-
1
,
3
,
1
,
1
,
-
3
,
-
1
,
-
3
,
-
1
,
-
1
,
-
1
,
-
1
,
-
3
,
-
3
,
-
1
,
1
,
1
,
3
,
3
,
-
1
,
3
,
-
1
,
1
,
-
1
,
-
3
,
1
,
-
1
,
-
3
,
-
3
,
1
,
-
3
,
-
1
,
-
1
,
-
3
,
1
,
1
,
3
,
-
1
,
1
,
3
,
1
,
-
3
,
1
,
-
3
,
1
,
1
,
-
1
,
-
1
,
3
,
-
1
,
-
3
,
3
,
-
3
,
-
3
,
-
3
,
1
,
1
,
1
,
1
,
-
1
,
-
1
,
3
,
-
3
,
-
3
,
3
,
-
3
,
1
,
-
1
,
-
1
,
1
,
-
1
,
1
,
1
,
-
1
,
-
3
,
-
1
,
1
,
-
1
,
3
,
-
1
,
-
3
,
-
3
,
3
,
3
,
-
1
,
-
1
,
-
3
,
-
1
,
3
,
1
,
3
,
1
,
3
,
1
,
1
,
-
1
,
3
,
1
,
-
1
,
1
,
3
,
-
3
,
-
1
,
-
1
,
1
,
-
3
,
1
,
3
,
-
3
,
1
,
-
1
,
-
3
,
3
,
-
3
,
3
,
-
1
,
-
1
,
-
1
,
-
1
,
1
,
-
3
,
-
3
,
-
3
,
1
,
-
3
,
-
3
,
-
3
,
1
,
-
3
,
1
,
1
,
-
3
,
3
,
3
,
-
1
,
-
3
,
-
1
,
3
,
-
3
,
3
,
3
,
3
,
-
1
,
1
,
1
,
-
3
,
1
,
-
1
,
1
,
1
,
-
3
,
1
,
1
,
-
1
,
1
,
-
3
,
-
3
,
3
,
-
1
,
3
,
-
1
,
-
1
,
-
3
,
-
3
,
-
3
,
-
1
,
-
3
,
-
3
,
1
,
-
1
,
1
,
3
,
3
,
-
1
,
1
,
-
1
,
3
,
1
,
3
,
3
,
-
3
,
-
3
,
1
,
3
,
1
,
-
1
,
-
3
,
-
3
,
-
3
,
3
,
3
,
-
3
,
3
,
3
,
-
1
,
-
3
,
3
,
-
1
,
1
,
-
3
,
1
,
1
,
3
,
3
,
1
,
1
,
1
,
-
1
,
-
1
,
1
,
-
3
,
3
,
-
1
,
1
,
1
,
-
3
,
3
,
3
,
-
1
,
-
3
,
3
,
-
3
,
-
1
,
-
3
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
3
,
-
1
,
3
,
3
,
1
,
-
1
,
1
,
3
,
3
,
3
,
-
1
,
1
,
1
,
-
3
,
1
,
3
,
-
1
,
-
3
,
3
,
-
3
,
-
3
,
3
,
1
,
3
,
1
,
-
3
,
3
,
1
,
3
,
1
,
1
,
3
,
3
,
-
1
,
-
1
,
-
3
,
1
,
-
3
,
-
1
,
3
,
1
,
1
,
3
,
-
1
,
-
1
,
1
,
-
3
,
1
,
3
,
-
3
,
1
,
-
1
,
-
3
,
-
1
,
3
,
1
,
3
,
1
,
-
1
,
-
3
,
-
3
,
-
1
,
-
1
,
-
3
,
-
3
,
-
3
,
-
1
,
-
1
,
-
3
,
3
,
-
1
,
-
1
,
-
1
,
-
1
,
1
,
1
,
-
3
,
3
,
1
,
3
,
3
,
1
,
-
1
,
1
,
-
3
,
1
,
-
3
,
1
,
1
,
-
3
,
-
1
,
1
,
3
,
-
1
,
3
,
3
,
-
1
,
-
3
,
1
,
-
1
,
-
3
,
3
,
3
,
3
,
-
1
,
1
,
1
,
3
,
-
1
,
-
3
,
-
1
,
3
,
-
1
,
-
1
,
-
1
,
1
,
1
,
1
,
1
,
1
,
-
1
,
3
,
-
1
,
-
3
,
1
,
1
,
3
,
-
3
,
1
,
-
3
,
-
1
,
1
,
1
,
-
3
,
-
3
,
3
,
1
,
1
,
-
3
,
1
,
3
,
3
,
1
,
-
1
,
-
3
,
3
,
-
1
,
3
,
3
,
3
,
-
3
,
1
,
-
1
,
1
,
-
1
,
-
3
,
-
1
,
1
,
3
,
-
1
,
3
,
-
3
,
-
3
,
-
1
,
-
3
,
3
,
-
3
,
-
3
,
-
3
,
-
1
,
-
1
,
-
3
,
-
1
,
-
3
,
3
,
1
,
3
,
-
3
,
-
1
,
3
,
-
1
,
1
,
-
1
,
3
,
-
3
,
1
,
-
1
,
-
3
,
-
3
,
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
3
,
1
,
-
3
,
-
1
,
1
,
-
1
,
1
,
-
1
,
-
1
,
3
,
3
,
-
3
,
-
1
,
1
,
-
3
,
-
3
,
-
1
,
-
3
,
3
,
1
,
-
1
,
-
3
,
-
1
,
-
3
,
-
3
,
3
,
-
3
,
3
,
-
3
,
-
1
,
1
,
3
,
1
,
-
3
,
1
,
3
,
3
,
-
1
,
-
3
,
-
1
,
-
1
,
-
1
,
-
1
,
3
,
3
,
3
,
1
,
3
,
3
,
-
3
,
1
,
3
,
-
1
,
3
,
-
1
,
3
,
3
,
-
3
,
3
,
1
,
-
1
,
3
,
3
,
1
,
-
1
,
3
,
3
,
-
1
,
-
3
,
3
,
-
3
,
-
1
,
-
1
,
3
,
-
1
,
3
,
-
1
,
-
1
,
1
,
1
,
1
,
1
,
-
1
,
-
1
,
-
3
,
-
1
,
3
,
1
,
-
1
,
1
,
-
1
,
3
,
-
1
,
3
,
1
,
1
,
-
1
,
-
1
,
-
3
,
1
,
1
,
-
3
,
1
,
3
,
-
3
,
1
,
1
,
-
3
,
-
3
,
-
1
,
-
1
,
-
3
,
-
1
,
1
,
3
,
1
,
1
,
-
3
,
-
1
,
-
1
,
-
3
,
3
,
-
3
,
3
,
1
,
-
3
,
3
,
-
3
,
1
,
-
1
,
1
,
-
3
,
1
,
1
,
1
,
-
1
,
-
3
,
3
,
3
,
1
,
1
,
3
,
-
1
,
-
3
,
-
1
,
-
1
,
-
1
,
3
,
1
,
-
3
,
-
3
,
-
1
,
3
,
-
3
,
-
1
,
-
3
,
-
1
,
-
3
,
-
1
,
-
1
,
-
3
,
-
1
,
-
1
,
1
,
-
3
,
-
1
,
-
1
,
1
,
-
1
,
-
3
,
1
,
1
,
-
3
,
1
,
-
3
,
-
3
,
3
,
1
,
1
,
-
1
,
3
,
-
1
,
-
1
,
1
,
1
,
-
1
,
-
1
,
-
3
,
-
1
,
3
,
-
1
,
3
,
-
1
,
1
,
3
,
1
,
-
1
,
3
,
1
,
3
,
-
3
,
-
3
,
1
,
-
1
,
-
1
,
1
,
3
};
char
ref12
[
360
]
=
{
-
1
,
1
,
3
,
-
3
,
3
,
3
,
1
,
1
,
3
,
1
,
-
3
,
3
,
1
,
1
,
3
,
3
,
3
,
-
1
,
1
,
-
3
,
-
3
,
1
,
-
3
,
3
,
1
,
1
,
-
3
,
-
3
,
-
3
,
-
1
,
-
3
,
-
3
,
1
,
-
3
,
1
,
-
1
,
-
1
,
1
,
1
,
1
,
1
,
-
1
,
-
3
,
-
3
,
1
,
-
3
,
3
,
-
1
,
-
1
,
3
,
1
,
-
1
,
1
,
-
1
,
-
3
,
-
1
,
1
,
-
1
,
1
,
3
,
1
,
-
3
,
3
,
-
1
,
-
1
,
1
,
1
,
-
1
,
-
1
,
3
,
-
3
,
1
,
-
1
,
3
,
-
3
,
-
3
,
-
3
,
3
,
1
,
-
1
,
3
,
3
,
-
3
,
1
,
-
3
,
-
1
,
-
1
,
-
1
,
1
,
-
3
,
3
,
-
1
,
1
,
-
3
,
3
,
1
,
1
,
-
3
,
3
,
1
,
-
1
,
-
1
,
-
1
,
1
,
1
,
3
,
-
1
,
1
,
1
,
-
3
,
-
1
,
3
,
3
,
-
1
,
-
3
,
1
,
1
,
1
,
1
,
1
,
-
1
,
3
,
-
1
,
1
,
1
,
-
3
,
-
3
,
-
1
,
-
3
,
-
3
,
3
,
-
1
,
3
,
1
,
-
1
,
-
1
,
3
,
3
,
-
3
,
1
,
3
,
1
,
3
,
3
,
1
,
-
3
,
1
,
1
,
-
3
,
1
,
1
,
1
,
-
3
,
-
3
,
-
3
,
1
,
3
,
3
,
-
3
,
3
,
-
3
,
1
,
1
,
3
,
-
1
,
-
3
,
3
,
3
,
-
3
,
1
,
-
1
,
-
3
,
-
1
,
3
,
1
,
3
,
3
,
3
,
-
1
,
1
,
3
,
-
1
,
1
,
-
3
,
-
1
,
-
1
,
1
,
1
,
3
,
1
,
-
1
,
-
3
,
1
,
3
,
1
,
-
1
,
1
,
3
,
3
,
3
,
-
1
,
-
1
,
3
,
-
1
,
-
3
,
1
,
1
,
3
,
-
3
,
3
,
-
3
,
-
3
,
3
,
1
,
3
,
-
1
,
-
3
,
3
,
1
,
1
,
-
3
,
1
,
-
3
,
-
3
,
-
1
,
-
1
,
1
,
-
3
,
-
1
,
3
,
1
,
3
,
1
,
-
1
,
-
1
,
3
,
-
3
,
-
1
,
-
3
,
-
1
,
-
1
,
-
3
,
1
,
1
,
1
,
1
,
3
,
1
,
-
1
,
1
,
-
3
,
-
1
,
-
1
,
3
,
-
1
,
1
,
-
3
,
-
3
,
-
3
,
-
3
,
-
3
,
1
,
-
1
,
-
3
,
1
,
1
,
-
3
,
-
3
,
-
3
,
-
3
,
-
1
,
3
,
-
3
,
1
,
-
3
,
3
,
1
,
1
,
-
1
,
-
3
,
-
1
,
-
3
,
1
,
-
1
,
1
,
3
,
-
1
,
1
,
1
,
1
,
3
,
1
,
3
,
3
,
-
1
,
1
,
-
1
,
-
3
,
-
3
,
1
,
1
,
-
3
,
3
,
3
,
1
,
3
,
3
,
1
,
-
3
,
-
1
,
-
1
,
3
,
1
,
3
,
-
3
,
-
3
,
3
,
-
3
,
1
,
-
1
,
-
1
,
3
,
-
1
,
-
3
,
-
3
,
-
1
,
-
3
,
-
1
,
-
3
,
3
,
1
,
-
1
,
1
,
3
,
-
3
,
-
3
,
-
1
,
3
,
-
3
,
3
,
-
1
,
3
,
3
,
-
3
,
3
,
3
,
-
1
,
-
1
,
3
,
-
3
,
-
3
,
-
1
,
-
1
,
-
3
,
-
1
,
3
,
-
3
,
3
,
1
,
-
1
};
unsigned
int
q
,
m
,
n
;
if
(
Msc_RS
>=
2
)
{
qbar
=
ref_primes
[
Msc_RS
]
*
(
u
+
1
)
/
(
double
)
31
;
if
((((
int
)
floor
(
2
*
qbar
))
&
1
)
==
0
)
q
=
(
int
)(
floor
(
qbar
+
.5
))
-
v
;
else
q
=
(
int
)(
floor
(
qbar
+
.5
))
+
v
;
for
(
n
=
0
;
n
<
dftsizes
[
Msc_RS
];
n
++
)
{
m
=
n
%
ref_primes
[
Msc_RS
];
phase
=
(
float
)
q
*
m
*
(
m
+
1
)
/
ref_primes
[
Msc_RS
];
x
[
n
].
x
=
cosf
(
M_PI
*
phase
);
x
[
n
].
y
=-
sinf
(
M_PI
*
phase
);
}
}
else
if
(
Msc_RS
==
1
)
{
for
(
n
=
0
;
n
<
dftsizes
[
1
];
n
++
)
{
x
[
n
].
x
=
cosf
(
M_PI
*
((
float
)
ref24
[(
u
*
24
)
+
n
])
/
4
);
x
[
n
].
y
=
sinf
(
M_PI
*
((
float
)
ref24
[(
u
*
24
)
+
n
])
/
4
);
}
}
else
if
(
Msc_RS
==
0
)
{
for
(
n
=
0
;
n
<
dftsizes
[
0
];
n
++
)
{
x
[
n
].
x
=
cosf
(
M_PI
*
ref12
[(
u
*
12
)
+
n
]
/
4
);
x
[
n
].
y
=
sinf
(
M_PI
*
ref12
[(
u
*
12
)
+
n
]
/
4
);
}
}
}
void
init_cuda
(
PHY_VARS_eNB
*
phy_vars_eNB
,
LTE_DL_FRAME_PARMS
frame_parms
)
{
unsigned
short
dftsizes
[
33
]
=
{
12
,
24
,
36
,
48
,
60
,
72
,
96
,
108
,
120
,
144
,
180
,
192
,
216
,
240
,
288
,
300
,
324
,
360
,
384
,
432
,
480
,
540
,
576
,
600
,
648
,
720
,
864
,
900
,
960
,
972
,
1080
,
1152
,
1200
};
int
i
,
j
,
k
;
int
u
,
v
,
Msc_RS
;
cudaGetDeviceCount
(
&
device_count
);
printf
(
"[CUDA] now we have %d device
\n
"
,
device_count
);
LTE_DL_FRAME_PARMS
*
const
frame_parm
=
&
phy_vars_eNB
->
lte_frame_parms
;
LTE_eNB_COMMON
*
const
eNB_common_vars
=
&
phy_vars_eNB
->
lte_eNB_common_vars
;
LTE_eNB_PUSCH
**
const
eNB_pusch_vars
=
phy_vars_eNB
->
lte_eNB_pusch_vars
;
LTE_eNB_SRS
*
const
eNB_srs_vars
=
phy_vars_eNB
->
lte_eNB_srs_vars
;
LTE_eNB_PRACH
*
const
eNB_prach_vars
=
&
phy_vars_eNB
->
lte_eNB_prach_vars
;
for
(
i
=
0
;
i
<
device_count
;
i
++
)
{
cudaDeviceProp
deviceProp
;
cudaGetDeviceProperties
(
&
deviceProp
,
i
);
printf
(
"[CUDA] device number= %d, device name= %s
\n
"
,
i
,
deviceProp
.
name
);
}
for
(
Msc_RS
=
0
;
Msc_RS
<
2
;
Msc_RS
++
)
{
for
(
u
=
0
;
u
<
30
;
u
++
)
{
for
(
v
=
0
;
v
<
1
;
v
++
)
{
cudaMalloc
(
(
void
**
)
&
esti_const
.
d_ul_ref_sigs_rx
[
u
][
v
][
Msc_RS
],
2
*
sizeof
(
float2
)
*
dftsizes
[
Msc_RS
]
);
generate_ul_ref_sigs_rx_cu
<<<
1
,
1
>>>
(
esti_const
.
d_ul_ref_sigs_rx
[
u
][
v
][
Msc_RS
],
Msc_RS
,
u
,
v
);
}
}
}
for
(
Msc_RS
=
2
;
Msc_RS
<
33
;
Msc_RS
++
)
{
for
(
u
=
0
;
u
<
30
;
u
++
)
{
for
(
v
=
0
;
v
<
2
;
v
++
)
{
cudaMalloc
(
(
void
**
)
&
esti_const
.
d_ul_ref_sigs_rx
[
u
][
v
][
Msc_RS
],
2
*
sizeof
(
float2
)
*
dftsizes
[
Msc_RS
]
);
generate_ul_ref_sigs_rx_cu
<<<
1
,
1
>>>
(
esti_const
.
d_ul_ref_sigs_rx
[
u
][
v
][
Msc_RS
],
Msc_RS
,
u
,
v
);
}
}
}
//host mem alloc
/*
int eNB_id, UE_id;
for ( eNB_id = 0; eNB_id < 3; eNB_id++ )
{
printf("Initial host port to device port\n");
printf("Initial RX port\n");
cudaMallocHost((void **) &eNB_common_vars->rxdata_7_5kHz[eNB_id],frame_parm->nb_antennas_rx*sizeof(int*));
cudaMallocHost((void **) &eNB_common_vars->rxdataF[eNB_id], frame_parm->nb_antennas_rx*sizeof(int*));
for ( i = 0; i < frame_parms.nb_antennas_rx; i++ )
{
cudaMallocHost((void **)&eNB_common_vars->rxdata_7_5kHz[eNB_id][i], frame_parm->samples_per_tti*sizeof(int));
cudaMallocHost((void **)&eNB_common_vars->rxdataF[eNB_id][i], 2*sizeof(int)*(frame_parm->ofdm_symbol_size*frame_parm->symbols_per_tti) );
}
printf("Initial TX port\n");
cudaMallocHost((void **)eNB_common_vars->txdataF[eNB_id], frame_parm->nb_antennas_tx*sizeof(int*));
for ( i = 0; i < frame_parms.nb_antennas_rx; i++ )
{
cudaMallocHost((void **)&eNB_common_vars->txdataF[eNB_id][i], 2*(frame_parm->ofdm_symbol_size*frame_parm->symbols_per_tti)*sizeof(int) );
}
}
for ( UE_id = 0; UE_id < NUMBER_OF_UE_MAX; UE_id++ )
{
for ( eNB_id = 0; eNB_id < 3; eNB_id++ )
{
cudaMallocHost((void **) &eNB_pusch_vars[UE_id]->rxdataF_comp[eNB_id], frame_parm->nb_antennas_rx*sizeof(int*));
for ( i = 0; i < frame_parms.nb_antennas_rx; i++ )
{
cudaMallocHost((void **)&eNB_pusch_vars[UE_id]->rxdataF_comp[eNB_id][i], sizeof(int)*frame_parm->N_RB_UL*12*frame_parm->symbols_per_tti );
}
}
}
*/
for
(
i
=
0
;
i
<
10
;
i
++
)
{
ul_cu
[
i
].
CP
=
frame_parms
.
nb_prefix_samples
;
ul_cu
[
i
].
CP0
=
frame_parms
.
nb_prefix_samples0
;
ul_cu
[
i
].
fftsize
=
frame_parms
.
ofdm_symbol_size
;
ul_cu
[
i
].
Ncp
=
frame_parms
.
Ncp
;
ul_cu
[
i
].
symbols_per_tti
=
frame_parms
.
symbols_per_tti
;
ul_cu
[
i
].
samples_per_tti
=
frame_parms
.
samples_per_tti
;
ul_cu
[
i
].
nb_antennas_rx
=
frame_parms
.
nb_antennas_rx
;
ul_cu
[
i
].
N_RB_UL
=
frame_parms
.
N_RB_UL
;
ul_cu
[
i
].
d_rxdata
=
(
int
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
int
*
)
);
ul_cu
[
i
].
d_rxdata_fft
=
(
float2
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
float2
*
)
);
ul_cu
[
i
].
d_rxdataF
=
(
int
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
int
*
)
);
ul_cu
[
i
].
d_rxdata_ext
=
(
float2
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
float2
*
)
);
ul_cu
[
i
].
d_rxdata_ext_int
=
(
int
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
int
*
)
);
ul_cu
[
i
].
d_rxdata_comp
=
(
float2
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
float2
*
)
);
ul_cu
[
i
].
d_rxdata_comp_int
=
(
int
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
int
*
)
);
ul_cu
[
i
].
d_drs_ch
=
(
float2
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
float2
*
)
);
ul_cu
[
i
].
d_drs_ch_int
=
(
int
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
int
*
)
);
ul_cu
[
i
].
d_ulsch_power
=
(
int
**
)
malloc
(
frame_parms
.
nb_antennas_rx
*
sizeof
(
int
*
)
);
for
(
k
=
0
;
k
<
ul_cu
[
i
].
nb_antennas_rx
;
k
++
)
{
if
(
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_rxdata
[
k
]
,
sizeof
(
int
)
*
15
*
512
))
printf
(
"error
\n
"
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_rxdata_fft
[
k
]
,
sizeof
(
float2
)
*
ul_cu
[
i
].
symbols_per_tti
*
ul_cu
[
i
].
fftsize
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_rxdataF
[
k
]
,
2
*
sizeof
(
int
)
*
ul_cu
[
i
].
symbols_per_tti
*
ul_cu
[
i
].
fftsize
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_rxdata_ext
[
k
]
,
sizeof
(
float2
)
*
frame_parms
.
N_RB_UL
*
12
*
frame_parms
.
symbols_per_tti
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_rxdata_ext_int
[
k
]
,
sizeof
(
int
)
*
frame_parms
.
N_RB_UL
*
12
*
frame_parms
.
symbols_per_tti
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_rxdata_comp
[
k
]
,
sizeof
(
float2
)
*
frame_parms
.
N_RB_UL
*
12
*
frame_parms
.
symbols_per_tti
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_rxdata_comp_int
[
k
],
sizeof
(
int
)
*
frame_parms
.
N_RB_UL
*
12
*
frame_parms
.
symbols_per_tti
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_drs_ch
[
k
]
,
sizeof
(
float2
)
*
frame_parms
.
N_RB_UL
*
12
*
frame_parms
.
symbols_per_tti
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_drs_ch_int
[
k
]
,
sizeof
(
int
)
*
frame_parms
.
N_RB_UL
*
12
*
frame_parms
.
symbols_per_tti
+
1
);
cudaMalloc
((
void
**
)
&
ul_cu
[
i
].
d_ulsch_power
[
k
]
,
sizeof
(
int
)
);
}
dl_cu
[
i
].
CP
=
frame_parms
.
nb_prefix_samples
;
dl_cu
[
i
].
CP0
=
frame_parms
.
nb_prefix_samples0
;
dl_cu
[
i
].
ifftsize
=
frame_parms
.
ofdm_symbol_size
;
dl_cu
[
i
].
Ncp
=
frame_parms
.
Ncp
;
dl_cu
[
i
].
symbols_per_tti
=
frame_parms
.
symbols_per_tti
;
dl_cu
[
i
].
samples_per_tti
=
frame_parms
.
samples_per_tti
;
}
printf
(
"[CUDA] CP0=%d, CP=%d, fftsize=%d, symbols_per_tti=%d, samples_per_tti=%d
\n
"
,
ul_cu
[
i
].
CP0
,
ul_cu
[
i
].
CP
,
frame_parms
.
ofdm_symbol_size
,
frame_parms
.
symbols_per_tti
,
frame_parms
.
samples_per_tti
);
init_cufft
(
);
}
void
init_cufft
(
void
)
{
//initial cufft plan fft128, fft256, fft512, fft1024, fft1536, fft2048
int
i
,
j
;
short
fftsize
=
ul_cu
[
i
].
fftsize
;
short
Ncp
=
ul_cu
[
i
].
Ncp
;
short
symbols_per_tti
=
ul_cu
[
i
].
symbols_per_tti
;
short
samples_per_tti
=
ul_cu
[
i
].
samples_per_tti
;
for
(
i
=
0
;
i
<
10
;
i
++
)
{
//for ul cuda
cudaStreamCreateWithFlags
(
&
(
ul_cu
[
i
].
stream_ul
),
cudaStreamNonBlocking
);
cudaStreamCreateWithFlags
(
&
(
ul_cu
[
i
].
tempstrm
),
cudaStreamNonBlocking
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
fft
)
,
fftsize
,
CUFFT_C2C
,
symbols_per_tti
);
cufftSetStream
(
ul_cu
[
i
].
fft
,
ul_cu
[
i
].
stream_ul
);
cudaStreamCreateWithFlags
(
&
(
ul_cu
[
i
].
timing_advance
),
cudaStreamNonBlocking
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
ifft_timing_advance
)
,
fftsize
,
CUFFT_C2C
,
symbols_per_tti
);
cufftSetStream
(
ul_cu
[
i
].
ifft_timing_advance
,
ul_cu
[
i
].
timing_advance
);
//for dl cuda
cudaStreamCreateWithFlags
(
&
(
dl_cu
[
i
].
stream_dl
),
cudaStreamNonBlocking
);
cufftPlan1d
(
&
(
dl_cu
[
i
].
ifft
)
,
fftsize
,
CUFFT_C2C
,
symbols_per_tti
);
cudaMalloc
((
void
**
)
&
(
dl_cu
[
i
].
d_txdata
)
,
sizeof
(
short
)
*
(
symbols_per_tti
+
1
)
*
2
*
symbols_per_tti
*
fftsize
);
cudaMalloc
((
void
**
)
&
(
dl_cu
[
i
].
d_txdata_o
)
,
sizeof
(
short
)
*
samples_per_tti
*
2
);
cudaMalloc
((
void
**
)
&
(
dl_cu
[
i
].
d_txdata_ifft
),
sizeof
(
float2
)
*
symbols_per_tti
*
fftsize
);
cudaMallocHost
((
void
**
)
&
(
dl_cu
[
i
].
h_txdata
)
,
sizeof
(
short
)
*
symbols_per_tti
*
2
*
fftsize
);
cufftSetStream
(
dl_cu
[
i
].
ifft
,
dl_cu
[
i
].
stream_dl
);
}
for
(
i
=
0
;
i
<
10
;
i
++
)
{
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft12
)
,
12
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft24
)
,
24
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft36
)
,
36
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft48
)
,
48
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft60
)
,
60
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft72
)
,
72
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft84
)
,
84
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft96
)
,
96
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft108
)
,
108
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft120
)
,
120
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft132
)
,
132
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft144
)
,
144
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft156
)
,
156
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft168
)
,
168
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft180
)
,
180
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft192
)
,
192
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft204
)
,
204
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft216
)
,
216
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft228
)
,
228
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft240
)
,
240
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft252
)
,
252
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft264
)
,
264
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft276
)
,
276
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft288
)
,
288
,
CUFFT_C2C
,
14
);
cufftPlan1d
(
&
(
ul_cu
[
i
].
idft
.
fft300
)
,
300
,
CUFFT_C2C
,
14
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft12
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft24
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft36
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft48
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft60
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft72
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft84
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft96
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft108
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft120
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft132
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft144
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft156
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft168
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft180
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft192
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft204
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft216
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft228
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft240
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft252
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft264
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft276
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft288
,
ul_cu
[
i
].
stream_ul
);
cufftSetStream
(
ul_cu
[
i
].
idft
.
fft300
,
ul_cu
[
i
].
stream_ul
);
}
}
void
free_cufft
(
void
)
{
int
i
,
j
;
for
(
i
=
0
;
i
<
10
;
i
++
)
{
//for ul cuda
cudaFree
(
ul_cu
[
i
].
d_rxdata
);
cudaFree
(
ul_cu
[
i
].
d_rxdata_fft
);
cufftDestroy
(
ul_cu
[
i
].
fft
);
cudaStreamDestroy
(
ul_cu
[
i
].
stream_ul
);
//for dl cuda
cudaFree
(
dl_cu
[
i
].
d_txdata
);
cudaFree
(
dl_cu
[
i
].
d_txdata_o
);
cudaFree
(
dl_cu
[
i
].
d_txdata_ifft
);
cudaFreeHost
(
dl_cu
[
i
].
h_txdata
);
cufftDestroy
(
dl_cu
[
i
].
ifft
);
cudaStreamDestroy
(
dl_cu
[
i
].
stream_dl
);
}
cudaDeviceReset
();
printf
(
"end cuda
\n
"
);
}
openair1/PHY/CUDA/LTE_TRANSPORT/defs.h
0 → 100644
View file @
82b32199
#include <stdint.h>
#include <stdio.h>
#include "assertions.h"
#ifdef __cplusplus
extern
"C"
#endif
void
ulsch_extract_rb_and_compensation_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
first_carrier_offset
,
unsigned
short
number_symbols
,
unsigned
short
sf
);
#ifdef __cplusplus
extern
"C"
#endif
void
ulsch_channel_compensation_cu
(
short
sf
,
short
cyclic_shift
,
int
*
out
,
int
*
out2
,
int
*
u
,
int
*
v
,
int
Msc_RS
,
short
const_shift
);
#ifdef __cplusplus
extern
"C"
#endif
void
ulsch_extract_rb_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
unsigned
short
sf
);
#ifdef __cplusplus
extern
"C"
#endif
void
exrb_compen_esti_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
unsigned
short
sf
);
#ifdef __cplusplus
extern
"C"
#endif
void
estimation_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
unsigned
short
sf
);
#ifdef __cplusplus
extern
"C"
#endif
void
compensation_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
short
Qm
,
unsigned
short
sf
);
#ifdef __cplusplus
extern
"C"
#endif
void
idft_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
short
cl
,
unsigned
short
sf
);
openair1/PHY/CUDA/LTE_TRANSPORT/ulsch_demodulation.cu
0 → 100644
View file @
82b32199
#include "defs.h"
#include "PHY/CUDA/extern.h"
#ifndef CUFFT_H
#define CUFFT_H
#include "cufft.h"
#endif
#define ccmax(a,b) ((a>b) ? (a) : (b))
#define ccmin(a,b) ((a<b) ? (a) : (b))
__global__
void
k_short_12
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_24
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_36
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_48
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_60
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_72
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_96
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_108
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_120
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_144
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_180
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_192
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_216
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_240
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_288
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
k_short_300
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
);
__global__
void
exrb_compen_esti
(
float2
*
x
,
float2
*
ul_ref1
,
float2
*
ul_ref2
,
float2
*
out
,
int
*
sig_engery
,
const
unsigned
int
first_rb
,
short
cyclic_shift1
,
short
cyclic_shift2
,
short
Msc_RS
)
{
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
const
int
tidy
=
threadIdx
.
y
;
float2
rxdataF_ext
;
float2
pilot_data1
;
float2
pilot_data2
;
float2
drs_ch
;
__shared__
float
power
[
600
];
__shared__
int
power1
[
600
];
int
cs
,
k
,
channel_level
;
float
phase
,
current_phase1
,
current_phase2
;
float
const_value
=
22.627417
;
float2
out_temp
;
float
cs_re
[
12
]
=
{
1
,
0.866025
,
0.5
,
0
,
-
0.5
,
-
0.866025
,
-
1
,
-
0.866025
,
-
0.5
,
0
,
0.5
,
0.866025
};
float
cs_im
[
12
]
=
{
0
,
0.5
,
0.866025
,
1
,
0.866025
,
0.5
,
0
,
-
0.5
,
-
0.866025
,
-
1
,
-
0.866025
,
-
0.5
};
int
mag
;
int
temp_re
,
temp_im
;
short
inv_ch
[
257
]
=
{
512
,
256
,
170
,
128
,
102
,
85
,
73
,
64
,
56
,
51
,
46
,
42
,
39
,
36
,
34
,
32
,
30
,
28
,
26
,
25
,
24
,
23
,
22
,
21
,
20
,
19
,
18
,
18
,
17
,
17
,
16
,
16
,
15
,
15
,
14
,
14
,
13
,
13
,
13
,
12
,
12
,
12
,
11
,
11
,
11
,
11
,
10
,
10
,
10
,
10
,
10
,
9
,
9
,
9
,
9
,
9
,
8
,
8
,
8
,
8
,
8
,
8
,
8
,
8
,
7
,
7
,
7
,
7
,
7
,
7
,
7
,
7
,
7
,
6
,
6
,
6
,
6
,
6
,
6
,
6
,
6
,
6
,
6
,
6
,
6
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
4
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
1
};
int
const_shift
=
0
;
int
i
;
unsigned
int
xcl
;
unsigned
char
l2
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
rxdataF_ext
=
x
[
symbol_id
*
512
+
pos
];
pilot_data1
=
x
[
1536
+
pos
];
pilot_data2
=
x
[
5120
+
pos
];
out_temp
.
x
=
(
pilot_data1
.
x
*
ul_ref1
[
re_id
].
x
+
pilot_data1
.
y
*
ul_ref1
[
re_id
].
y
)
*
0.0441941738
;
out_temp
.
y
=
(
pilot_data1
.
y
*
ul_ref1
[
re_id
].
x
-
pilot_data1
.
x
*
ul_ref1
[
re_id
].
y
)
*
0.0441941738
;
cs
=
(
re_id
*
cyclic_shift1
)
%
12
;
pilot_data1
.
x
=
out_temp
.
x
*
cs_re
[
cs
]
+
out_temp
.
y
*
cs_im
[
cs
];
pilot_data1
.
y
=
out_temp
.
y
*
cs_re
[
cs
]
-
out_temp
.
x
*
cs_im
[
cs
];
out_temp
.
x
=
(
pilot_data2
.
x
*
ul_ref2
[
re_id
].
x
+
pilot_data2
.
y
*
ul_ref2
[
re_id
].
y
)
*
0.0441941738
;
out_temp
.
y
=
(
pilot_data2
.
y
*
ul_ref2
[
re_id
].
x
-
pilot_data2
.
x
*
ul_ref2
[
re_id
].
y
)
*
0.0441941738
;
cs
=
(
re_id
*
cyclic_shift2
)
%
12
;
pilot_data2
.
x
=
out_temp
.
x
*
cs_re
[
cs
]
+
out_temp
.
y
*
cs_im
[
cs
];
pilot_data2
.
y
=
out_temp
.
y
*
cs_re
[
cs
]
-
out_temp
.
x
*
cs_im
[
cs
];
switch
(
tidy
)
{
case
0
:
power
[
re_id
<<
1
]
=
pilot_data2
.
x
*
pilot_data1
.
x
+
pilot_data2
.
y
*
pilot_data1
.
y
;
break
;
case
1
:
power
[(
re_id
<<
1
)
+
1
]
=
pilot_data2
.
y
*
pilot_data1
.
x
-
pilot_data2
.
x
*
pilot_data1
.
y
;
break
;
}
__syncthreads
();
for
(
k
=
Msc_RS
>>
1
;
k
>
0
;
k
=
k
>>
1
)
{
if
(
re_id
<
k
)
power
[(
re_id
<<
1
)
+
tidy
]
=
power
[(
re_id
<<
1
)
+
tidy
]
+
power
[((
k
+
re_id
)
<<
1
)
+
tidy
];
__syncthreads
();
if
(
k
%
2
&&
re_id
==
0
)
power
[
tidy
]
=
power
[
tidy
]
+
power
[((
k
-
1
)
<<
1
)
+
tidy
];
__syncthreads
();
}
phase
=
atanf
(
power
[
1
]
/
power
[
0
]
);
if
(
symbol_id
!=
10
&&
symbol_id
!=
3
)
{
current_phase1
=
(
phase
/
7
)
*
(
symbol_id
-
3
);
current_phase2
=
(
phase
/
7
)
*
(
symbol_id
-
10
);
drs_ch
.
x
=
((
pilot_data1
.
x
*
cosf
(
current_phase1
)
-
pilot_data1
.
y
*
sinf
(
current_phase1
))
+
(
pilot_data2
.
x
*
cosf
(
current_phase2
)
-
pilot_data2
.
y
*
sinf
(
current_phase2
)))
/
2
;
drs_ch
.
y
=
((
pilot_data1
.
y
*
cosf
(
current_phase1
)
+
pilot_data1
.
x
*
sinf
(
current_phase1
))
+
(
pilot_data2
.
y
*
cosf
(
current_phase2
)
+
pilot_data2
.
x
*
sinf
(
current_phase2
)))
/
2
;
switch
(
tidy
)
{
case
0
:
power1
[
re_id
<<
1
]
=
((
short
)
drs_ch
.
x
*
(
short
)
drs_ch
.
x
+
(
short
)
drs_ch
.
y
*
(
short
)
drs_ch
.
y
);
break
;
case
1
:
power1
[(
re_id
<<
1
)
+
1
]
=
((
short
)
drs_ch
.
x
*
(
short
)
drs_ch
.
x
+
(
short
)
drs_ch
.
y
*
(
short
)
drs_ch
.
y
)
>>
4
;
break
;
}
__syncthreads
();
for
(
k
=
Msc_RS
>>
1
;
k
>
0
;
k
=
k
>>
1
)
{
if
(
re_id
<
k
)
power1
[(
re_id
<<
1
)
+
tidy
]
=
power1
[(
re_id
<<
1
)
+
tidy
]
+
power1
[((
k
+
re_id
)
<<
1
)
+
tidy
];
__syncthreads
();
if
(
k
%
2
&&
re_id
==
0
)
power1
[
tidy
]
=
power1
[
tidy
]
+
power1
[((
k
-
1
)
<<
1
)
+
tidy
];
__syncthreads
();
}
xcl
=
(
unsigned
int
)(
power1
[
0
]
/
(
Msc_RS
<<
1
));
l2
=
0
;
for
(
i
=
0
;
i
<
31
;
i
++
)
if
((
xcl
&
(
1
<<
i
))
!=
0
)
l2
=
i
+
1
;
channel_level
=
(
short
)(
l2
>>
1
)
+
4
;
mag
=
((
int
)(
drs_ch
.
x
*
drs_ch
.
x
+
drs_ch
.
y
*
drs_ch
.
y
))
>>
channel_level
;
mag
=
(
mag
>=
255
)
?
255
:
mag
;
switch
(
tidy
)
{
case
0
:
out
[
symbol_id
*
Msc_RS
+
re_id
].
x
=
(
float
)((((
int
)(((
rxdataF_ext
.
x
*
drs_ch
.
x
)
+
(
rxdataF_ext
.
y
*
drs_ch
.
y
))
*
0.0441941738
))
>>
channel_level
)
*
inv_ch
[
mag
]);
break
;
case
1
:
out
[
symbol_id
*
Msc_RS
+
re_id
].
y
=
(
float
)((((
int
)(((
rxdataF_ext
.
y
*
drs_ch
.
x
)
-
(
rxdataF_ext
.
x
*
drs_ch
.
y
))
*
0.0441941738
))
>>
channel_level
)
*
inv_ch
[
mag
]);
break
;
}
if
(
tidy
==
0
&&
re_id
==
0
&&
symbol_id
==
0
)
sig_engery
[
0
]
=
(
int
)
power1
[
1
]
*
8
/
Msc_RS
;
}
else
if
(
symbol_id
==
3
)
{
out
[
symbol_id
*
300
+
re_id
]
=
pilot_data1
;
}
else
if
(
symbol_id
==
10
)
{
out
[
symbol_id
*
300
+
re_id
]
=
pilot_data2
;
}
}
__global__
void
exrb
(
float2
*
x
,
float2
*
out
,
short
*
out2
,
const
unsigned
int
first_rb
,
const
short
Msc_RS
)
{
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
const
int
tidy
=
threadIdx
.
y
;
float2
rxdataF_ext
;
float2
pilot_data1
;
float2
pilot_data2
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
rxdataF_ext
.
x
=
x
[
symbol_id
*
512
+
pos
].
x
*
0.0441941738
;
rxdataF_ext
.
y
=
x
[
symbol_id
*
512
+
pos
].
y
*
0.0441941738
;
pilot_data1
.
x
=
x
[
1536
+
pos
].
x
*
0.0441941738
;
pilot_data1
.
y
=
x
[
1536
+
pos
].
y
*
0.0441941738
;
pilot_data2
.
x
=
x
[
5120
+
pos
].
x
*
0.0441941738
;
pilot_data2
.
y
=
x
[
5120
+
pos
].
y
*
0.0441941738
;
out
[
symbol_id
*
300
+
re_id
]
=
rxdataF_ext
;
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)]
=
(
short
)
rxdataF_ext
.
x
;
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)
+
1
]
=
(
short
)
rxdataF_ext
.
y
;
}
__global__
void
estimation
(
float2
*
x
,
float2
*
ul_ref1
,
float2
*
ul_ref2
,
float2
*
out
,
short
*
out2
,
short
cyclic_shift1
,
short
cyclic_shift2
,
short
Msc_RS
)
{
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
const
int
tidy
=
threadIdx
.
y
;
int
cs
,
k
,
channel_level
;
float
phase
,
current_phase1
,
current_phase2
;
float
const_value
=
22.627417
;
float2
out_temp
;
float
cs_re
[
12
]
=
{
1
,
0.866025
,
0.5
,
0
,
-
0.5
,
-
0.866025
,
-
1
,
-
0.866025
,
-
0.5
,
0
,
0.5
,
0.866025
};
float
cs_im
[
12
]
=
{
0
,
0.5
,
0.866025
,
1
,
0.866025
,
0.5
,
0
,
-
0.5
,
-
0.866025
,
-
1
,
-
0.866025
,
-
0.5
};
float2
drs_ch
;
float2
pilot_data1
;
float2
pilot_data2
;
__shared__
float
power
[
600
];
pilot_data1
=
x
[
900
+
re_id
];
pilot_data2
=
x
[
3000
+
re_id
];
out_temp
.
x
=
pilot_data1
.
x
*
ul_ref1
[
re_id
].
x
+
pilot_data1
.
y
*
ul_ref1
[
re_id
].
y
;
out_temp
.
y
=
pilot_data1
.
y
*
ul_ref1
[
re_id
].
x
-
pilot_data1
.
x
*
ul_ref1
[
re_id
].
y
;
cs
=
(
re_id
*
cyclic_shift1
)
%
12
;
pilot_data1
.
x
=
out_temp
.
x
*
cs_re
[
cs
]
+
out_temp
.
y
*
cs_im
[
cs
];
pilot_data1
.
y
=
out_temp
.
y
*
cs_re
[
cs
]
-
out_temp
.
x
*
cs_im
[
cs
];
out_temp
.
x
=
pilot_data2
.
x
*
ul_ref2
[
re_id
].
x
+
pilot_data2
.
y
*
ul_ref2
[
re_id
].
y
;
out_temp
.
y
=
pilot_data2
.
y
*
ul_ref2
[
re_id
].
x
-
pilot_data2
.
x
*
ul_ref2
[
re_id
].
y
;
cs
=
(
re_id
*
cyclic_shift2
)
%
12
;
pilot_data2
.
x
=
out_temp
.
x
*
cs_re
[
cs
]
+
out_temp
.
y
*
cs_im
[
cs
];
pilot_data2
.
y
=
out_temp
.
y
*
cs_re
[
cs
]
-
out_temp
.
x
*
cs_im
[
cs
];
if
(
tidy
==
0
)
power
[
re_id
<<
1
]
=
pilot_data2
.
x
*
pilot_data1
.
x
+
pilot_data2
.
y
*
pilot_data1
.
y
;
else
power
[(
re_id
<<
1
)
+
1
]
=
pilot_data2
.
y
*
pilot_data1
.
x
-
pilot_data2
.
x
*
pilot_data1
.
y
;
__syncthreads
();
for
(
k
=
Msc_RS
>>
1
;
k
>
0
;
k
=
k
>>
1
)
{
if
(
re_id
<
k
)
power
[(
re_id
<<
1
)
+
tidy
]
=
power
[(
re_id
<<
1
)
+
tidy
]
+
power
[((
k
+
re_id
)
<<
1
)
+
tidy
];
__syncthreads
();
if
(
k
%
2
&&
re_id
==
0
)
power
[
tidy
]
=
power
[
tidy
]
+
power
[((
k
-
1
)
<<
1
)
+
tidy
];
__syncthreads
();
}
phase
=
atanf
(
power
[
1
]
/
power
[
0
]
);
if
(
symbol_id
!=
10
&&
symbol_id
!=
3
)
{
current_phase1
=
(
phase
/
7
)
*
(
symbol_id
-
3
);
current_phase2
=
(
phase
/
7
)
*
(
symbol_id
-
10
);
drs_ch
.
x
=
((
pilot_data1
.
x
*
cosf
(
current_phase1
)
-
pilot_data1
.
y
*
sinf
(
current_phase1
))
+
(
pilot_data2
.
x
*
cosf
(
current_phase2
)
-
pilot_data2
.
y
*
sinf
(
current_phase2
)))
/
2
;
drs_ch
.
y
=
((
pilot_data1
.
y
*
cosf
(
current_phase1
)
+
pilot_data1
.
x
*
sinf
(
current_phase1
))
+
(
pilot_data2
.
y
*
cosf
(
current_phase2
)
+
pilot_data2
.
x
*
sinf
(
current_phase2
)))
/
2
;
out
[
symbol_id
*
300
+
re_id
]
=
drs_ch
;
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)]
=
(
short
)
drs_ch
.
x
;
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)
+
1
]
=
(
short
)
drs_ch
.
y
;
}
else
if
(
symbol_id
==
3
)
{
out
[
symbol_id
*
300
+
re_id
]
=
pilot_data1
;
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)]
=
(
short
)
pilot_data1
.
x
;
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)
+
1
]
=
(
short
)
pilot_data1
.
y
;
}
else
if
(
symbol_id
==
10
)
{
out
[
symbol_id
*
300
+
re_id
]
=
pilot_data2
;
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)]
=
(
short
)
pilot_data2
.
x
;
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)
+
1
]
=
(
short
)
pilot_data2
.
y
;
}
}
/*__global__ void add_value( int *x, int *y )
{
x[14*300] = y[0];
}*/
__global__
void
compensation
(
float2
*
x
,
short
*
xt
,
float2
*
drs
,
short
*
drst
,
float
Qm_const
,
float2
*
out
,
short
*
out2
,
int
*
sig_engery
,
short
Msc_RS
)
{
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
const
int
tidy
=
threadIdx
.
y
;
int
k
,
channel_level
,
mag
;
float2
out_temp
;
int
temp_re
,
temp_im
;
__shared__
int
power
[
600
];
/*short inv_ch[257] = {512,256,170,128,102,85,73,64,56,51,46,42,39,36,34,32,30,28,26,25,24,23,22,21,20,19,18,18,17,17,16,16,15,15,14,14,13,13,13,12,12,12,11,11,11,11,10,10,10,10,10,
9,9,9,9,9,8,8,8,8,8,8,8,8,7,7,7,7,7,7,7,7,7,6,6,6,6,6,6,6,6,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1};*/
float
sig_eng
,
inv_ch
;
float2
rxdataF_ext
;
int
const_shift
=
0
;
float2
drs_ch
;
int
i
;
unsigned
int
xcl
;
unsigned
char
l2
;
short
clr
;
short
cli
;
/*rxdataF_ext.x = xt[(symbol_id * 300 + re_id)<<1];
rxdataF_ext.y = xt[((symbol_id * 300 + re_id)<<1)+1];
drs_ch.x = drst[(symbol_id * 300 + re_id)<<1];
drs_ch.y = drst[((symbol_id * 300 + re_id)<<1)+1];*/
clr
=
drst
[(
symbol_id
*
300
+
re_id
)
<<
1
];
cli
=
drst
[((
symbol_id
*
300
+
re_id
)
<<
1
)
+
1
];
drs_ch
=
drs
[
symbol_id
*
300
+
re_id
];
rxdataF_ext
=
x
[
symbol_id
*
300
+
re_id
];
if
(
symbol_id
!=
10
&&
symbol_id
!=
3
)
{
switch
(
tidy
)
{
case
0
:
power
[
re_id
<<
1
]
=
(
clr
*
clr
+
cli
*
cli
);
break
;
case
1
:
power
[(
re_id
<<
1
)
+
1
]
=
(
clr
*
clr
+
cli
*
cli
)
>>
4
;
break
;
}
//power[re_id] = (clr * clr + cli * cli);
__syncthreads
();
for
(
k
=
Msc_RS
>>
1
;
k
>
0
;
k
=
k
>>
1
)
{
if
(
re_id
<
k
)
power
[(
re_id
<<
1
)
+
tidy
]
=
power
[(
re_id
<<
1
)
+
tidy
]
+
power
[((
k
+
re_id
)
<<
1
)
+
tidy
];
__syncthreads
();
if
(
k
%
2
&&
re_id
==
0
)
power
[
tidy
]
=
power
[
tidy
]
+
power
[((
k
-
1
)
<<
1
)
+
tidy
];
__syncthreads
();
}
//xcl = (unsigned int)(power[0]/(Msc_RS<<1));
//l2=0;
//for (i=0; i<31; i++)
//if ((xcl&(1<<i)) != 0)
//l2 = i+1;
//channel_level = (short)(l2>>1) + 4;
sig_eng
=
power
[
1
]
*
8
/
Msc_RS
;
mag
=
drs_ch
.
x
*
drs_ch
.
x
+
drs_ch
.
y
*
drs_ch
.
y
;
inv_ch
=
512
/
(
sqrtf
(
mag
)
*
Qm_const
);
inv_ch
=
(
inv_ch
>
512
)
?
512
:
inv_ch
;
inv_ch
=
(
inv_ch
<
1
)
?
1
:
inv_ch
;
switch
(
tidy
)
{
case
0
:
out
[
symbol_id
*
Msc_RS
+
re_id
].
x
=
(((
rxdataF_ext
.
x
*
drs_ch
.
x
)
+
(
rxdataF_ext
.
y
*
drs_ch
.
y
))
*
inv_ch
)
/
sqrtf
(
mag
);
out2
[(
symbol_id
*
300
+
re_id
)
<<
1
]
=
(
short
)
out
[
symbol_id
*
Msc_RS
+
re_id
].
x
;
break
;
case
1
:
out
[
symbol_id
*
Msc_RS
+
re_id
].
y
=
(((
rxdataF_ext
.
y
*
drs_ch
.
x
)
-
(
rxdataF_ext
.
x
*
drs_ch
.
y
))
*
inv_ch
)
/
sqrtf
(
mag
);
out2
[((
symbol_id
*
300
+
re_id
)
<<
1
)
+
1
]
=
(
short
)
out
[
symbol_id
*
Msc_RS
+
re_id
].
y
;
break
;
}
if
(
tidy
==
0
&&
re_id
==
0
&&
symbol_id
==
0
)
sig_engery
[
0
]
=
(
int
)
sig_eng
;
}
}
void
exrb_compen_esti_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
unsigned
short
sf
)
{
dim3
block
(
number_symbols
,
1
,
1
);
dim3
thread
(
ulsch_para
[
sf
].
Msc_RS
,
2
,
1
);
//printf("[TEST]using RB = %d\n",nb_rb);
exrb_compen_esti
<<<
block
,
thread
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
esti_const
.
d_ul_ref_sigs_rx
[
ulsch_para
[
sf
].
u1
][
ulsch_para
[
sf
].
v1
][
ulsch_para
[
sf
].
Msc_RS_idx
],
esti_const
.
d_ul_ref_sigs_rx
[
ulsch_para
[
sf
].
u2
][
ulsch_para
[
sf
].
v2
][
ulsch_para
[
sf
].
Msc_RS_idx
],
ul_cu
[
sf
].
d_rxdata_comp
[
0
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
first_rb
,
ulsch_para
[
sf
].
cyclic_shift1
,
ulsch_para
[
sf
].
cyclic_shift2
,
ulsch_para
[
sf
].
Msc_RS
);
int
aarx
=
0
;
switch
(
ulsch_para
[
sf
].
Msc_RS
)
{
case
12
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft12
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_12
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
24
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft24
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_24
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
36
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft36
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_36
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
48
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft48
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_48
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
60
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft60
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_60
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
72
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft72
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_72
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
96
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft96
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_96
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
108
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft108
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_108
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
120
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft120
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_120
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
144
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft144
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_144
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
180
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft180
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_180
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
192
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft192
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_192
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
216
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft216
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_216
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
240
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft240
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_240
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
288
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft288
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_288
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
300
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft300
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_300
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
}
}
void
ulsch_extract_rb_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
unsigned
short
sf
)
{
dim3
thread
(
nb_rb
*
12
,
2
,
1
);
dim3
block
(
number_symbols
,
1
,
1
);
exrb
<<<
block
,
thread
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
ul_cu
[
sf
].
d_rxdata_ext
[
0
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
0
],
first_rb
,
nb_rb
*
12
);
}
void
estimation_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
unsigned
short
sf
)
{
dim3
block
(
number_symbols
,
1
,
1
);
dim3
thread
(
ulsch_para
[
sf
].
Msc_RS
,
2
,
1
);
estimation
<<<
block
,
thread
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_ext
[
0
],
esti_const
.
d_ul_ref_sigs_rx
[
ulsch_para
[
sf
].
u1
][
ulsch_para
[
sf
].
v1
][
ulsch_para
[
sf
].
Msc_RS_idx
],
esti_const
.
d_ul_ref_sigs_rx
[
ulsch_para
[
sf
].
u2
][
ulsch_para
[
sf
].
v2
][
ulsch_para
[
sf
].
Msc_RS_idx
],
ul_cu
[
sf
].
d_drs_ch
[
0
],
(
short
*
)
ul_cu
[
sf
].
d_drs_ch_int
[
0
],
ulsch_para
[
sf
].
cyclic_shift1
,
ulsch_para
[
sf
].
cyclic_shift2
,
ulsch_para
[
sf
].
Msc_RS
);
}
void
compensation_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
short
Qm
,
unsigned
short
sf
)
{
float
Qm_const
;
dim3
block
(
number_symbols
,
1
,
1
);
dim3
thread
(
ulsch_para
[
sf
].
Msc_RS
,
2
,
1
);
//printf("in compensation\n");
switch
(
Qm
)
{
case
2
:
Qm_const
=
1.0
;
break
;
case
4
:
Qm_const
=
0.632456
;
break
;
}
compensation
<<<
block
,
thread
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_ext
[
0
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_ext_int
[
0
],
ul_cu
[
sf
].
d_drs_ch
[
0
],
(
short
*
)
ul_cu
[
sf
].
d_drs_ch_int
[
0
],
Qm_const
,
ul_cu
[
sf
].
d_rxdata_comp
[
0
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
0
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ulsch_para
[
sf
].
Msc_RS
);
}
void
idft_cu
(
unsigned
int
first_rb
,
unsigned
int
nb_rb
,
unsigned
short
number_symbols
,
short
cl
,
unsigned
short
sf
)
{
int
aarx
=
0
;
switch
(
ulsch_para
[
sf
].
Msc_RS
)
{
case
12
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft12
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_12
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
24
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft24
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_24
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
36
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft36
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_36
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
48
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft48
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_48
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
60
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft60
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_60
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
72
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft72
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_72
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
96
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft96
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_96
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
108
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft108
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_108
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
120
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft120
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_120
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
144
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft144
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_144
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
180
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft180
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_180
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
192
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft192
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_192
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
216
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft216
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_216
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
240
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft240
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_240
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
288
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft288
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_288
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
case
300
:
cufftExecC2C
(
ul_cu
[
sf
].
idft
.
fft300
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
CUFFT_INVERSE
);
k_short_300
<<<
number_symbols
,
300
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_comp
[
aarx
],
(
short
*
)
ul_cu
[
sf
].
d_rxdata_comp_int
[
aarx
],
ul_cu
[
sf
].
d_ulsch_power
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
first_rb
);
break
;
}
//add_value<<< 1,1,0,ul_cu[sf].stream_ul >>>(ul_cu[sf].d_rxdata_comp_int[aarx],ul_cu[sf].d_ulsch_power[0]);
}
__global__
void
k_short_12
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart12
=
0.28867513459
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
12
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart12
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart12
);
}
}
__global__
void
k_short_24
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart24
=
0.2041241452
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
24
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart24
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart24
);
}
}
__global__
void
k_short_36
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart36
=
0.16666666667
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
36
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart36
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart36
);
}
}
__global__
void
k_short_48
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart48
=
0.144337567297
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
48
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart48
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart48
);
}
}
__global__
void
k_short_60
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart60
=
0.12909944487
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
60
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart60
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart60
);
}
}
__global__
void
k_short_72
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart72
=
0.117851130198
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
72
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart72
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart72
);
}
}
__global__
void
k_short_96
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart96
=
0.102062072616
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
96
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart96
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart96
);
}
}
__global__
void
k_short_108
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart108
=
0.096225044865
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
108
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart108
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart108
);
}
}
__global__
void
k_short_120
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart120
=
0.0912870929175
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
120
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart120
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart120
);
}
}
__global__
void
k_short_144
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart144
=
0.083333333333333
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
144
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart144
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart144
);
}
}
__global__
void
k_short_180
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart180
=
0.07453559925
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
180
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart180
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart180
);
}
}
__global__
void
k_short_192
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart192
=
0.072168783649
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
192
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart192
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart192
);
}
}
__global__
void
k_short_216
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart216
=
0.068041381744
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
216
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart216
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart216
);
}
}
__global__
void
k_short_240
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart240
=
0.0645497224368
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
240
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart240
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart240
);
}
}
__global__
void
k_short_288
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart288
=
0.0589255651
;
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
int
ini
=
288
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
+
(
re_id
<<
1
)]
=
sig_eng
[
0
];
y
[
6001
+
(
re_id
<<
1
)]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart288
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart288
);
}
}
__global__
void
k_short_300
(
float2
*
x
,
short
*
y
,
int
*
sig_eng
,
float2
*
rxF
,
unsigned
int
first_rb
)
{
const
float
one_per_sqart300
=
0.057735026919
;
const
int
outi
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
ini
=
300
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
symbol_id
=
blockIdx
.
x
;
const
int
re_id
=
threadIdx
.
x
;
int
pos
=
(
362
+
first_rb
*
12
+
re_id
)
%
512
;
pos
=
(
pos
>=
150
&&
pos
<
362
)
?
pos
+
212
:
pos
;
switch
(
symbol_id
)
{
case
3
:
y
[
1800
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
x
*
0.0441941738
;
y
[
1801
+
(
re_id
<<
1
)]
=
rxF
[
3584
+
pos
].
y
*
0.0441941738
;
break
;
case
10
:
if
(
re_id
==
0
)
{
y
[
6000
]
=
sig_eng
[
0
];
y
[
6001
]
=
sig_eng
[
1
];
}
break
;
default:
y
[
outi
<<
1
]
=
(
short
)(
x
[
ini
].
x
*
one_per_sqart300
);
y
[(
outi
<<
1
)
+
1
]
=
(
short
)(
x
[
ini
].
y
*
one_per_sqart300
);
}
}
openair1/PHY/CUDA/MODULATION/defs.h
0 → 100644
View file @
82b32199
#ifndef __DEFS_CU__H__
#define __DEFS_CU__H__
#include <stdint.h>
#include <stdio.h>
#ifndef CUFFT_H
#define CUFFT_H
#include "cufft.h"
#endif
//typedef float2 Complex;
#ifdef __cplusplus
extern
"C"
#endif
void
idft512ad_cu
(
short
*
,
short
*
,
int
);
#ifdef __cplusplus
extern
"C"
#endif
void
dft512rm_cu
(
short
*
,
short
*
,
int
);
#endif
openair1/PHY/CUDA/MODULATION/lte_dfts.cu
0 → 100644
View file @
82b32199
#include "stdio.h"
#include "cufft.h"
#include "defs.h"
#include "PHY/CUDA/extern.h"
typedef
float2
Complex
;
//for dftXXXrm
__global__
void
k_rmcp
(
int16_t
*
x
,
Complex
*
y
,
int
CP
,
int
CP0
)
{
int
i
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
int
j
=
(
blockDim
.
x
+
CP
)
*
blockIdx
.
x
+
threadIdx
.
x
+
CP0
;
if
(
blockIdx
.
x
>
6
)
j
=
j
+
CP0
-
CP
;
y
[
i
].
x
=
(
float
)
x
[(
j
<<
1
)];
y
[
i
].
y
=
(
float
)
x
[(
j
<<
1
)
+
1
];
}
//for dftXXXrm
__global__
void
k_short
(
Complex
*
x
,
short
*
y
)
{
int
i
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
y
[
i
<<
1
]
=
(
short
)(
x
[
i
].
x
*
0.04419417
);
//for divide sqrt(512)
y
[(
i
<<
1
)
+
1
]
=
(
short
)(
x
[
i
].
y
*
0.04419417
);
}
__global__
void
k_adcp_extend
(
short
*
x
,
Complex
*
y
)
{
int
i
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
y
[
i
].
x
=
(
float
)
x
[
(
i
<<
1
)
];
y
[
i
].
y
=
(
float
)
x
[
(
i
<<
1
)
+
1
];
}
__global__
void
k_test
(
Complex
*
x
)
{
int
tid
=
threadIdx
.
x
;
int
bid
=
blockIdx
.
x
;
x
[
bid
*
blockDim
.
x
+
tid
].
x
=
tid
*
22
;
x
[
bid
*
blockDim
.
x
+
tid
].
y
=
bid
*
22
;
}
void
idft512ad_cu
(
int16_t
*
x
,
int16_t
*
y
,
int
sf
)
{
//dl_cu
int
i
;
// printf("[CUDA] IN idft, sf num = %2d\n",sf);
cudaMemcpyAsync
(
dl_cu
[
sf
].
d_txdata
,
x
,
sizeof
(
short
)
*
2
*
dl_cu
[
sf
].
ifftsize
*
dl_cu
[
sf
].
symbols_per_tti
,
cudaMemcpyHostToDevice
,
dl_cu
[
sf
].
stream_dl
);
k_adcp_extend
<<<
dl_cu
[
sf
].
symbols_per_tti
,
dl_cu
[
sf
].
ifftsize
,
0
,
dl_cu
[
sf
].
stream_dl
>>>
(
dl_cu
[
sf
].
d_txdata
,
dl_cu
[
sf
].
d_txdata_ifft
);
cufftExecC2C
(
dl_cu
[
sf
].
ifft
,
(
cufftComplex
*
)
dl_cu
[
sf
].
d_txdata_ifft
,
(
cufftComplex
*
)
dl_cu
[
sf
].
d_txdata_ifft
,
CUFFT_INVERSE
);
k_short
<<<
dl_cu
[
sf
].
symbols_per_tti
,
dl_cu
[
sf
].
ifftsize
,
0
,
dl_cu
[
sf
].
stream_dl
>>>
(
dl_cu
[
sf
].
d_txdata_ifft
,
dl_cu
[
sf
].
d_txdata
);
cudaMemcpyAsync
(
dl_cu
[
sf
].
h_txdata
,
dl_cu
[
sf
].
d_txdata
,
sizeof
(
short
)
*
2
*
dl_cu
[
sf
].
ifftsize
*
dl_cu
[
sf
].
symbols_per_tti
,
cudaMemcpyDeviceToHost
,
dl_cu
[
sf
].
stream_dl
);
int
index
=
0
;
short
*
temp
=
dl_cu
[
sf
].
h_txdata
;
cudaStreamSynchronize
(
dl_cu
[
sf
].
stream_dl
);
for
(
i
=
0
;
i
<
dl_cu
[
sf
].
symbols_per_tti
;
i
++
)
{
int
cp
=
0
;
if
(
i
==
0
||
i
==
7
)
cp
=
dl_cu
[
sf
].
CP0
;
else
cp
=
dl_cu
[
sf
].
CP
;
memcpy
(
&
y
[
index
<<
1
],
&
temp
[
(
i
+
1
)
*
dl_cu
[
sf
].
ifftsize
*
2
-
cp
*
2
],
cp
*
sizeof
(
short
)
*
2
);
memcpy
(
&
y
[
(
index
+
cp
)
<<
1
],
&
temp
[
i
*
dl_cu
[
sf
].
ifftsize
*
2
],
dl_cu
[
sf
].
ifftsize
*
2
*
sizeof
(
short
)
);
index
=
index
+
cp
+
dl_cu
[
sf
].
ifftsize
;
}
}
void
dft512rm_cu
(
int16_t
*
x
,
int16_t
*
y
,
int
sf
)
{
//printf("enter DFT\n");
cudaEvent_t
startEvent
,
stopEvent
;
cudaEventCreate
(
&
startEvent
);
cudaEventCreate
(
&
stopEvent
);
cudaEventRecord
(
startEvent
,
0
);
cudaMemcpyAsync
(
ul_cu
[
sf
].
d_rxdata
[
0
],
x
,
sizeof
(
short
)
*
ul_cu
[
sf
].
samples_per_tti
*
2
,
cudaMemcpyHostToDevice
,
ul_cu
[
sf
].
stream_ul
);
k_rmcp
<<<
ul_cu
[
sf
].
symbols_per_tti
,
ul_cu
[
sf
].
fftsize
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
((
short
*
)
ul_cu
[
sf
].
d_rxdata
[
0
],
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
36
,
40
);
cufftExecC2C
(
ul_cu
[
sf
].
fft
,
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
(
cufftComplex
*
)
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
CUFFT_FORWARD
);
k_short
<<<
ul_cu
[
sf
].
symbols_per_tti
,
ul_cu
[
sf
].
fftsize
,
0
,
ul_cu
[
sf
].
stream_ul
>>>
(
ul_cu
[
sf
].
d_rxdata_fft
[
0
],
(
short
*
)
ul_cu
[
sf
].
d_rxdataF
[
0
]);
cudaStreamSynchronize
(
ul_cu
[
sf
].
stream_ul
);
cudaMemcpyAsync
(
y
,
ul_cu
[
sf
].
d_rxdataF
[
0
],
sizeof
(
short
)
*
ul_cu
[
sf
].
symbols_per_tti
*
2
*
ul_cu
[
sf
].
fftsize
,
cudaMemcpyDeviceToHost
,
ul_cu
[
sf
].
stream_ul
);
cudaStreamSynchronize
(
ul_cu
[
sf
].
stream_ul
);
float
time
;
cudaEventRecord
(
stopEvent
,
0
);
cudaEventSynchronize
(
stopEvent
);
cudaEventElapsedTime
(
&
time
,
startEvent
,
stopEvent
);
//printf("[GPU] end of DFT %f\n",time);
cudaEventDestroy
(
startEvent
);
cudaEventDestroy
(
stopEvent
);
}
openair1/PHY/CUDA/defs.h
0 → 100644
View file @
82b32199
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "PHY/CUDA/LTE_TRANSPORT/defs.h"
void
rx_ulsch_cu
(
PHY_VARS_eNB
*
phy_vars_eNB
,
uint32_t
sched_subframe
,
uint8_t
eNB_id
,
// this is the effective sector id
uint8_t
UE_id
,
LTE_eNB_ULSCH_t
**
ulsch
,
uint8_t
cooperation_flag
);
\ No newline at end of file
openair1/PHY/CUDA/defs_struct.h
0 → 100644
View file @
82b32199
#include <stdint.h>
#include <stdio.h>
#ifndef CUFFT_H
#define CUFFT_H
#include "cufft.h"
#endif
typedef
struct
{
float2
*
d_ul_ref_sigs_rx
[
30
][
2
][
33
];
}
estimation_const_t
;
typedef
struct
{
short
u1
;
short
v1
;
short
u2
;
short
v2
;
short
Msc_RS_idx
;
short
cyclic_shift1
;
short
cyclic_shift2
;
short
Msc_RS
;
}
para_ulsch
;
typedef
struct
{
unsigned
int
first_rb
;
unsigned
short
first_carrier_offset
;
short
N_RB_UL
;
unsigned
short
nb_rb1
;
unsigned
short
nb_rb2
;
short
fftsize
;
}
ext_rbs
;
typedef
struct
{
cufftHandle
fft12
;
cufftHandle
fft24
;
cufftHandle
fft36
;
cufftHandle
fft48
;
cufftHandle
fft60
;
cufftHandle
fft72
;
cufftHandle
fft84
;
cufftHandle
fft96
;
cufftHandle
fft108
;
cufftHandle
fft120
;
cufftHandle
fft132
;
cufftHandle
fft144
;
cufftHandle
fft156
;
cufftHandle
fft168
;
cufftHandle
fft180
;
cufftHandle
fft192
;
cufftHandle
fft204
;
cufftHandle
fft216
;
cufftHandle
fft228
;
cufftHandle
fft240
;
cufftHandle
fft252
;
cufftHandle
fft264
;
cufftHandle
fft276
;
cufftHandle
fft288
;
cufftHandle
fft300
;
}
fftHandle
;
typedef
struct
{
cudaStream_t
stream_ul
;
cudaStream_t
timing_advance
;
cudaStream_t
tempstrm
;
cufftHandle
fft
;
cufftHandle
ifft_timing_advance
;
fftHandle
idft
;
int
**
d_rxdata
;
float2
**
d_rxdata_fft
;
int
**
d_rxdataF
;
int
**
d_rxdata_comp_int
;
float2
**
d_rxdata_comp
;
float2
**
d_drs_ch
;
int
**
d_drs_ch_int
;
int
**
d_ulsch_power
;
float2
**
d_rxdata_ext
;
int
**
d_rxdata_ext_int
;
short
N_RB_UL
;
short
nb_antennas_rx
;
short
symbols_per_tti
;
short
samples_per_tti
;
short
Ncp
;
short
fftsize
;
short
CP
;
short
CP0
;
}
ul_cu_t
;
typedef
struct
{
cudaStream_t
stream_dl
;
cufftHandle
ifft
;
short
*
d_txdata
;
short
*
d_txdata_o
;
float2
*
d_txdata_ifft
;
short
*
h_txdata
;
short
symbols_per_tti
;
short
samples_per_tti
;
short
Ncp
;
short
ifftsize
;
short
CP
;
short
CP0
;
}
dl_cu_t
;
\ No newline at end of file
openair1/PHY/CUDA/extern.h
0 → 100644
View file @
82b32199
#ifndef __MODULATION_EXTERN_CU_H__
#define __MODULATION_EXTERN_CU_H_
#include "defs_struct.h"
#include <stdint.h>
#include <stdio.h>
#ifndef CUFFT_H
#define CUFFT_H
#include "cufft.h"
#endif
extern
dl_cu_t
dl_cu
[
10
];
extern
ul_cu_t
ul_cu
[
10
];
extern
estimation_const_t
esti_const
;
extern
int
device_count
;
extern
para_ulsch
ulsch_para
[
10
];
extern
ext_rbs
ext_rbs_para
[
10
];
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment