Commit 394f0deb authored by Sy's avatar Sy

bnProcPc and bnProc unrolled | BN treatment, small improvement in times at...

bnProcPc and bnProc unrolled | BN treatment: small improvement in times at the expense of degraded BLER performance; reordering of generated files
parent 55e32f6e
...@@ -139,7 +139,7 @@ else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") ...@@ -139,7 +139,7 @@ else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
if(EXISTS "/proc/cpuinfo") if(EXISTS "/proc/cpuinfo")
file(STRINGS "/proc/cpuinfo" CPUINFO REGEX flags LIMIT_COUNT 1) file(STRINGS "/proc/cpuinfo" CPUINFO REGEX flags LIMIT_COUNT 1)
if (CPUINFO MATCHES "avx512bw") if (CPUINFO MATCHES "avx512bw")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx512bw -march=skylake-avx512 -mtune=skylake-avx512" ) set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx512bw -march=skylake-avx512 -mtune=skylake-avx512 " )
set(COMPILATION_AVX2 "True") set(COMPILATION_AVX2 "True")
else() else()
if (CPUINFO MATCHES "avx2") if (CPUINFO MATCHES "avx2")
......
SNR BLER BER UNCODED_BER ENCODER_MEAN ENCODER_STD ENCODER_MAX DECODER_TIME_MEAN DECODER_TIME_STD DECODER_TIME_MAX DECODER_ITER_MEAN DECODER_ITER_STD DECODER_ITER_MAX
-2.000000 1.000000 0.207386 0.181660 350.462328 0.000002 350.462328 163.684321 -nan 163.684321 5.000000 0.000000 5
-1.900000 1.000000 0.203362 0.181424 49.691355 0.000000 49.691355 106.612229 -nan 106.612229 5.000000 0.000000 5
-1.800000 1.000000 0.203243 0.174874 41.156574 0.000001 41.156574 106.419533 0.000001 106.419533 5.000000 0.000000 5
-1.700000 1.000000 0.199692 0.176373 41.576557 0.000000 41.576557 106.249571 0.000001 106.249571 5.000000 0.000000 5
-1.600000 1.000000 0.191525 0.162247 41.056596 -nan 41.056596 105.518964 0.000001 105.518964 5.000000 0.000000 5
-1.500000 1.000000 0.193774 0.164378 41.485219 -nan 41.485219 106.890835 0.000001 106.890835 5.000000 0.000000 5
-1.400000 1.000000 0.192235 0.164852 41.377230 0.000000 41.377230 106.884179 0.000002 106.884179 5.000000 0.000000 5
-1.300000 1.000000 0.189157 0.157434 41.882535 -nan 41.882535 118.201437 0.000001 118.201437 5.000000 0.000000 5
-1.200000 1.000000 0.189394 0.156487 40.273742 0.000000 40.273742 108.077486 0.000001 108.077486 5.000000 0.000000 5
-1.100000 1.000000 0.180279 0.151042 41.475883 0.000000 41.475883 105.804899 0.000002 105.804899 5.000000 0.000000 5
-1.000000 1.000000 0.185133 0.150016 41.863188 -nan 41.863188 105.778227 -nan 105.778227 5.000000 0.000000 5
-0.900000 1.000000 0.180043 0.148280 41.788530 -nan 41.788530 106.533519 -nan 106.533519 5.000000 0.000000 5
-0.800000 1.000000 0.181700 0.148990 41.606549 -nan 41.606549 122.353146 -nan 122.353146 5.000000 0.000000 5
-0.700000 1.000000 0.176136 0.143071 40.958788 0.000001 40.958788 107.100677 0.000001 107.100677 5.000000 0.000000 5
-0.600000 1.000000 0.173414 0.142124 40.818434 -nan 40.818434 114.571862 0.000001 114.571862 5.000000 0.000000 5
-0.500000 1.000000 0.169744 0.136758 41.645212 -nan 41.645212 106.311549 -nan 106.311549 5.000000 0.000000 5
-0.400000 1.000000 0.170928 0.134549 42.533159 0.000000 42.533159 106.571549 0.000002 106.571549 5.000000 0.000000 5
-0.300000 1.000000 0.166312 0.131629 42.162494 -nan 42.162494 106.150186 0.000001 106.150186 5.000000 0.000000 5
-0.200000 1.000000 0.163116 0.134549 41.555882 -nan 41.555882 105.973565 -nan 105.973565 5.000000 0.000000 5
-0.100000 1.000000 0.165009 0.127525 41.716528 0.000000 41.716528 117.108125 -nan 117.108125 5.000000 0.000000 5
0.000000 1.000000 0.153646 0.125868 42.135839 0.000001 42.135839 106.297533 0.000001 106.297533 5.000000 0.000000 5
0.100000 1.000000 0.149503 0.116319 41.863859 0.000001 41.863859 105.946896 -nan 105.946896 5.000000 0.000000 5
0.200000 1.000000 0.151160 0.116162 41.518563 0.000000 41.518563 106.233579 0.000001 106.233579 5.000000 0.000000 5
0.300000 1.000000 0.151752 0.114899 108.694179 -nan 108.694179 116.888156 -nan 116.888156 5.000000 0.000000 5
0.400000 1.000000 0.148319 0.114268 76.352135 -nan 76.352135 107.328026 0.000002 107.328026 5.000000 0.000000 5
0.500000 1.000000 0.143229 0.108823 56.976621 -nan 56.976621 106.411111 0.000002 106.411111 5.000000 0.000000 5
0.600000 1.000000 0.140152 0.105429 43.605068 -nan 43.605068 129.179314 -nan 129.179314 5.000000 0.000000 5
0.700000 1.000000 0.142401 0.108665 42.849130 -nan 42.849130 106.146889 0.000002 106.146889 5.000000 0.000000 5
0.800000 1.000000 0.134115 0.102983 41.805179 -nan 41.805179 105.963518 0.000001 105.963518 5.000000 0.000000 5
0.900000 1.000000 0.142164 0.105193 41.508383 -nan 41.508383 105.615827 -nan 105.615827 5.000000 0.000000 5
1.000000 1.000000 0.131392 0.095802 41.079894 -nan 41.079894 106.092172 0.000001 106.092172 5.000000 0.000000 5
1.100000 1.000000 0.133404 0.095565 41.256573 -nan 41.256573 106.134900 0.000001 106.134900 5.000000 0.000000 5
1.200000 1.000000 0.131984 0.093277 41.155656 0.000001 41.155656 105.820640 0.000001 105.820640 5.000000 0.000000 5
1.300000 1.000000 0.125473 0.089883 40.216642 -nan 40.216642 105.099633 -nan 105.099633 5.000000 0.000000 5
1.400000 1.000000 0.115649 0.083333 52.048960 0.000000 52.048960 106.145774 -nan 106.145774 5.000000 0.000000 5
1.500000 1.000000 0.128433 0.083807 40.463956 -nan 40.463956 105.743583 -nan 105.743583 5.000000 0.000000 5
1.600000 1.000000 0.122869 0.082071 41.049216 0.000001 41.049216 105.206197 0.000001 105.206197 5.000000 0.000000 5
1.700000 1.000000 0.122988 0.080492 41.913872 -nan 41.913872 104.823010 -nan 104.823010 5.000000 0.000000 5
1.800000 1.000000 0.117898 0.078362 40.812444 0.000000 40.812444 105.950499 -nan 105.950499 5.000000 0.000000 5
1.900000 1.000000 0.117069 0.078598 41.428562 0.000000 41.428562 105.008310 0.000002 105.008310 5.000000 0.000000 5
2.000000 1.000000 0.109730 0.070865 40.903956 -nan 40.903956 106.049516 -nan 106.049516 5.000000 0.000000 5
2.100000 1.000000 0.110677 0.071496 40.365980 0.000000 40.365980 105.550308 -nan 105.550308 5.000000 0.000000 5
2.200000 1.000000 0.106061 0.066840 40.098505 0.000001 40.098505 116.386458 -nan 116.386458 5.000000 0.000000 5
2.300000 1.000000 0.107836 0.066840 41.630557 -nan 41.630557 105.830275 0.000001 105.830275 5.000000 0.000000 5
2.400000 1.000000 0.099313 0.063131 40.906595 0.000000 40.906595 105.513603 0.000001 105.513603 5.000000 0.000000 5
2.500000 1.000000 0.100734 0.061711 41.383906 0.000001 41.383906 106.573557 -nan 106.573557 5.000000 0.000000 5
2.600000 1.000000 0.096828 0.056108 41.539890 -nan 41.539890 105.375623 -nan 105.375623 5.000000 0.000000 5
2.700000 1.000000 0.097301 0.059975 40.686462 0.000000 40.686462 106.816461 0.000001 106.816461 5.000000 0.000000 5
2.800000 1.000000 0.094342 0.055319 40.302460 0.000001 40.302460 106.105103 -nan 106.105103 5.000000 0.000000 5
2.900000 1.000000 0.096946 0.053977 40.671276 -nan 40.671276 105.606260 -nan 105.606260 5.000000 0.000000 5
3.000000 1.000000 0.086529 0.050821 41.382575 -nan 41.382575 106.268916 0.000001 106.268916 5.000000 0.000000 5
3.100000 1.000000 0.088068 0.046796 40.087170 0.000001 40.087170 106.017849 -nan 106.017849 5.000000 0.000000 5
3.200000 1.000000 0.088778 0.048690 41.208411 -nan 41.208411 106.485115 -nan 106.485115 5.000000 0.000000 5
3.300000 1.000000 0.088660 0.045376 46.839523 0.000000 46.839523 105.171609 -nan 105.171609 5.000000 0.000000 5
3.400000 1.000000 0.086056 0.043876 41.234103 0.000000 41.234103 105.971413 -nan 105.971413 5.000000 0.000000 5
3.500000 1.000000 0.085346 0.044271 51.914538 0.000000 51.914538 105.250315 -nan 105.250315 5.000000 0.000000 5
3.600000 1.000000 0.080611 0.039615 39.788348 0.000000 39.788348 105.681404 0.000001 105.681404 5.000000 0.000000 5
3.700000 1.000000 0.081676 0.037405 41.516546 -nan 41.516546 106.541514 -nan 106.541514 5.000000 0.000000 5
3.800000 1.000000 0.082623 0.038826 41.013898 -nan 41.013898 105.181567 0.000002 105.181567 5.000000 0.000000 5
3.900000 1.000000 0.077770 0.034564 40.907250 -nan 40.907250 105.474242 -nan 105.474242 5.000000 0.000000 5
4.000000 1.000000 0.075166 0.032434 40.525805 -nan 40.525805 106.856454 -nan 106.856454 5.000000 0.000000 5
4.100000 1.000000 0.073745 0.030619 41.863868 0.000001 41.863868 104.952984 -nan 104.952984 5.000000 0.000000 5
4.200000 1.000000 0.072206 0.028093 40.012507 0.000000 40.012507 105.829858 0.000001 105.829858 5.000000 0.000000 5
4.300000 1.000000 0.073509 0.029040 41.265241 -nan 41.265241 106.866855 -nan 106.866855 5.000000 0.000000 5
4.400000 1.000000 0.070312 0.028251 42.004048 -nan 42.004048 104.724794 -nan 104.724794 5.000000 0.000000 5
4.500000 1.000000 0.071023 0.026515 40.553139 -nan 40.553139 106.168510 -nan 106.168510 5.000000 0.000000 5
4.600000 1.000000 0.070076 0.025016 41.453892 -nan 41.453892 105.339616 -nan 105.339616 5.000000 0.000000 5
4.700000 1.000000 0.064631 0.023438 42.209825 0.000000 42.209825 105.972866 -nan 105.972866 5.000000 0.000000 5
4.800000 1.000000 0.063920 0.022727 41.778528 0.000001 41.778528 106.402189 0.000001 106.402189 5.000000 0.000000 5
4.900000 1.000000 0.063447 0.018782 40.201824 0.000001 40.201824 106.177825 0.000001 106.177825 5.000000 0.000000 5
5.000000 1.000000 0.056937 0.017677 42.106087 -nan 42.106087 106.188160 0.000000 106.188160 5.000000 0.00000
\ No newline at end of file
This diff is collapsed.
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include <string.h> #include <string.h>
#include "nrLDPCdecoder_defs.h" #include "nrLDPCdecoder_defs.h"
#include <omp.h> //#include <omp.h>
/** /**
\brief Circular memcpy1 \brief Circular memcpy1
|<- rem->|<- circular shift ->| |<- rem->|<- circular shift ->|
...@@ -1026,7 +1026,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1026,7 +1026,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 3 BNs // CN group with 3 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX;
#pragma omp simd
for (j=0;j<2; j++) for (j=0;j<2; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
...@@ -1037,11 +1037,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1037,11 +1037,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 4 BNs // CN group with 4 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<3; j++) for (j=0; j<3; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[1] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[1] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[1]; i++) for (i=0; i<lut_numCnInCnGroups[1]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG4[j][i] + lut_bnPosBnProcBuf_CNG4[j][i]*Z;
...@@ -1053,11 +1053,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1053,11 +1053,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 5 BNs // CN group with 5 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<4; j++) for (j=0; j<4; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[2]; i++) for (i=0; i<lut_numCnInCnGroups[2]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG5[j][i] + lut_bnPosBnProcBuf_CNG5[j][i]*Z;
...@@ -1070,11 +1070,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1070,11 +1070,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 6 BNs // CN group with 6 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<5; j++) for (j=0; j<5; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[3] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[3] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[3]; i++) for (i=0; i<lut_numCnInCnGroups[3]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG6[j][i] + lut_bnPosBnProcBuf_CNG6[j][i]*Z;
...@@ -1087,11 +1087,12 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1087,11 +1087,12 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 7 BNs // CN group with 7 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<6; j++) for (j=0; j<6; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[4]; i++) for (i=0; i<lut_numCnInCnGroups[4]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG7[j][i] + lut_bnPosBnProcBuf_CNG7[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG7[j][i] + lut_bnPosBnProcBuf_CNG7[j][i]*Z;
...@@ -1104,6 +1105,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1104,6 +1105,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 8 BNs // CN group with 8 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX;
#pragma omp simd
for (j=0; j<7; j++) for (j=0; j<7; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup];
...@@ -1119,10 +1121,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1119,10 +1121,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 9 BNs // CN group with 9 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX;
// #pragma omp simd
for (j=0; j<8; j++) for (j=0; j<8; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[6] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[6] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[6]; i++) for (i=0; i<lut_numCnInCnGroups[6]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG9[j][i] + lut_bnPosBnProcBuf_CNG9[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG9[j][i] + lut_bnPosBnProcBuf_CNG9[j][i]*Z;
...@@ -1135,10 +1138,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1135,10 +1138,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 10 BNs // CN group with 10 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<9; j++) for (j=0; j<9; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[7] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[7] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[7]; i++) for (i=0; i<lut_numCnInCnGroups[7]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG10[j][i] + lut_bnPosBnProcBuf_CNG10[j][i]*Z;
...@@ -1151,10 +1155,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf ...@@ -1151,10 +1155,11 @@ static inline void nrLDPC_bn2cnProcBuf_BG1(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
// CN group with 19 BNs // CN group with 19 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX;
//#pragma omp simd
for (j=0; j<19; j++) for (j=0; j<19; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[8] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[8] + j*bitOffsetInGroup];
#pragma omp simd
for (i=0; i<lut_numCnInCnGroups[8]; i++) for (i=0; i<lut_numCnInCnGroups[8]; i++)
{ {
idxBn = lut_startAddrBnProcBuf_CNG19[j][i] + lut_bnPosBnProcBuf_CNG19[j][i]*Z; idxBn = lut_startAddrBnProcBuf_CNG19[j][i] + lut_bnPosBnProcBuf_CNG19[j][i]*Z;
......
# Builds the bnProc_gen_avx2 generator from every .c file in this directory.
# OPENAIR_HOME must be set in the environment; it supplies the include root.

# Compiler used by every recipe below (was misspelled "C", leaving $(CC) at
# make's built-in default).
CC=gcc
# -O2 belongs with the compile flags: it has no effect on the objects when
# passed only to the link step.
CFLAGS=-W -Wall -mavx2 -O2
LDFLAGS=
EXEC=bnProc_gen_avx2
SRC= $(wildcard *.c)
OBJ= $(SRC:.c=.o)

all: $(EXEC)

# Link all objects into the generator executable.
$(EXEC): $(OBJ)
	@$(CC) -o $@ $^ $(LDFLAGS)

# Compile one translation unit.
%.o: %.c
	@$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g -std=c99

# None of these targets produce a file of the same name.
.PHONY: all clean mrproper zip

# Remove object files only.
clean:
	@rm -rf *.o

# Remove object files and the executable.
mrproper: clean
	@rm -rf $(EXEC)

# Archive the generator sources and this Makefile.
zip:
	@tar -zcvf sauvegarde.tar.gz main.c bnProcPc_gen_BG1_avx2.c bnProcPc_gen_BG2_avx2.c bnProc_gen_BG1_avx2.c bnProc_gen_BG2_avx2.c Makefile
#include <stdio.h>
#include<stdint.h>
#define NB_Z 51
void nrLDPC_bnProcPc_BG1_generator_AVX2(uint16_t,int);
void nrLDPC_bnProcPc_BG2_generator_AVX2(uint16_t,int);
void nrLDPC_bnProc_BG1_generator_AVX2(uint16_t,int);
void nrLDPC_bnProc_BG2_generator_AVX2(uint16_t,int);
/*
 * Generator driver: walks the Z table and, for each entry, calls the four
 * AVX2 generators (bnProcPc BG1/BG2, then bnProc BG1/BG2) with 0 as the
 * second argument. Always returns 0.
 */
int main()
{
  /* The NB_Z values of Z handed to every generator, in table order. */
  uint16_t Z[NB_Z]={2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384};
  const uint16_t *zCur = Z;
  const uint16_t *const zEnd = Z + NB_Z;

  while (zCur != zEnd) {
    const uint16_t zVal = *zCur++;

    /* bnProcPc generators */
    nrLDPC_bnProcPc_BG1_generator_AVX2(zVal, 0);
    nrLDPC_bnProcPc_BG2_generator_AVX2(zVal, 0);

    /* bnProc generators */
    nrLDPC_bnProc_BG1_generator_AVX2(zVal, 0);
    nrLDPC_bnProc_BG2_generator_AVX2(zVal, 0);
  }

  return 0;
}
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "../../nrLDPCdecoder_defs.h" #include "../../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h" #include "../../nrLDPC_types.h"
#include "../../nrLDPC_bnProc.h" #include "../../nrLDPC_bnProc.h"
//#include "cnProc_gen_avx2.h"
void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
{ {
...@@ -15,14 +15,14 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -15,14 +15,14 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
// system("mkdir -p ldpc_gen_files/avx2"); // system("mkdir -p ldpc_gen_files/avx2");
char fname[50]; char fname[50];
sprintf(fname,"../ldpc_gen_files/avx2/nrLDPC_cnProc_BG1_Z%d_%s_AVX2.c",Z,ratestr[R]); sprintf(fname,"../ldpc_gen_files/cnProc/nrLDPC_cnProc_BG1_Z%d_R%s_AVX2.c",Z,ratestr[R]);
FILE *fd=fopen(fname,"w"); FILE *fd=fopen(fname,"w");
if (fd == NULL) {printf("Cannot create \n");abort();} if (fd == NULL) {printf("Cannot create \n");abort();}
fprintf(fd,"#include <stdint.h>\n"); fprintf(fd,"#include <stdint.h>\n");
fprintf(fd,"#include <immintrin.h>\n"); fprintf(fd,"#include <immintrin.h>\n");
fprintf(fd,"void nrLDPC_cnProc_BG1_Z%d_%s_AVX2(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]); fprintf(fd,"void nrLDPC_cnProc_BG1_Z%d_R%s_AVX2(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]);
const uint8_t* lut_numCnInCnGroups; const uint8_t* lut_numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG1; const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG1;
......
...@@ -15,13 +15,13 @@ void nrLDPC_cnProc_BG2_generator_AVX2(uint16_t Z,int R) ...@@ -15,13 +15,13 @@ void nrLDPC_cnProc_BG2_generator_AVX2(uint16_t Z,int R)
// system("mkdir -p ldpc_gen_files/avx2"); // system("mkdir -p ldpc_gen_files/avx2");
char fname[50]; char fname[50];
sprintf(fname,"../ldpc_gen_files/avx2/nrLDPC_cnProc_BG2_Z%d_%s_AVX2.c",Z,ratestr[R]); sprintf(fname,"../ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_Z%d_R%s_AVX2.c",Z,ratestr[R]);
FILE *fd=fopen(fname,"w"); FILE *fd=fopen(fname,"w");
if (fd == NULL) {printf("Cannot create \n");abort();} if (fd == NULL) {printf("Cannot create \n");abort();}
fprintf(fd,"#include <stdint.h>\n"); fprintf(fd,"#include <stdint.h>\n");
fprintf(fd,"#include <immintrin.h>\n"); fprintf(fd,"#include <immintrin.h>\n");
fprintf(fd,"void nrLDPC_cnProc_BG2_Z%d_%s_AVX2(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]); fprintf(fd,"void nrLDPC_cnProc_BG2_Z%d_R%s_AVX2(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]);
const uint8_t* lut_numCnInCnGroups; const uint8_t* lut_numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG2; const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG2;
......
#include <stdio.h> #include <stdio.h>
#include <immintrin.h> #include <stdint.h>
#include "../../nrLDPC_types.h"
#include "../../nrLDPC_init.h"
#include "../../nrLDPC_bnProc.h"
#define NB_Z 51 #define NB_Z 51
void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t,int); void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t,int);
void nrLDPC_cnProc_BG2_generator_AVX2(uint16_t,int); void nrLDPC_cnProc_BG2_generator_AVX2(uint16_t,int);
int main() int main()
{ {
uint16_t Z[NB_Z]={2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384}; uint16_t Z[NB_Z]={2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384};
...@@ -15,6 +13,8 @@ int main() ...@@ -15,6 +13,8 @@ int main()
nrLDPC_cnProc_BG1_generator_AVX2(Z[i],0); nrLDPC_cnProc_BG1_generator_AVX2(Z[i],0);
nrLDPC_cnProc_BG2_generator_AVX2(Z[i],0); nrLDPC_cnProc_BG2_generator_AVX2(Z[i],0);
} }
return(0); return(0);
......
...@@ -580,3 +580,4 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -580,3 +580,4 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
}//end of the function nrLDPC_cnProc_BG1 }//end of the function nrLDPC_cnProc_BG1
...@@ -13,7 +13,7 @@ void nrLDPC_cnProc_BG2_generator_AVX512(uint16_t Z,int R) ...@@ -13,7 +13,7 @@ void nrLDPC_cnProc_BG2_generator_AVX512(uint16_t Z,int R)
// system("mkdir -p ../ldpc_gen_files"); // system("mkdir -p ../ldpc_gen_files");
char fname[50]; char fname[50];
sprintf(fname,"../ldpc_gen_files/nrLDPC_cnProc_BG2_Z%d_%s_AVX512.c",Z,ratestr[R]); sprintf(fname,"../ldpc_gen_files/nrLDPC_cnProc_BG2_Z%d_R%s_AVX512.c",Z,ratestr[R]);
FILE *fd=fopen(fname,"w"); FILE *fd=fopen(fname,"w");
if (fd == NULL) {printf("Cannot create \n");abort();} if (fd == NULL) {printf("Cannot create \n");abort();}
...@@ -24,7 +24,7 @@ void nrLDPC_cnProc_BG2_generator_AVX512(uint16_t Z,int R) ...@@ -24,7 +24,7 @@ void nrLDPC_cnProc_BG2_generator_AVX512(uint16_t Z,int R)
fprintf(fd, "#define conditional_negate(a,b,z) _mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n"); fprintf(fd, "#define conditional_negate(a,b,z) _mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
fprintf(fd,"void nrLDPC_cnProc_BG2_Z%d_%s_AVX512(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]); fprintf(fd,"void nrLDPC_cnProc_BG2_Z%d_R%s_AVX512(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]);
const uint8_t* lut_numCnInCnGroups; const uint8_t* lut_numCnInCnGroups;
const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG2; const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG2;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment