Commit 140fa111 authored by Sy's avatar Sy

file generated for CN processing according to the values of Z | version for AVX2 and AVX512

parent ad32b610
...@@ -167,7 +167,7 @@ endif() ...@@ -167,7 +167,7 @@ endif()
# #
set(CMAKE_C_FLAGS set(CMAKE_C_FLAGS
"${CMAKE_C_FLAGS} ${C_FLAGS_PROCESSOR} -std=gnu99 -Wall -Wstrict-prototypes -fno-strict-aliasing -rdynamic -funroll-loops -Wno-packed-bitfield-compat -fPIC -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512ifma -mavx512vbmi") "${CMAKE_C_FLAGS} ${C_FLAGS_PROCESSOR} -std=gnu99 -Wall -fopenmp -fopenmp-simd -Wstrict-prototypes -fno-strict-aliasing -rdynamic -funroll-loops -Wno-packed-bitfield-compat -fPIC")
# add autotools definitions that were maybe used! # add autotools definitions that were maybe used!
set(MKVER "'MAKE_VERSION(a,b,c)=((a)*256+(b)*16+c)'") set(MKVER "'MAKE_VERSION(a,b,c)=((a)*256+(b)*16+c)'")
set(CMAKE_C_FLAGS set(CMAKE_C_FLAGS
...@@ -1305,31 +1305,27 @@ set(PHY_TURBOIF ...@@ -1305,31 +1305,27 @@ set(PHY_TURBOIF
set(PHY_LDPC_ORIG_SRC set(PHY_LDPC_ORIG_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c # ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c # ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c
) )
set(PHY_LDPC_OPTIM_SRC set(PHY_LDPC_OPTIM_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c # ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c #${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c
) )
set(PHY_LDPC_OPTIM8SEG_SRC set(PHY_LDPC_OPTIM8SEG_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8seg.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8seg.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c # ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c #${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c
) )
set(PHY_LDPC_OPTIM8SEGMULTI_SRC set(PHY_LDPC_OPTIM8SEGMULTI_SRC
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c # ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX2.c
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.c # ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/nrLDPC_cnProc_BG1_Z384_13_AVX512.
) )
set(PHY_NR_CODINGIF set(PHY_NR_CODINGIF
${OPENAIR1_DIR}/PHY/CODING/nrLDPC_load.c; ${OPENAIR1_DIR}/PHY/CODING/nrLDPC_load.c;
......
%
% Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
% contributor license agreements. See the NOTICE file distributed with
% this work for additional information regarding copyright ownership.
% The OpenAirInterface Software Alliance licenses this file to You under
% the OAI Public License, Version 1.1 (the "License"); you may not use this file
% except in compliance with the License.
% You may obtain a copy of the License at
%
% http://www.openairinterface.org/?page_id=698
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS,
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
% See the License for the specific language governing permissions and
% limitations under the License.
%-------------------------------------------------------------------------------
% For more information about the OpenAirInterface (OAI) Software Alliance:
% contact@openairinterface.org
%
@online{3gpp5gTimeline,
author = {3GPP},
title = {{3GPP 5G Timeline}},
year = 2016,
urldate = {2017-06-14},
url = {http://www.3gpp.org/images/articleimages/5g_timeline.jpg}
}
@techreport{3gppTR38913,
author = "{Technical Specification Group Radio Access Network}",
title = "{Study on Scenarios and Requirements for Next Generation Access Technologies}",
institution = "{3GPP TR 38.913 V14.2.0}",
month = mar,
year = 2017,
};
@techreport{iturM2038,
author = "{ITU-R}",
title = "{IMT Vision -- Framework and overall objectives of the future development of IMT for 2020 and beyond}",
institution = "{Radiocommunication Sector of ITU}",
month = sep,
year = 2015,
};
@techreport{3gpp2014seb,
author = "{Samsung, Nokia Networks}",
title = "{New SID Proposal: Study on Elevation Beamforming/Full-Dimension (FD) MIMO for LTE}",
institution = "3GPP",
month = sep,
year = 2014,
};
@techreport{3gpp2015fdm,
author = "{Technical Specification Group Radio Access Network}",
title = "{Study on elevation beamforming / Full-Dimension (FD) Multiple Input Multiple Output (MIMO) for LTE}",
institution = "3GPP TR 36.897 V13.0.0",
month = jun,
year = 2015,
};
@techreport{3gpp2008tsg,
author = "{Technical Specification Group Radio Access Network;
Evolved Universal Terrestrial Radio Access (E-UTRA)}",
title = "{Further advancements for E-UTRA physical layer aspects (Release 9)}",
institution = "{3GPP TR 36.814 V9.0.0}",
month = mar,
year = 2010,
};
@techreport{3gpp2011uer,
author = "{Technical Specification Group Radio Access Network;
Evolved Universal Terrestrial Radio Access (E-UTRA)}",
title = "{User Equipment (UE) Radio Transmission and Reception}",
institution = "{3GPP TS 36.101 V10.3.0}",
month = jun,
year = 2011,
};
@techreport{3gpp2009_36211,
author = "{3rd Generation Partnership Project}",
title = "{Physical Channels and Modulation (Release 8)}",
institution = "{3GPP TS 36.211 V8.6.0}",
month = mar,
year = 2009,
};
@techreport{3gpp2017_38212,
author = "{3rd Generation Partnership Project}",
title = "{Multiplexing and channel coding (Release 15)}",
institution = "{3GPP TS 38.212 V15.0.1}",
month = mar,
year = 2018,
};
@article{gallager1962low,
title={Low-density parity-check codes},
author={Gallager, Robert},
journal={IRE Transactions on information theory},
volume={8},
number={1},
pages={21--28},
year={1962},
publisher={IEEE}
}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -2745,8 +2745,8 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR) ...@@ -2745,8 +2745,8 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
uint32_t M = numLLR>>5; uint32_t M = numLLR>>5;
uint32_t Mr = numLLR&31; uint32_t Mr = numLLR&31;
const __m256i* p_zeros = (__m256i*) zeros512_epi8; const __m256i* p_zeros = (__m256i*) zeros256_epi8;
const __m256i* p_ones = (__m256i*) ones512_epi8; const __m256i* p_ones = (__m256i*) ones256_epi8;
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
......
...@@ -108,9 +108,9 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_pr ...@@ -108,9 +108,9 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_pr
// Store result // Store result
min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127 min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
//*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn); *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
//p_cnProcBufResBit++; p_cnProcBufResBit++;
p_cnProcBufResBit[i]=_mm256_sign_epi8(min, sgn); //p_cnProcBufResBit[i]=_mm256_sign_epi8(min, sgn);
} }
} }
} }
......
...@@ -34,15 +34,15 @@ ...@@ -34,15 +34,15 @@
#include <stdlib.h> #include <stdlib.h>
#include "nrLDPC_types.h" #include "nrLDPC_types.h"
#ifndef malloc64_clear #ifndef malloc32_clear
/** /**
\brief Allocates 64 byte aligned memory and initializes to zero \brief Allocates 32 byte aligned memory and initializes to zero
\param size Input size in bytes \param size Input size in bytes
\return Pointer to memory \return Pointer to memory
*/ */
static inline void* malloc64_clear(size_t size) static inline void* malloc32_clear(size_t size)
{ {
void* ptr = (void*) memalign(64, size+64); void* ptr = (void*) memalign(32, size+32);
memset(ptr, 0, size); memset(ptr, 0, size);
return ptr; return ptr;
} }
...@@ -56,16 +56,16 @@ static inline void* malloc64_clear(size_t size) ...@@ -56,16 +56,16 @@ static inline void* malloc64_clear(size_t size)
*/ */
static inline t_nrLDPC_procBuf* nrLDPC_init_mem(void) static inline t_nrLDPC_procBuf* nrLDPC_init_mem(void)
{ {
t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc64_clear(sizeof(t_nrLDPC_procBuf)); t_nrLDPC_procBuf* p_procBuf = (t_nrLDPC_procBuf*) malloc32_clear(sizeof(t_nrLDPC_procBuf));
if (p_procBuf) if (p_procBuf)
{ {
p_procBuf->cnProcBuf = (int8_t*) malloc64_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t)); p_procBuf->cnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->cnProcBufRes = (int8_t*) malloc64_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t)); p_procBuf->cnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_CN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBuf = (int8_t*) malloc64_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t)); p_procBuf->bnProcBuf = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->bnProcBufRes = (int8_t*) malloc64_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t)); p_procBuf->bnProcBufRes = (int8_t*) malloc32_clear(NR_LDPC_SIZE_BN_PROC_BUF*sizeof(int8_t));
p_procBuf->llrRes = (int8_t*) malloc64_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t)); p_procBuf->llrRes = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
p_procBuf->llrProcBuf = (int8_t*) malloc64_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t)); p_procBuf->llrProcBuf = (int8_t*) malloc32_clear(NR_LDPC_MAX_NUM_LLR *sizeof(int8_t));
} }
return(p_procBuf); return(p_procBuf);
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
#include <string.h> #include <string.h>
#include "nrLDPCdecoder_defs.h" #include "nrLDPCdecoder_defs.h"
//#include <omp.h>
/** /**
\brief Circular memcpy \brief Circular memcpy
|<- rem->|<- circular shift ->| |<- rem->|<- circular shift ->|
...@@ -171,6 +171,9 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -171,6 +171,9 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX;
// #pragma omp simd
// #pragma omp parallel for schedule(dynamic)
for (j=0; j<3; j++) for (j=0; j<3; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
...@@ -202,8 +205,10 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -202,8 +205,10 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
// ===================================================================== // =====================================================================
// CN group with 5 BNs // CN group with 5 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX;
for (j=0; j<5; j++) for (j=0; j<5; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[2] + j*bitOffsetInGroup];
...@@ -236,8 +241,10 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -236,8 +241,10 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
// ===================================================================== // =====================================================================
// CN group with 7 BNs // CN group with 7 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX;
for (j=0; j<7; j++) for (j=0; j<7; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[4] + j*bitOffsetInGroup];
...@@ -255,6 +262,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -255,6 +262,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX;
for (j=0; j<8; j++) for (j=0; j<8; j++)
{ {
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup]; p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[5] + j*bitOffsetInGroup];
...@@ -304,6 +312,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_ ...@@ -304,6 +312,7 @@ static inline void nrLDPC_llr2CnProcBuf_BG1(t_nrLDPC_lut* p_lut, int8_t* llr, t_
// ===================================================================== // =====================================================================
// CN group with 19 BNs // CN group with 19 BNs
bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX; bitOffsetInGroup = lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX;
for (j=0; j<19; j++) for (j=0; j<19; j++)
......
...@@ -8,20 +8,20 @@ OBJ= $(SRC:.c=.o) ...@@ -8,20 +8,20 @@ OBJ= $(SRC:.c=.o)
all: $(EXEC) all: $(EXEC)
cnProc_gen_avx2: $(OBJ) cnProc_gen_avx2: $(OBJ)
$(CC) -o $@ $^ $(LDFLAGS) -O2 -pg @$(CC) -o $@ $^ $(LDFLAGS) -O2
main.o: cnProc_gen_avx2.h #main.o: cnProc_gen_avx2.h
%.o: %.c %.o: %.c
$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g -pg @$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g
.PHONY: clean mrproper .PHONY: clean mrproper
clean: clean:
rm -rf *.o @rm -rf *.o
mrproper: clean mrproper: clean
rm -rf $(EXEC) @rm -rf $(EXEC)
zip: zip:
tar -zcvf sauvegarde.tar.gz main.c cnProc_gen_avx2.c cnProc_gen_avx2.h Makefile @tar -zcvf sauvegarde.tar.gz main.c cnProc_gen_avx2.c cnProc_gen_avx2.h Makefile
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "../../nrLDPCdecoder_defs.h" #include "../../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h" #include "../../nrLDPC_types.h"
#include "../../nrLDPC_bnProc.h" #include "../../nrLDPC_bnProc.h"
#include "cnProc_gen_avx2.h" //#include "cnProc_gen_avx2.h"
void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
{ {
...@@ -12,12 +12,12 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -12,12 +12,12 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();} if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
// system("mkdir -p ldpc_gen_files"); // system("mkdir -p ldpc_gen_files/avx2");
char fname[50]; char fname[50];
sprintf(fname,"../ldpc_gen_files/nrLDPC_cnProc_BG1_Z%d_%s_AVX2.c",Z,ratestr[R]); sprintf(fname,"../ldpc_gen_files/avx2/nrLDPC_cnProc_BG1_Z%d_%s_AVX2.c",Z,ratestr[R]);
FILE *fd=fopen(fname,"w"); FILE *fd=fopen(fname,"w");
if (fd == NULL) {printf("Cannot create %s\n");abort();} if (fd == NULL) {printf("Cannot create \n");abort();}
fprintf(fd,"#include <stdint.h>\n"); fprintf(fd,"#include <stdint.h>\n");
fprintf(fd,"#include <immintrin.h>\n"); fprintf(fd,"#include <immintrin.h>\n");
...@@ -79,7 +79,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -79,7 +79,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<3; j++) for (j=0; j<3; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -165,7 +165,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -165,7 +165,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<4; j++) for (j=0; j<4; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -230,7 +230,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -230,7 +230,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<5; j++) for (j=0; j<5; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -295,7 +295,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -295,7 +295,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<6; j++) for (j=0; j<6; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -363,7 +363,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -363,7 +363,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<7; j++) for (j=0; j<7; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -432,7 +432,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -432,7 +432,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<8; j++) for (j=0; j<8; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -502,7 +502,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -502,7 +502,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<9; j++) for (j=0; j<9; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -572,7 +572,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -572,7 +572,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<10; j++) for (j=0; j<10; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -645,7 +645,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R) ...@@ -645,7 +645,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
//p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m256i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<19; j++) for (j=0; j<19; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
......
/*
 * Public declaration of the AVX2 CN-processing code generator.
 *
 * The include guard carries an AVX2 suffix so that this header can be
 * included together with the AVX512 generator header without one guard
 * suppressing the other header's prototype.
 */
#ifndef NRLDPC_CN_GEN_AVX2
#define NRLDPC_CN_GEN_AVX2

#include <stdint.h> /* uint16_t: keeps this header self-contained */

/**
   \brief Writes the generated AVX2 CN-processing C source file for BG1
   \param Z Value of Z the file is generated for (it is part of the output file name)
   \param R Index selecting the rate string used in the output file name; the generator aborts unless 0 <= R <= 2
*/
void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R);

#endif /* NRLDPC_CN_GEN_AVX2 */
...@@ -2,16 +2,18 @@ ...@@ -2,16 +2,18 @@
#include <immintrin.h> #include <immintrin.h>
#include "../../nrLDPC_types.h" #include "../../nrLDPC_types.h"
#include "../../nrLDPC_init.h" #include "../../nrLDPC_init.h"
#include "../../nrLDPC_bnProc.h" #include "../../nrLDPC_bnProc.h"
#include "cnProc_gen_avx2.h" #define NB_Z 51
void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t,int);
int main(int argc, char *argv []) int main()
{ {
uint16_t Z[NB_Z]={2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384};
// Z=384, R=1/3
nrLDPC_cnProc_BG1_generator_AVX2(384,0);
for(int i=0; i<NB_Z;i++){
nrLDPC_cnProc_BG1_generator_AVX2(Z[i],0);
}
return(0); return(0);
} }
...@@ -8,20 +8,20 @@ OBJ= $(SRC:.c=.o) ...@@ -8,20 +8,20 @@ OBJ= $(SRC:.c=.o)
all: $(EXEC) all: $(EXEC)
cnProc_gen_avx512: $(OBJ) cnProc_gen_avx512: $(OBJ)
$(CC) -o $@ $^ $(LDFLAGS) -O2 -pg @$(CC) -o $@ $^ $(LDFLAGS) -O2
main.o: cnProc_gen_avx512.h #main.o: cnProc_gen_avx512.h
%.o: %.c %.o: %.c
$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g -pg @$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g
.PHONY: clean mrproper .PHONY: clean mrproper
clean: clean:
rm -rf *.o @rm -rf *.o
mrproper: clean mrproper: clean
rm -rf $(EXEC) @rm -rf $(EXEC)
zip: zip:
tar -zcvf sauvegarde.tar.gz main.c cnProc_gen_avx512.c cnProc_gen_avx512.h Makefile tar -zcvf sauvegarde.tar.gz main.c cnProc_gen_avx512.c Makefile
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
#include "../../nrLDPCdecoder_defs.h" #include "../../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h" #include "../../nrLDPC_types.h"
#include "../../nrLDPC_bnProc.h" #include "../../nrLDPC_bnProc.h"
#include "cnProc_gen_avx512.h"
void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
{ {
...@@ -12,12 +11,12 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -12,12 +11,12 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();} if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
// system("mkdir -p ldpc_gen_files"); // system("mkdir -p ../ldpc_gen_files");
char fname[50]; char fname[50];
sprintf(fname,"../ldpc_gen_files/nrLDPC_cnProc_BG1_Z%d_%s_AVX512.c",Z,ratestr[R]); sprintf(fname,"../ldpc_gen_files/nrLDPC_cnProc_BG1_Z%d_%s_AVX512.c",Z,ratestr[R]);
FILE *fd=fopen(fname,"w"); FILE *fd=fopen(fname,"w");
if (fd == NULL) {printf("Cannot create %s\n");abort();} if (fd == NULL) {printf("Cannot create \n");abort();}
fprintf(fd,"#include <stdint.h>\n"); fprintf(fd,"#include <stdint.h>\n");
fprintf(fd,"#include <immintrin.h>\n"); fprintf(fd,"#include <immintrin.h>\n");
...@@ -92,7 +91,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -92,7 +91,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[0]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[0]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<3; j++) for (j=0; j<3; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -178,7 +177,6 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -178,7 +177,6 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<4; j++) for (j=0; j<4; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -243,7 +241,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -243,7 +241,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<5; j++) for (j=0; j<5; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -308,7 +306,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -308,7 +306,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<6; j++) for (j=0; j<6; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -375,7 +373,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -375,7 +373,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<7; j++) for (j=0; j<7; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -443,7 +441,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -443,7 +441,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<8; j++) for (j=0; j<8; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -512,7 +510,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -512,7 +510,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<9; j++) for (j=0; j<9; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -582,7 +580,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -582,7 +580,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<10; j++) for (j=0; j<10; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
...@@ -655,7 +653,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R) ...@@ -655,7 +653,7 @@ void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R)
//p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]]; //p_cnProcBufRes = (__m512i*) &cnProcBufRes[lut_startAddrCnGroups[1]];
// Loop over every BN // Loop over every BN
int iprime=0;
for (j=0; j<19; j++) for (j=0; j<19; j++)
{ {
// Set of results pointer to correct BN address // Set of results pointer to correct BN address
......
/*
 * Public declaration of the AVX512 CN-processing code generator.
 *
 * The include guard carries an AVX512 suffix so that this header can be
 * included together with the AVX2 generator header without one guard
 * suppressing the other header's prototype.
 */
#ifndef NRLDPC_CN_GEN_AVX512
#define NRLDPC_CN_GEN_AVX512

#include <stdint.h> /* uint16_t: keeps this header self-contained */

/**
   \brief Writes the generated AVX512 CN-processing C source file for BG1
   \param Z Value of Z the file is generated for (it is part of the output file name)
   \param R Index selecting the rate string used in the output file name; the generator aborts unless 0 <= R <= 2
*/
void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t Z,int R);

#endif /* NRLDPC_CN_GEN_AVX512 */
#include <stdio.h> #include <stdio.h>
#include <immintrin.h> #include <immintrin.h>
//#include "../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h" #include "../../nrLDPC_types.h"
#include "../../nrLDPC_init.h" #include "../../nrLDPC_init.h"
#include "../../nrLDPC_bnProc.h" #include "../../nrLDPC_bnProc.h"
#include "cnProc_gen_avx512.h" #define NB_Z 51
void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t,int);
int main(int argc, char *argv []) int main()
{ {
uint16_t Z[NB_Z]={2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,36,40,44,48,52,56,60,64,72,80,88,96,104,112,120,128,144,160,176,192,208,224,240,256,288,320,352,384};
// Z=384, R=1/3
nrLDPC_cnProc_BG1_generator_AVX512(384,0);
for(int i=0; i<NB_Z;i++){
nrLDPC_cnProc_BG1_generator_AVX512(Z[i],0);
}
return(0); return(0);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment