Commit 8663ee69 authored by Raymond Knopp

Changes in AVX512 code generator. Yields small improvement compared to AVX2,...

Changes in the AVX512 code generator. Yields a small improvement compared to AVX2; BLER performance is still degraded.
parent c67550e5
......@@ -138,8 +138,8 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
if(EXISTS "/proc/cpuinfo")
file(STRINGS "/proc/cpuinfo" CPUINFO REGEX flags LIMIT_COUNT 1)
if (CPUINFO MATCHES "avx512f")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -march=skylake-avx512 -mtune=skylake-avx512")
if (CPUINFO MATCHES "avx512bw")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx512bw")
set(COMPILATION_AVX2 "True")
else()
if (CPUINFO MATCHES "avx2")
......
......@@ -333,8 +333,8 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{
case 384:
{
// nrLDPC_cnProc_BG1_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
nrLDPC_cnProc_BG1_Z384_13_AVX512(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes); //we test here
//nrLDPC_cnProc_BG1_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
nrLDPC_cnProc_BG1_Z384_13_AVX512(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes); //we test here
break;
}
case 352:
......@@ -1475,7 +1475,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
case 384:
{
nrLDPC_cnProc_BG2_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
// nrLDPC_cnProc_BG2_Z384_13_AVX512(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
//nrLDPC_cnProc_BG2_Z384_13_AVX512(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
break;
}
case 352:
......@@ -1922,7 +1922,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{
case 384:
{
// nrLDPC_cnProc_BG1_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
//nrLDPC_cnProc_BG1_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
nrLDPC_cnProc_BG1_Z384_13_AVX512(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes); //we test here
break;
}
......@@ -2273,7 +2273,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
case 384:
{
nrLDPC_cnProc_BG2_Z384_13_AVX2(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
// nrLDPC_cnProc_BG2_Z384_13_AVX512(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
//nrLDPC_cnProc_BG2_Z384_13_AVX512(p_procBuf->cnProcBuf,p_procBuf->cnProcBufRes);
break;
}
case 352:
......
......@@ -42,7 +42,7 @@
*/
// Allocate an aligned buffer and zero its first `size` bytes.
//
// size:    number of usable bytes the caller needs.
// returns: pointer to the buffer, or NULL when the allocation fails.
//
// The historical name says "32", but the buffer is aligned to 64 bytes,
// i.e. the width of one 512-bit vector register.
static inline void* malloc32_clear(size_t size)
{
    // 64 extra bytes are allocated beyond `size`; they are NOT zeroed below.
    // NOTE(review): presumably slack for full-width vector accesses at the
    // tail of the buffer -- confirm against the callers.
    void* ptr = memalign(64, size + 64);

    // Only clear the buffer when the allocation succeeded; calling memset()
    // on a NULL pointer is undefined behaviour.
    if (ptr != NULL) {
        memset(ptr, 0, size);
    }

    return ptr;
}
......
......@@ -8,12 +8,12 @@ OBJ= $(SRC:.c=.o)
all: $(EXEC)
cnProc_gen_avx2: $(OBJ)
@$(CC) -o $@ $^ $(LDFLAGS) -O2
@$(CC) -o $@ $^ $(LDFLAGS) -O2
#main.o: cnProc_gen_avx2.h
%.o: %.c
@$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g
@$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g -std=c99
.PHONY: clean mrproper
......
......@@ -21,7 +21,6 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
fprintf(fd,"#include <stdint.h>\n");
fprintf(fd,"#include <immintrin.h>\n");
fprintf(fd,"#include "\"../include/avx512fintrin.h>\"\n");
fprintf(fd,"void nrLDPC_cnProc_BG1_Z%d_%s_AVX2(int8_t* cnProcBuf,int8_t* cnProcBufRes) {\n",Z,ratestr[R]);
......@@ -411,10 +410,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(uint16_t Z,int R)
// Process group with 8 BNs
fprintf(fd,"//Process group with 8 BNs\n");
// Offset is 2*384/32 = 24
const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,120,144,168}, {0,48,72,96,120,144,168},
{0,24,72,96,120,144,168}, {0,24,48,96,120,144,168},
{0,24,48,72,120,144,168}, {0,24,48,72,96,144,168},
{0,24,48,72,96,120,168}, {0,24,48,72,96,120,144}};
const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,112,144,168}, {0,48,72,96,112,144,168},
{0,24,72,96,112,144,168}, {0,24,48,96,112,144,168},
{0,24,48,72,112,144,168}, {0,24,48,72,96,144,168},
{0,24,48,72,96,112,168}, {0,24,48,72,96,112,144}};
......
......@@ -13,7 +13,7 @@ cnProc_gen_avx512: $(OBJ)
#main.o: cnProc_gen_avx512.h
%.o: %.c
@$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g
@$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g -std=c99
.PHONY: clean mrproper
......
#include <stdio.h>
#include <stdint.h>
#include <immintrin.h>
#include "../../nrLDPCdecoder_defs.h"
#include "../../nrLDPC_types.h"
#include "../../nrLDPC_bnProc.h"
void nrLDPC_cnProc_BG2_generator_AVX512(uint16_t Z,int R)
{
......
#include <stdio.h>
#include <immintrin.h>
#include "../../nrLDPC_types.h"
#include "../../nrLDPC_init.h"
#include "../../nrLDPC_bnProc.h"
#include <stdint.h>
#define NB_Z 51
void nrLDPC_cnProc_BG1_generator_AVX512(uint16_t,int);
void nrLDPC_cnProc_BG2_generator_AVX512(uint16_t,int);
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
#include <stdint.h>
#include <immintrin.h>
#include "../include/avx512fintrin.h"
__m512i _mm512_sign_epi16(__m512i a, __m512i b){
b = _mm512_min_epi16(b, _mm512_set1_epi16(1));
b = _mm512_max_epi16(b, _mm512_set1_epi16(-1));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment