Commit 6f351c3d authored by sebastian's avatar sebastian

Fixed bug in llr2CnProcBuf, started updating documentation

parent d2875d02
......@@ -542,9 +542,9 @@ In this section, the performance in terms of BLER and decoding latency of the cu
\subsection{BLER Performance}
\label{sec:bler-performance}
In all simulations, we assume AWGN, QPSK modulation and 8-bit input LLRs. The results are averaged over at least $10\,000$ channel realizations.
In all simulations, we assume AWGN, QPSK modulation and 8-bit input LLRs, i.e. $-127$ until $+127$. The results are averaged over at least $10\,000$ channel realizations.
The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the current LDPC decoder implementation to the reference implementation developed by Kien. This reference implementation is called \textit{LDPC Ref} and uses the min-sum algorithm with 2 layers and 16 bit for processing. Out current optimized decoder implementation is referred to as \textit{LDPC Opt}. Moreover, reference results provided by Huawei are also shown.
The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the current LDPC decoder implementation to the reference implementation developed by Kien. This reference implementation is called \textit{LDPC Ref} and uses the min-sum algorithm with 2 layers and 16 bit for processing. Our current optimized decoder implementation is referred to as \textit{LDPC Opt}. Moreover, reference results provided by Huawei are also shown.
\begin{figure}[ht]
\centering
......@@ -566,9 +566,10 @@ The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the curren
% Kien's 2-layer 16bit code
\addplot[red, solid, mark=o] plot coordinates { (-2.750000,0.915500) (-2.500000,0.576000) (-2.250000,0.165000) (-2.000000,0.017100) (-1.750000,0.000600) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)};
% LDPC opt with 16bit BN processing
\addplot[blue, solid, mark=square] plot coordinates { (-2.750000,0.998600) (-2.500000,0.953600) (-2.250000,0.718800) (-2.000000,0.299300) (-1.750000,0.053700) (-1.500000,0.005100) (-1.250000,0.000500) (-1.000000,0.000100)};
% 10 iterations
%\addplot[blue, solid, mark=square] plot coordinates { (-2.750000,0.998600) (-2.500000,0.953600) (-2.250000,0.718800) (-2.000000,0.299300) (-1.750000,0.053700) (-1.500000,0.005100) (-1.250000,0.000500) (-1.000000,0.000100)};
\addplot[blue, solid, mark=square] plot coordinates { (-2.750000,0.997200) (-2.500000,0.955000) (-2.250000,0.710900) (-2.000000,0.270400) (-1.750000,0.042400) (-1.500000,0.002200) (-1.250000,0.000000) (-1.000000,0.000000)};
% Matlab
\addplot[green, solid, mark=triangle] plot coordinates {(-2.750000,0.318200) (-2.500000,0.135900) (-2.250000,0.102000) (-2.000000,0.092300) (-1.750000,0.079200) (-1.500000,0.063100) (-1.250000,0.041400) (-1.000000,0.029600) (-0.750000,0.017500) (-0.500000,0.011800) (-0.250000,0.006100) (0.000000,0.004100) (0.250000,0.002800) (0.500000,0.000800) (0.750000,0.000300) (1.000000,0.000400) };
......@@ -577,13 +578,30 @@ The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the curren
% 20 iterations
\addplot[red, solid, mark=o] plot coordinates { (-2.750000,0.330300) (-2.500000,0.067800) (-2.250000,0.006000) (-2.000000,0.000100) (-1.750000,0.000000) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)};
%\addplot[blue, solid, mark=square] plot coordinates {(-2.750000,0.341300) (-2.500000,0.065100) (-2.250000,0.004100) (-2.000000,0.000200) (-1.750000,0.000100) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)};
\addplot[blue, dashed, mark=square] plot coordinates {(-2.750000,0.337900) (-2.500000,0.058300) (-2.250000,0.004000) (-2.000000,0.000200) (-1.750000,0.000000) (-1.500000,0.000000) };
% LDPC base
%(-2.750000,0.369000) (-2.500000,0.068200) (-2.250000,0.002900) (-2.000000,0.000200) (-1.750000,0.000000) (-1.500000,0.000000)
% New LDPC with llr2cn of old implementation
% (-2.750000,0.716900) (-2.500000,0.300500) (-2.250000,0.045300) (-2.000000,0.003300) (-1.750000,0.000000) (-1.500000,0.000000)
% saturation in both bnproc and bnprocpc
% (-2.750000,0.668900) (-2.500000,0.248800) (-2.250000,0.039100) (-2.000000,0.002100) (-1.750000,0.000100) (-1.500000,0.000000) };
% saturation with parity check
% (-2.750000,0.672100) (-2.500000,0.245100) (-2.250000,0.037100) (-2.000000,0.001900) (-1.750000,0.000000) (-1.500000,0.000000) };
%(-2.750000,0.343100) (-2.500000,0.064300) (-2.250000,0.003400) (-2.000000,0.000100) (-1.750000,0.000000) (-1.500000,0.000100) };
%(-2.750000,0.334400) (-2.500000,0.059400) (-2.250000,0.003400) (-2.000000,0.000000) (-1.750000,0.000100) (-1.500000,0.000000) };
\addplot[blue, solid, mark=square] plot coordinates {(-2.750000,0.341300) (-2.500000,0.065100) (-2.250000,0.004100) (-2.000000,0.000200) (-1.750000,0.000100) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)};
% 5 iterations
\addplot[red, solid, mark=o] plot coordinates {(-1.250000,0.781300) (-1.000000,0.421000) (-0.750000,0.140400) (-0.500000,0.028900) (-0.250000,0.003300) (0.000000,0.000300) (0.250000,0.000000) (0.500000,0.000000)};
\addplot[blue, solid, mark=square] plot coordinates {(-0.250000,0.705000) (0.000000,0.406200) (0.250000,0.181300) (0.500000,0.061600) (0.750000,0.015900) (1.000000,0.004900) (1.250000,0.000900) (1.500000,0.000200)};
\addplot[green, solid, mark=triangle] plot coordinates {(-1.000000,0.778900) (-0.500000,0.226400) (0.000000,0.027400) (0.500000,0.002600) (1.000000,0.000300) };
\addplot[red, solid, mark=o] plot coordinates {(-1.250000,0.781300) (-1.000000,0.421000) (-0.750000,0.140400) (-0.500000,0.028900) (-0.250000,0.003300) (0.000000,0.000300) (0.250000,0.000000) (0.500000,0.000000)};
%\addplot[blue, solid, mark=square] plot coordinates {(-0.250000,0.705000) (0.000000,0.406200) (0.250000,0.181300) (0.500000,0.061600) (0.750000,0.015900) (1.000000,0.004900) (1.250000,0.000900) (1.500000,0.000200)};
\addplot[blue, solid, mark=square] plot coordinates {(-1.000000,0.700400) (-0.750000,0.370600) (-0.500000,0.136000) (-0.250000,0.039000) (0.000000,0.008500) (0.250000,0.002400) (0.500000,0.000700) (0.750000,0.000000) (1.000000,0.000000) };
\addplot[green, solid, mark=triangle] plot coordinates {(-1.000000,0.778900) (-0.500000,0.226400) (0.000000,0.027400) (0.500000,0.002600) (1.000000,0.000300) };
% Parity check 50 iterations
\addplot[blue, solid, mark=square] plot coordinates {(-2.750000,0.214600) (-2.500000,0.029200) (-2.250000,0.001500) (-2.000000,0.000100) (-1.750000,0.000000) (-1.500000,0.000000) };
% 30 iterations
% \addplot[blue, dashed, mark=square] plot coordinates {(-3.000000,0.623800) (-2.750000,0.224100) (-2.500000,0.031600) (-2.250000,0.001100) (-2.000000,0.000000)};
......@@ -707,17 +725,27 @@ The results in Table \ref{tab:lat-bg2-r15} show the impact of the number of iter
\toprule
\textbf{Function} & \textbf{Time [$\mu s$] (5 it)} & \textbf{Time [$\mu s$] (10 it)} & \textbf{Time [$\mu s$] (20 it)}\\
\midrule
\texttt{llr2llrProcBuf} & 1.1 & 1.1 & 1.1 \\
\texttt{llr2CnProcBuf} & 12.4 & 12.0 & 12.0 \\
\texttt{cnProc} & 11.7 & 22.1 & 43.5 \\
\texttt{bnProcPc} & 6.6 & 12.1 & 23.8 \\
\texttt{bnProc} & 4.2 & 8.1 & 16.2 \\
\texttt{cn2bnProcBuf} & 61.3 & 118.3 & 234.9 \\
\texttt{bn2cnProcBuf} & 38.1 & 82.5 & 172.3 \\
\texttt{llrRes2llrOut} & 3.5 & 3.4 & 3.4 \\
\texttt{llr2bit} & 0.2 & 0.1 & 0.1 \\
% \texttt{llr2llrProcBuf} & 1.1 & 1.1 & 1.1 \\
% \texttt{llr2CnProcBuf} & 12.4 & 12.0 & 12.0 \\
% \texttt{cnProc} & 11.7 & 22.1 & 43.5 \\
% \texttt{bnProcPc} & 6.6 & 12.1 & 23.8 \\
% \texttt{bnProc} & 4.2 & 8.1 & 16.2 \\
% \texttt{cn2bnProcBuf} & 61.3 & 118.3 & 234.9 \\
% \texttt{bn2cnProcBuf} & 38.1 & 82.5 & 172.3 \\
% \texttt{llrRes2llrOut} & 3.5 & 3.4 & 3.4 \\
% \texttt{llr2bit} & 0.2 & 0.1 & 0.1 \\
\texttt{llr2llrProcBuf} & 0.5 & 0.5 & 0.5 \\
\texttt{llr2CnProcBuf} & 5.0 & 4.8 & 4.9 \\
\texttt{cnProc} & 12.4 & 23.0 & 42.7 \\
\texttt{bnProcPc} & 8.4 & 14.8 & 27.0 \\
\texttt{bnProc} & 5.5 & 10.1 & 19.0 \\
\texttt{cn2bnProcBuf} & 14.9 & 24.4 & 44.0 \\
\texttt{bn2cnProcBuf} & 10.5 & 17.8 & 31.8 \\
\texttt{llrRes2llrOut} & 0.3 & 0.3 & 0.3 \\
\texttt{llr2bit} & 0.2 & 0.2 & 0.2 \\
\midrule
\textbf{Total} & \textbf{139.4} & \textbf{260.3} & \textbf{508.4} \\
% \textbf{Total} & \textbf{139.4} & \textbf{260.3} & \textbf{508.4} \\
\textbf{Total} & \textbf{58.5} & \textbf{97.1} & \textbf{172.6} \\
\bottomrule
\end{tabular}
\caption{BG2, Z=128, R=1/5, B=1280, LDPC Opt}
......@@ -732,17 +760,27 @@ Table \ref{tab:lat-bg2-i5} shows the impact of the code rate on the latency for
\toprule
\textbf{Function} & \textbf{Time [$\mu s$] (R=1/5)} & \textbf{Time [$\mu s$] (R=1/3)} & \textbf{Time [$\mu s$] (R=2/3)}\\
\midrule
\texttt{llr2llrProcBuf} & 3.2 & 2.9 & 2.6 \\
\texttt{llr2CnProcBuf} & 36.5 & 25.4 & 14.8 \\
\texttt{cnProc} & 33.6 & 25.2 & 13.3 \\
\texttt{bnProcPc} & 17.6 & 10.2 & 4.5 \\
\texttt{bnProc} & 8.5 & 5.4 & 2.5 \\
\texttt{cn2bnProcBuf} & 175.3 & 110.6 & 50.7 \\
\texttt{bn2cnProcBuf} & 106.6 & 71.2 & 36.1 \\
\texttt{llrRes2llrOut} & 10.2 & 6.3 & 3.3 \\
\texttt{llr2bit} & 0.4 & 0.2 & 0.1 \\
% \texttt{llr2llrProcBuf} & 3.2 & 2.9 & 2.6 \\
% \texttt{llr2CnProcBuf} & 36.5 & 25.4 & 14.8 \\
% \texttt{cnProc} & 33.6 & 25.2 & 13.3 \\
% \texttt{bnProcPc} & 17.6 & 10.2 & 4.5 \\
% \texttt{bnProc} & 8.5 & 5.4 & 2.5 \\
% \texttt{cn2bnProcBuf} & 175.3 & 110.6 & 50.7 \\
% \texttt{bn2cnProcBuf} & 106.6 & 71.2 & 36.1 \\
% \texttt{llrRes2llrOut} & 10.2 & 6.3 & 3.3 \\
% \texttt{llr2bit} & 0.4 & 0.2 & 0.1 \\
\texttt{llr2llrProcBuf} & 1.5 & 0.9 & 0.5 \\
\texttt{llr2CnProcBuf} & 6.0 & 4.1 & 2.2 \\
\texttt{cnProc} & 32.2 & 23.7 & 14.4 \\
\texttt{bnProcPc} & 21.2 & 12.1 & 5.5 \\
\texttt{bnProc} & 9.8 & 5.9 & 2.9 \\
\texttt{cn2bnProcBuf} & 23.3 & 13.9 & 6.8 \\
\texttt{bn2cnProcBuf} & 14.8 & 9.7 & 5.0 \\
\texttt{llrRes2llrOut} & 0.6 & 0.4 & 0.3 \\
\texttt{llr2bit} & 0.7 & 0.4 & 0.2 \\
\midrule
\textbf{Total} & \textbf{392.4} & \textbf{258.0} & \textbf{128.2} \\
% \textbf{Total} & \textbf{392.4} & \textbf{258.0} & \textbf{128.2} \\
\textbf{Total} & \textbf{111.0} & \textbf{71.8} & \textbf{38.5} \\
\bottomrule
\end{tabular}
\caption{BG2, Z=384, B=3840, LDPC Opt, 5 iterations}
......@@ -757,17 +795,27 @@ Table \ref{tab:lat-bg1-i5} shows the results for BG1, larges block size and diff
\toprule
\textbf{Function} & \textbf{Time [$\mu s$] (R=1/3)} & \textbf{Time [$\mu s$] (R=2/3)} & \textbf{Time [$\mu s$] (R=8/9)}\\
\midrule
\texttt{llr2llrProcBuf} & 5.5 & 4.9 & 4.6 \\
\texttt{llr2CnProcBuf} & 60.6 & 34.1 & 24.4 \\
\texttt{cnProc} & 102.0 & 74.1 & 56.0 \\
\texttt{bnProcPc} & 26.0 & 11.0 & 6.4 \\
\texttt{bnProc} & 15.7 & 7.4 & 4.5 \\
\texttt{cn2bnProcBuf} & 291.0 & 140.8 & 83.1 \\
\texttt{bn2cnProcBuf} & 193.6 & 100.5 & 63.0 \\
\texttt{llrRes2llrOut} & 13.3 & 6.9 & 5.2 \\
\texttt{llr2bit} & 0.4 & 0.2 & 0.2 \\
% \texttt{llr2llrProcBuf} & 5.5 & 4.9 & 4.6 \\
% \texttt{llr2CnProcBuf} & 60.6 & 34.1 & 24.4 \\
% \texttt{cnProc} & 102.0 & 74.1 & 56.0 \\
% \texttt{bnProcPc} & 26.0 & 11.0 & 6.4 \\
% \texttt{bnProc} & 15.7 & 7.4 & 4.5 \\
% \texttt{cn2bnProcBuf} & 291.0 & 140.8 & 83.1 \\
% \texttt{bn2cnProcBuf} & 193.6 & 100.5 & 63.0 \\
% \texttt{llrRes2llrOut} & 13.3 & 6.9 & 5.2 \\
% \texttt{llr2bit} & 0.4 & 0.2 & 0.2 \\
\texttt{llr2llrProcBuf} & 2.1 & 1.2 & 0.9 \\
\texttt{llr2CnProcBuf} & 10.6 & 5.4 & 2.9 \\
\texttt{cnProc} & 89.8 & 66.3 & 50.0 \\
\texttt{bnProcPc} & 28.1 & 12.4 & 7.1 \\
\texttt{bnProc} & 17.1 & 8.1 & 4.8 \\
\texttt{cn2bnProcBuf} & 38.7 & 17.1 & 9.3 \\
\texttt{bn2cnProcBuf} & 25.6 & 12.7 & 7.2 \\
\texttt{llrRes2llrOut} & 0.8 & 0.4 & 0.3 \\
\texttt{llr2bit} & 0.9 & 0.4 & 0.3 \\
\midrule
\textbf{Total} & \textbf{708.9} & \textbf{380.6} & \textbf{248.1}\\
% \textbf{Total} & \textbf{708.9} & \textbf{380.6} & \textbf{248.1}\\
\textbf{Total} & \textbf{214.6} & \textbf{124.6} & \textbf{83.6}\\
\bottomrule
\end{tabular}
\caption{BG1, Z=384, B=8448, LDPC Opt, 5 iterations}
......
......@@ -47,7 +47,7 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* llrRes = p_procBuf->llrRes;
int8_t* llrProcBuf = p_procBuf->llrProcBuf;
__m128i* p_bnProcBuf;
__m256i* p_bnProcBufRes;
__m128i* p_llrProcBuf;
......@@ -99,6 +99,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -152,6 +154,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -206,6 +210,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -260,6 +266,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -314,6 +322,15 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
//mexPrintf("ymm0: ");
//nrLDPC_debug_print256i_epi8(&ymm0);
////ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
//mexPrintf("\n");
//mexPrintf("ymmX: ");
//nrLDPC_debug_print256i_epi8(&ymm0);
//mexPrintf("\n");
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -368,6 +385,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -422,6 +441,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -476,6 +497,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -530,6 +553,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -584,6 +609,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -638,6 +665,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -692,6 +721,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -746,6 +777,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -800,6 +833,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -854,6 +889,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -908,6 +945,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -962,6 +1001,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1016,6 +1057,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1070,6 +1113,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1124,6 +1169,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1178,6 +1225,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1232,6 +1281,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1286,6 +1337,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1340,6 +1393,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1394,6 +1449,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1448,6 +1505,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1502,6 +1561,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1556,6 +1617,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1610,6 +1673,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1664,6 +1729,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
......@@ -1693,7 +1760,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
int8_t* bnProcBuf = p_procBuf->bnProcBuf;
int8_t* bnProcBufRes = p_procBuf->bnProcBufRes;
int8_t* llrRes = p_procBuf->llrRes;
__m256i* p_bnProcBuf;
__m256i* p_bnProcBufRes;
__m256i* p_llrRes;
......@@ -1708,6 +1775,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
uint32_t cnOffsetInGroup;
uint8_t idxBnGroup = 0;
const __m256i* p_minLLR = (__m256i*) minLLR256_epi8;
// =====================================================================
// Process group with 1 CN
// Already done in bnProcBufPc
......@@ -1740,7 +1809,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
}
......@@ -1775,7 +1845,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
}
......@@ -1810,7 +1881,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
}
......@@ -1845,7 +1917,17 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
/*
mexPrintf("res: ");
nrLDPC_debug_print256i_epi8(p_res);
mexPrintf(" llrRes: ");
nrLDPC_debug_print256i_epi8(p_llrRes);
mexPrintf(" bnProcBuf: ");
nrLDPC_debug_print256i_epi8(&p_bnProcBuf[k*cnOffsetInGroup + i]);
mexPrintf("\n");
*/
p_res++;
p_llrRes++;
}
......@@ -1880,6 +1962,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -1915,6 +1999,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -1950,6 +2036,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -1985,6 +2073,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2020,6 +2110,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2055,6 +2147,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2090,6 +2184,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2125,6 +2221,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2160,6 +2258,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2195,6 +2295,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2230,6 +2332,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2265,6 +2369,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2300,6 +2406,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2335,6 +2443,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2370,6 +2480,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2405,6 +2517,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2440,6 +2554,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2475,6 +2591,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2510,6 +2628,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2545,6 +2665,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2580,6 +2702,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2615,6 +2739,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2650,6 +2776,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2685,6 +2813,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......@@ -2720,6 +2850,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++)
{
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++;
p_llrRes++;
......
......@@ -38,7 +38,7 @@
#include "nrLDPC_cnProc.h"
#include "nrLDPC_bnProc.h"
#define NR_LDPC_ENABLE_PARITY_CHECK
//#define NR_LDPC_ENABLE_PARITY_CHECK
//#define NR_LDPC_PROFILER_DETAIL
#ifdef NR_LDPC_DEBUG_MODE
......@@ -96,6 +96,8 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{
// Use LLR processing buffer as temporary output buffer
p_llrOut = p_procBuf->llrProcBuf;
// Clear llrProcBuf
memset(p_llrOut,0, NR_LDPC_MAX_NUM_LLR*sizeof(int8_t));
}
......@@ -232,7 +234,6 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
while ( (i < (numMaxIter-1)) && (pcRes != 0) )
{
//mexPrintf("Start Main Loop: i=%d, numMaxIter=%d, pcRes = %d\n",i,numMaxIter,pcRes);
// Increase iteration counter
i++;
......@@ -334,12 +335,10 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{
pcRes = nrLDPC_cnProcPc_BG2(p_lut, p_procBuf, Z);
}
//mexPrintf("End Main Loop: Iter: i=%d, pcRes=%d\n",i,pcRes);
#ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProcPc);
#endif
#endif
}
// Last iteration
......@@ -347,7 +346,6 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{
// Increase iteration counter
i++;
//mexPrintf("Start Last Iter: i=%d, numMaxIter=%d, pcRes = %d\n",i,numMaxIter,pcRes);
// CN processing
#ifdef NR_LDPC_PROFILER_DETAIL
......@@ -458,11 +456,13 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
// If maximum number of iterations reached an PC still fails increase number of iterations
// Thus, i > numMaxIter indicates that PC has failed
//mexPrintf("End: Iter: i=%d, pcRes=%d\n",i,pcRes);
#ifdef NR_LDPC_ENABLE_PARITY_CHECK
if (pcRes != 0)
{
i++;
}
#endif
// Assign results from processing buffer to output
#ifdef NR_LDPC_PROFILER_DETAIL
......
......@@ -197,5 +197,6 @@ static const int8_t ones256_epi8[32] __attribute__ ((aligned(32))) = {1,1,1,1,1,
static const int8_t zeros256_epi8[32] __attribute__ ((aligned(32))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
/** Vector of 32 '127' in int8 for application with AVX2 */
static const int8_t maxLLR256_epi8[32] __attribute__ ((aligned(32))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
static const int8_t minLLR256_epi8[32] __attribute__ ((aligned(32))) = {-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127};
#endif
......@@ -86,15 +86,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = NULL;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R15;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG2_R15;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG2_R15;
p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG2_R15;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R15;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R15;
p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R15;
p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R15;
p_lut->numEdgesPerBn = lut_numEdgesPerBn_BG2_R15;
numLLR = NR_LDPC_NCOL_BG2_R15*Z;
}
else if (R == 13)
......@@ -120,14 +118,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = NULL;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R13;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG2_R13;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG2_R13;
p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG2_R13;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R13;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R13;
p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R13;
p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R13;
p_lut->numEdgesPerBn = lut_numEdgesPerBn_BG2_R13;
numLLR = NR_LDPC_NCOL_BG2_R13*Z;
}
else if (R == 23)
......@@ -153,14 +150,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = NULL;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R23;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG2_R23;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG2_R23;
p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG2_R23;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R23;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R23;
p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R23;
p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R23;
p_lut->numEdgesPerBn = lut_numEdgesPerBn_BG2_R23;
numLLR = NR_LDPC_NCOL_BG2_R23*Z;
}
......@@ -883,8 +879,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R13_CNG19;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R13;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG1_R13;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG1_R13;
p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG1_R13;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R13;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R13;
......@@ -916,8 +911,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R23_CNG19;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R23;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG1_R23;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG1_R23;
p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG1_R23;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R23;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R23;
......@@ -949,8 +943,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R89_CNG19;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R89;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG1_R89;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG1_R89;
p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG1_R89;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R89;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R89;
......
......@@ -951,31 +951,25 @@ static const uint8_t bnPosBnProcBuf_BG2_R23_CNG10[10][2] = {{0, 0},{0, 0},{1, 1}
// LUT for llr2llrProcBuf
// BG1
// R13
static const uint16_t llr2llrProcBufAddr_BG1_R13[24] = {25728,25344,17664,22272,20352,16128,19200,23424,19584,18048,23808,20736,24192,22656,21120,18432,21504,24960,18816,19968,23040,24576,16512,17280};
static const uint8_t llr2llrProcBufNumBn_BG1_R13[24] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,2,1};
static const uint8_t llr2llrProcBufNumEl_BG1_R13 = 24;
static const uint16_t llr2llrProcBufAddr_BG1_R13[26] = {25728,25344,17664,22272,20352,16128,19200,23424,19200,17664,23424,20736,23424,22272,20736,17664,20736,20736,24960,17664,19200,22272,23424,16512,16896,16896};
static const uint8_t llr2llrProcBufBnPos_BG1_R13[26] = {0,0,0,0,0,0,0,0,1,1,1,0,2,1,1,2,2,3,0,3,2,2,3,0,0,1};
// R23
static const uint16_t llr2llrProcBufAddr_BG1_R23[19] = {13056,12672,5760,6912,3840,6144,7680,4224,9600,8448,4608,11136,8832,11520,6528,9216,11904,4992,3456};
static const uint8_t llr2llrProcBufNumBn_BG1_R23[19] = {1,1,1,2,1,1,2,1,4,1,1,1,1,1,1,1,2,2,1};
static const uint8_t llr2llrProcBufNumEl_BG1_R23 = 19;
static const uint16_t llr2llrProcBufAddr_BG1_R23[26] = {13056,12672,5760,6912,6912,3840,5760,6912,6912,3840,9600,9600,9600,9600,6912,3840,9600,6912,9600,5760,6912,9600,9600,3840,3840,3456};
static const uint8_t llr2llrProcBufBnPos_BG1_R23[26] = {0,0,0,0,1,0,1,2,3,1,0,1,2,3,4,2,4,5,5,2,6,6,7,3,4,0};
// R89
static const uint16_t llr2llrProcBufAddr_BG1_R89[4] = {9984,9600,1536,384};
static const uint8_t llr2llrProcBufNumBn_BG1_R89[4] = {1,1,21,3};
static const uint8_t llr2llrProcBufNumEl_BG1_R89 = 4;
static const uint16_t llr2llrProcBufAddr_BG1_R89[26] = {9984,9600,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,384,384,384};
static const uint8_t llr2llrProcBufBnPos_BG1_R89[26] = {0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0,1,2};
// BG2
// R15
static const uint16_t llr2llrProcBufAddr_BG2_R15[13] = {19200,19584,17280,14592,18432,15744,18048,15360,16128,16512,18816,16896,17664};
static const uint8_t llr2llrProcBufNumBn_BG2_R15[13] = {1,1,1,2,1,1,1,1,1,1,1,1,1};
static const uint8_t llr2llrProcBufNumEl_BG2_R15 = 13;
static const uint16_t llr2llrProcBufAddr_BG2_R15[14] = {19200,19584,17280,14592,14592,18432,15744,18048,15360,16128,16512,18816,16512,17664};
static const uint8_t llr2llrProcBufBnPos_BG2_R15[14] = {0,0,0,0,1,0,0,0,0,0,0,0,1,0};
// R13
static const uint16_t llr2llrProcBufAddr_BG2_R13[10] = {11520,11904,6912,7296,8448,10752,9216,11136,8064,10368};
static const uint8_t llr2llrProcBufNumBn_BG2_R13[10] = {1,1,1,2,2,1,3,1,1,1};
static const uint8_t llr2llrProcBufNumEl_BG2_R13 = 10;
static const uint16_t llr2llrProcBufAddr_BG2_R13[14] = {11520,11904,6912,7296,7296,8448,8448,10752,8448,8448,8448,11136,8064,10368};
static const uint8_t llr2llrProcBufBnPos_BG2_R13[14] = {0,0,0,0,1,0,1,0,2,3,4,0,0,0};
// R23
static const uint16_t llr2llrProcBufAddr_BG2_R23[12] = {6144,5376,1152,2304,4224,3072,4608,3456,4992,3840,5760,1536};
static const uint8_t llr2llrProcBufNumBn_BG2_R23[12] = {1,1,1,2,1,1,1,1,1,1,1,2};
static const uint8_t llr2llrProcBufNumEl_BG2_R23 = 12;
static const uint16_t llr2llrProcBufAddr_BG2_R23[14] = {6144,5376,1152,2304,2304,4224,2304,4224,2304,4224,2304,5376,1152,1152};
static const uint8_t llr2llrProcBufBnPos_BG2_R23[14] = {0,0,0,0,1,0,2,1,3,2,4,1,1,2};
// LUT for llr2cnProcBuf
// LUTs are not rate dependent, however the number of CN in a CNG varies with rate
......
......@@ -84,19 +84,15 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL
{
uint32_t i;
const uint8_t numBn2CnG1 = p_lut->numBnInBnGroups[0];
uint32_t colG1 = NR_LDPC_START_COL_PARITY_BG1*Z;
uint32_t startColParity = (BG ==1 ) ? (NR_LDPC_START_COL_PARITY_BG1) : (NR_LDPC_START_COL_PARITY_BG2);
const uint16_t* lut_llr2llrProcBufAddr = p_lut->llr2llrProcBufAddr;
const uint8_t* lut_llr2llrProcBufNumBn = p_lut->llr2llrProcBufNumBn;
const uint8_t* lut_llr2llrProcBufNumEl = p_lut->llr2llrProcBufNumEl;
uint32_t colG1 = startColParity*Z;
uint16_t numLlr = 0;
int8_t* llrProcBuf = p_procBuf->llrProcBuf;
const uint16_t* lut_llr2llrProcBufAddr = p_lut->llr2llrProcBufAddr;
const uint8_t* lut_llr2llrProcBufBnPos = p_lut->llr2llrProcBufBnPos;
if (BG == 2)
{
colG1 = NR_LDPC_START_COL_PARITY_BG2*Z;
}
uint32_t idxBn;
int8_t* llrProcBuf = p_procBuf->llrProcBuf;
// Copy LLRs connected to 1 CN
if (numBn2CnG1 > 0)
......@@ -105,11 +101,11 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL
}
// First 2 columns might be set to zero directly if it's true they always belong to the groups with highest number of connected CNs...
for (i=0; i<(*lut_llr2llrProcBufNumEl); i++)
for (i=0; i<startColParity; i++)
{
numLlr = lut_llr2llrProcBufNumBn[i]*Z;
memcpy(&llrProcBuf[lut_llr2llrProcBufAddr[i]], llr, numLlr);
llr+=numLlr;
idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z;
memcpy(&llrProcBuf[idxBn], llr, Z);
llr += Z;
}
}
......@@ -835,7 +831,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
for (j=0; j<2; j++)
{
p_cnProcBuf = &cnProcBuf[lut_startAddrCnGroups[0] + j*bitOffsetInGroup];
for (i=0; i<lut_numCnInCnGroups[0]; i++)
{
idxBn = lut_startAddrBnProcBuf_CNG3[j][i] + lut_bnPosBnProcBuf_CNG3[j][i]*Z;
......@@ -927,7 +923,7 @@ static inline void nrLDPC_bn2cnProcBuf_BG2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf
nrLDPC_inv_circ_memcpy(p_cnProcBuf, &bnProcBufRes[idxBn], Z, lut_circShift_CNG10[j][i]);
p_cnProcBuf += Z;
}
}
}
}
/**
......@@ -1141,20 +1137,16 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n
{
uint32_t i;
const uint8_t numBn2CnG1 = p_lut->numBnInBnGroups[0];
uint32_t colG1 = NR_LDPC_START_COL_PARITY_BG1*Z;
uint32_t startColParity = (BG ==1 ) ? (NR_LDPC_START_COL_PARITY_BG1) : (NR_LDPC_START_COL_PARITY_BG2);
uint32_t colG1 = startColParity*Z;
const uint16_t* lut_llr2llrProcBufAddr = p_lut->llr2llrProcBufAddr;
const uint8_t* lut_llr2llrProcBufNumBn = p_lut->llr2llrProcBufNumBn;
const uint8_t* lut_llr2llrProcBufNumEl = p_lut->llr2llrProcBufNumEl;
const uint8_t* lut_llr2llrProcBufBnPos = p_lut->llr2llrProcBufBnPos;
uint16_t numLlr = 0;
int8_t* llrRes = p_procBuf->llrRes;
int8_t* p_llrOut = &llrOut[0];
if (BG == 2)
{
colG1 = NR_LDPC_START_COL_PARITY_BG2*Z;
}
uint32_t idxBn;
// Copy LLRs connected to 1 CN
if (numBn2CnG1 > 0)
......@@ -1162,13 +1154,14 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n
memcpy(&llrOut[colG1], llrRes, numBn2CnG1*Z);
}
// First 2 columns might be set to zero directly if it's true they always belong to the groups with highest number of connected CNs...
for (i=0; i<(*lut_llr2llrProcBufNumEl); i++)
for (i=0; i<startColParity; i++)
{
numLlr = lut_llr2llrProcBufNumBn[i]*Z;
memcpy(p_llrOut, &llrRes[lut_llr2llrProcBufAddr[i]], numLlr);
p_llrOut+=numLlr;
idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z;
memcpy(p_llrOut, &llrRes[idxBn], Z);
p_llrOut += Z;
}
}
#endif
......@@ -46,16 +46,11 @@ typedef struct nrLDPC_lut {
const uint8_t* numBnInBnGroups; /**< Number of CNs in every BN group */
const uint32_t* startAddrBnGroups; /**< Start addresses for BN groups in BN processing buffer */
const uint16_t* startAddrBnGroupsLlr; /**< Start addresses for BN groups in LLR processing buffer */
const uint32_t* llr2CnProcBuf; /**< LUT for input LLRs to CN processing buffer */
const uint8_t* numEdgesPerBn; /**< LUT with number of edges per BN */
const uint32_t* cn2bnProcBuf; /**< LUT for transferring CN processing results to BN processing buffer */
const uint16_t* llr2llrProcBuf; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint16_t** circShift[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */
const uint32_t** startAddrBnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */
const uint8_t** bnPosBnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */
const uint16_t* llr2llrProcBufAddr; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint8_t* llr2llrProcBufNumBn; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint8_t* llr2llrProcBufNumEl; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint8_t* llr2llrProcBufBnPos; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint8_t** posBnInCnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for llr2cnProcBuf */
} t_nrLDPC_lut;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment