Commit 6f351c3d authored by sebastian's avatar sebastian

Fixed bug in llr2CnProcBuf, started updating documentation

parent d2875d02
...@@ -542,9 +542,9 @@ In this section, the performance in terms of BLER and decoding latency of the cu ...@@ -542,9 +542,9 @@ In this section, the performance in terms of BLER and decoding latency of the cu
\subsection{BLER Performance} \subsection{BLER Performance}
\label{sec:bler-performance} \label{sec:bler-performance}
In all simulations, we assume AWGN, QPSK modulation and 8-bit input LLRs. The results are averaged over at least $10\,000$ channel realizations. In all simulations, we assume AWGN, QPSK modulation and 8-bit input LLRs, i.e. $-127$ until $+127$. The results are averaged over at least $10\,000$ channel realizations.
The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the current LDPC decoder implementation to the reference implementation developed by Kien. This reference implementation is called \textit{LDPC Ref} and uses the min-sum algorithm with 2 layers and 16 bit for processing. Out current optimized decoder implementation is referred to as \textit{LDPC Opt}. Moreover, reference results provided by Huawei are also shown. The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the current LDPC decoder implementation to the reference implementation developed by Kien. This reference implementation is called \textit{LDPC Ref} and uses the min-sum algorithm with 2 layers and 16 bit for processing. Our current optimized decoder implementation is referred to as \textit{LDPC Opt}. Moreover, reference results provided by Huawei are also shown.
\begin{figure}[ht] \begin{figure}[ht]
\centering \centering
...@@ -566,9 +566,10 @@ The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the curren ...@@ -566,9 +566,10 @@ The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the curren
% Kien's 2-layer 16bit code % Kien's 2-layer 16bit code
\addplot[red, solid, mark=o] plot coordinates { (-2.750000,0.915500) (-2.500000,0.576000) (-2.250000,0.165000) (-2.000000,0.017100) (-1.750000,0.000600) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)}; \addplot[red, solid, mark=o] plot coordinates { (-2.750000,0.915500) (-2.500000,0.576000) (-2.250000,0.165000) (-2.000000,0.017100) (-1.750000,0.000600) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)};
% LDPC opt with 16bit BN processing
\addplot[blue, solid, mark=square] plot coordinates { (-2.750000,0.998600) (-2.500000,0.953600) (-2.250000,0.718800) (-2.000000,0.299300) (-1.750000,0.053700) (-1.500000,0.005100) (-1.250000,0.000500) (-1.000000,0.000100)};
% 10 iterations
%\addplot[blue, solid, mark=square] plot coordinates { (-2.750000,0.998600) (-2.500000,0.953600) (-2.250000,0.718800) (-2.000000,0.299300) (-1.750000,0.053700) (-1.500000,0.005100) (-1.250000,0.000500) (-1.000000,0.000100)};
\addplot[blue, solid, mark=square] plot coordinates { (-2.750000,0.997200) (-2.500000,0.955000) (-2.250000,0.710900) (-2.000000,0.270400) (-1.750000,0.042400) (-1.500000,0.002200) (-1.250000,0.000000) (-1.000000,0.000000)};
% Matlab % Matlab
\addplot[green, solid, mark=triangle] plot coordinates {(-2.750000,0.318200) (-2.500000,0.135900) (-2.250000,0.102000) (-2.000000,0.092300) (-1.750000,0.079200) (-1.500000,0.063100) (-1.250000,0.041400) (-1.000000,0.029600) (-0.750000,0.017500) (-0.500000,0.011800) (-0.250000,0.006100) (0.000000,0.004100) (0.250000,0.002800) (0.500000,0.000800) (0.750000,0.000300) (1.000000,0.000400) }; \addplot[green, solid, mark=triangle] plot coordinates {(-2.750000,0.318200) (-2.500000,0.135900) (-2.250000,0.102000) (-2.000000,0.092300) (-1.750000,0.079200) (-1.500000,0.063100) (-1.250000,0.041400) (-1.000000,0.029600) (-0.750000,0.017500) (-0.500000,0.011800) (-0.250000,0.006100) (0.000000,0.004100) (0.250000,0.002800) (0.500000,0.000800) (0.750000,0.000300) (1.000000,0.000400) };
...@@ -577,13 +578,30 @@ The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the curren ...@@ -577,13 +578,30 @@ The first set of simulations in Figure \ref{fig:bler-bg2-15} compares the curren
% 20 iterations % 20 iterations
\addplot[red, solid, mark=o] plot coordinates { (-2.750000,0.330300) (-2.500000,0.067800) (-2.250000,0.006000) (-2.000000,0.000100) (-1.750000,0.000000) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)}; \addplot[red, solid, mark=o] plot coordinates { (-2.750000,0.330300) (-2.500000,0.067800) (-2.250000,0.006000) (-2.000000,0.000100) (-1.750000,0.000000) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)};
%\addplot[blue, solid, mark=square] plot coordinates {(-2.750000,0.341300) (-2.500000,0.065100) (-2.250000,0.004100) (-2.000000,0.000200) (-1.750000,0.000100) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)};
\addplot[blue, dashed, mark=square] plot coordinates {(-2.750000,0.337900) (-2.500000,0.058300) (-2.250000,0.004000) (-2.000000,0.000200) (-1.750000,0.000000) (-1.500000,0.000000) };
% LDPC base
%(-2.750000,0.369000) (-2.500000,0.068200) (-2.250000,0.002900) (-2.000000,0.000200) (-1.750000,0.000000) (-1.500000,0.000000)
% New LDPC with llr2cn of old implementation
% (-2.750000,0.716900) (-2.500000,0.300500) (-2.250000,0.045300) (-2.000000,0.003300) (-1.750000,0.000000) (-1.500000,0.000000)
% saturation in both bnproc and bnprocpc
% (-2.750000,0.668900) (-2.500000,0.248800) (-2.250000,0.039100) (-2.000000,0.002100) (-1.750000,0.000100) (-1.500000,0.000000) };
% saturation with parity check
% (-2.750000,0.672100) (-2.500000,0.245100) (-2.250000,0.037100) (-2.000000,0.001900) (-1.750000,0.000000) (-1.500000,0.000000) };
%(-2.750000,0.343100) (-2.500000,0.064300) (-2.250000,0.003400) (-2.000000,0.000100) (-1.750000,0.000000) (-1.500000,0.000100) };
%(-2.750000,0.334400) (-2.500000,0.059400) (-2.250000,0.003400) (-2.000000,0.000000) (-1.750000,0.000100) (-1.500000,0.000000) };
\addplot[blue, solid, mark=square] plot coordinates {(-2.750000,0.341300) (-2.500000,0.065100) (-2.250000,0.004100) (-2.000000,0.000200) (-1.750000,0.000100) (-1.500000,0.000000) (-1.250000,0.000000) (-1.000000,0.000000)};
% 5 iterations % 5 iterations
\addplot[red, solid, mark=o] plot coordinates {(-1.250000,0.781300) (-1.000000,0.421000) (-0.750000,0.140400) (-0.500000,0.028900) (-0.250000,0.003300) (0.000000,0.000300) (0.250000,0.000000) (0.500000,0.000000)}; \addplot[red, solid, mark=o] plot coordinates {(-1.250000,0.781300) (-1.000000,0.421000) (-0.750000,0.140400) (-0.500000,0.028900) (-0.250000,0.003300) (0.000000,0.000300) (0.250000,0.000000) (0.500000,0.000000)};
\addplot[blue, solid, mark=square] plot coordinates {(-0.250000,0.705000) (0.000000,0.406200) (0.250000,0.181300) (0.500000,0.061600) (0.750000,0.015900) (1.000000,0.004900) (1.250000,0.000900) (1.500000,0.000200)}; %\addplot[blue, solid, mark=square] plot coordinates {(-0.250000,0.705000) (0.000000,0.406200) (0.250000,0.181300) (0.500000,0.061600) (0.750000,0.015900) (1.000000,0.004900) (1.250000,0.000900) (1.500000,0.000200)};
\addplot[green, solid, mark=triangle] plot coordinates {(-1.000000,0.778900) (-0.500000,0.226400) (0.000000,0.027400) (0.500000,0.002600) (1.000000,0.000300) }; \addplot[blue, solid, mark=square] plot coordinates {(-1.000000,0.700400) (-0.750000,0.370600) (-0.500000,0.136000) (-0.250000,0.039000) (0.000000,0.008500) (0.250000,0.002400) (0.500000,0.000700) (0.750000,0.000000) (1.000000,0.000000) };
\addplot[green, solid, mark=triangle] plot coordinates {(-1.000000,0.778900) (-0.500000,0.226400) (0.000000,0.027400) (0.500000,0.002600) (1.000000,0.000300) };
% Parity check 50 iterations
\addplot[blue, solid, mark=square] plot coordinates {(-2.750000,0.214600) (-2.500000,0.029200) (-2.250000,0.001500) (-2.000000,0.000100) (-1.750000,0.000000) (-1.500000,0.000000) };
% 30 iterations % 30 iterations
% \addplot[blue, dashed, mark=square] plot coordinates {(-3.000000,0.623800) (-2.750000,0.224100) (-2.500000,0.031600) (-2.250000,0.001100) (-2.000000,0.000000)}; % \addplot[blue, dashed, mark=square] plot coordinates {(-3.000000,0.623800) (-2.750000,0.224100) (-2.500000,0.031600) (-2.250000,0.001100) (-2.000000,0.000000)};
...@@ -707,17 +725,27 @@ The results in Table \ref{tab:lat-bg2-r15} show the impact of the number of iter ...@@ -707,17 +725,27 @@ The results in Table \ref{tab:lat-bg2-r15} show the impact of the number of iter
\toprule \toprule
\textbf{Function} & \textbf{Time [$\mu s$] (5 it)} & \textbf{Time [$\mu s$] (10 it)} & \textbf{Time [$\mu s$] (20 it)}\\ \textbf{Function} & \textbf{Time [$\mu s$] (5 it)} & \textbf{Time [$\mu s$] (10 it)} & \textbf{Time [$\mu s$] (20 it)}\\
\midrule \midrule
\texttt{llr2llrProcBuf} & 1.1 & 1.1 & 1.1 \\ % \texttt{llr2llrProcBuf} & 1.1 & 1.1 & 1.1 \\
\texttt{llr2CnProcBuf} & 12.4 & 12.0 & 12.0 \\ % \texttt{llr2CnProcBuf} & 12.4 & 12.0 & 12.0 \\
\texttt{cnProc} & 11.7 & 22.1 & 43.5 \\ % \texttt{cnProc} & 11.7 & 22.1 & 43.5 \\
\texttt{bnProcPc} & 6.6 & 12.1 & 23.8 \\ % \texttt{bnProcPc} & 6.6 & 12.1 & 23.8 \\
\texttt{bnProc} & 4.2 & 8.1 & 16.2 \\ % \texttt{bnProc} & 4.2 & 8.1 & 16.2 \\
\texttt{cn2bnProcBuf} & 61.3 & 118.3 & 234.9 \\ % \texttt{cn2bnProcBuf} & 61.3 & 118.3 & 234.9 \\
\texttt{bn2cnProcBuf} & 38.1 & 82.5 & 172.3 \\ % \texttt{bn2cnProcBuf} & 38.1 & 82.5 & 172.3 \\
\texttt{llrRes2llrOut} & 3.5 & 3.4 & 3.4 \\ % \texttt{llrRes2llrOut} & 3.5 & 3.4 & 3.4 \\
\texttt{llr2bit} & 0.2 & 0.1 & 0.1 \\ % \texttt{llr2bit} & 0.2 & 0.1 & 0.1 \\
\texttt{llr2llrProcBuf} & 0.5 & 0.5 & 0.5 \\
\texttt{llr2CnProcBuf} & 5.0 & 4.8 & 4.9 \\
\texttt{cnProc} & 12.4 & 23.0 & 42.7 \\
\texttt{bnProcPc} & 8.4 & 14.8 & 27.0 \\
\texttt{bnProc} & 5.5 & 10.1 & 19.0 \\
\texttt{cn2bnProcBuf} & 14.9 & 24.4 & 44.0 \\
\texttt{bn2cnProcBuf} & 10.5 & 17.8 & 31.8 \\
\texttt{llrRes2llrOut} & 0.3 & 0.3 & 0.3 \\
\texttt{llr2bit} & 0.2 & 0.2 & 0.2 \\
\midrule \midrule
\textbf{Total} & \textbf{139.4} & \textbf{260.3} & \textbf{508.4} \\ % \textbf{Total} & \textbf{139.4} & \textbf{260.3} & \textbf{508.4} \\
\textbf{Total} & \textbf{58.5} & \textbf{97.1} & \textbf{172.6} \\
\bottomrule \bottomrule
\end{tabular} \end{tabular}
\caption{BG2, Z=128, R=1/5, B=1280, LDPC Opt} \caption{BG2, Z=128, R=1/5, B=1280, LDPC Opt}
...@@ -732,17 +760,27 @@ Table \ref{tab:lat-bg2-i5} shows the impact of the code rate on the latency for ...@@ -732,17 +760,27 @@ Table \ref{tab:lat-bg2-i5} shows the impact of the code rate on the latency for
\toprule \toprule
\textbf{Function} & \textbf{Time [$\mu s$] (R=1/5)} & \textbf{Time [$\mu s$] (R=1/3)} & \textbf{Time [$\mu s$] (R=2/3)}\\ \textbf{Function} & \textbf{Time [$\mu s$] (R=1/5)} & \textbf{Time [$\mu s$] (R=1/3)} & \textbf{Time [$\mu s$] (R=2/3)}\\
\midrule \midrule
\texttt{llr2llrProcBuf} & 3.2 & 2.9 & 2.6 \\ % \texttt{llr2llrProcBuf} & 3.2 & 2.9 & 2.6 \\
\texttt{llr2CnProcBuf} & 36.5 & 25.4 & 14.8 \\ % \texttt{llr2CnProcBuf} & 36.5 & 25.4 & 14.8 \\
\texttt{cnProc} & 33.6 & 25.2 & 13.3 \\ % \texttt{cnProc} & 33.6 & 25.2 & 13.3 \\
\texttt{bnProcPc} & 17.6 & 10.2 & 4.5 \\ % \texttt{bnProcPc} & 17.6 & 10.2 & 4.5 \\
\texttt{bnProc} & 8.5 & 5.4 & 2.5 \\ % \texttt{bnProc} & 8.5 & 5.4 & 2.5 \\
\texttt{cn2bnProcBuf} & 175.3 & 110.6 & 50.7 \\ % \texttt{cn2bnProcBuf} & 175.3 & 110.6 & 50.7 \\
\texttt{bn2cnProcBuf} & 106.6 & 71.2 & 36.1 \\ % \texttt{bn2cnProcBuf} & 106.6 & 71.2 & 36.1 \\
\texttt{llrRes2llrOut} & 10.2 & 6.3 & 3.3 \\ % \texttt{llrRes2llrOut} & 10.2 & 6.3 & 3.3 \\
\texttt{llr2bit} & 0.4 & 0.2 & 0.1 \\ % \texttt{llr2bit} & 0.4 & 0.2 & 0.1 \\
\texttt{llr2llrProcBuf} & 1.5 & 0.9 & 0.5 \\
\texttt{llr2CnProcBuf} & 6.0 & 4.1 & 2.2 \\
\texttt{cnProc} & 32.2 & 23.7 & 14.4 \\
\texttt{bnProcPc} & 21.2 & 12.1 & 5.5 \\
\texttt{bnProc} & 9.8 & 5.9 & 2.9 \\
\texttt{cn2bnProcBuf} & 23.3 & 13.9 & 6.8 \\
\texttt{bn2cnProcBuf} & 14.8 & 9.7 & 5.0 \\
\texttt{llrRes2llrOut} & 0.6 & 0.4 & 0.3 \\
\texttt{llr2bit} & 0.7 & 0.4 & 0.2 \\
\midrule \midrule
\textbf{Total} & \textbf{392.4} & \textbf{258.0} & \textbf{128.2} \\ % \textbf{Total} & \textbf{392.4} & \textbf{258.0} & \textbf{128.2} \\
\textbf{Total} & \textbf{111.0} & \textbf{71.8} & \textbf{38.5} \\
\bottomrule \bottomrule
\end{tabular} \end{tabular}
\caption{BG2, Z=384, B=3840, LDPC Opt, 5 iterations} \caption{BG2, Z=384, B=3840, LDPC Opt, 5 iterations}
...@@ -757,17 +795,27 @@ Table \ref{tab:lat-bg1-i5} shows the results for BG1, larges block size and diff ...@@ -757,17 +795,27 @@ Table \ref{tab:lat-bg1-i5} shows the results for BG1, larges block size and diff
\toprule \toprule
\textbf{Function} & \textbf{Time [$\mu s$] (R=1/3)} & \textbf{Time [$\mu s$] (R=2/3)} & \textbf{Time [$\mu s$] (R=8/9)}\\ \textbf{Function} & \textbf{Time [$\mu s$] (R=1/3)} & \textbf{Time [$\mu s$] (R=2/3)} & \textbf{Time [$\mu s$] (R=8/9)}\\
\midrule \midrule
\texttt{llr2llrProcBuf} & 5.5 & 4.9 & 4.6 \\ % \texttt{llr2llrProcBuf} & 5.5 & 4.9 & 4.6 \\
\texttt{llr2CnProcBuf} & 60.6 & 34.1 & 24.4 \\ % \texttt{llr2CnProcBuf} & 60.6 & 34.1 & 24.4 \\
\texttt{cnProc} & 102.0 & 74.1 & 56.0 \\ % \texttt{cnProc} & 102.0 & 74.1 & 56.0 \\
\texttt{bnProcPc} & 26.0 & 11.0 & 6.4 \\ % \texttt{bnProcPc} & 26.0 & 11.0 & 6.4 \\
\texttt{bnProc} & 15.7 & 7.4 & 4.5 \\ % \texttt{bnProc} & 15.7 & 7.4 & 4.5 \\
\texttt{cn2bnProcBuf} & 291.0 & 140.8 & 83.1 \\ % \texttt{cn2bnProcBuf} & 291.0 & 140.8 & 83.1 \\
\texttt{bn2cnProcBuf} & 193.6 & 100.5 & 63.0 \\ % \texttt{bn2cnProcBuf} & 193.6 & 100.5 & 63.0 \\
\texttt{llrRes2llrOut} & 13.3 & 6.9 & 5.2 \\ % \texttt{llrRes2llrOut} & 13.3 & 6.9 & 5.2 \\
\texttt{llr2bit} & 0.4 & 0.2 & 0.2 \\ % \texttt{llr2bit} & 0.4 & 0.2 & 0.2 \\
\texttt{llr2llrProcBuf} & 2.1 & 1.2 & 0.9 \\
\texttt{llr2CnProcBuf} & 10.6 & 5.4 & 2.9 \\
\texttt{cnProc} & 89.8 & 66.3 & 50.0 \\
\texttt{bnProcPc} & 28.1 & 12.4 & 7.1 \\
\texttt{bnProc} & 17.1 & 8.1 & 4.8 \\
\texttt{cn2bnProcBuf} & 38.7 & 17.1 & 9.3 \\
\texttt{bn2cnProcBuf} & 25.6 & 12.7 & 7.2 \\
\texttt{llrRes2llrOut} & 0.8 & 0.4 & 0.3 \\
\texttt{llr2bit} & 0.9 & 0.4 & 0.3 \\
\midrule \midrule
\textbf{Total} & \textbf{708.9} & \textbf{380.6} & \textbf{248.1}\\ % \textbf{Total} & \textbf{708.9} & \textbf{380.6} & \textbf{248.1}\\
\textbf{Total} & \textbf{214.6} & \textbf{124.6} & \textbf{83.6}\\
\bottomrule \bottomrule
\end{tabular} \end{tabular}
\caption{BG1, Z=384, B=8448, LDPC Opt, 5 iterations} \caption{BG1, Z=384, B=8448, LDPC Opt, 5 iterations}
......
...@@ -99,6 +99,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -99,6 +99,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -152,6 +154,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -152,6 +154,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -206,6 +210,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -206,6 +210,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -260,6 +266,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -260,6 +266,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -314,6 +322,15 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -314,6 +322,15 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
//mexPrintf("ymm0: ");
//nrLDPC_debug_print256i_epi8(&ymm0);
////ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
//mexPrintf("\n");
//mexPrintf("ymmX: ");
//nrLDPC_debug_print256i_epi8(&ymm0);
//mexPrintf("\n");
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -368,6 +385,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -368,6 +385,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -422,6 +441,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -422,6 +441,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -476,6 +497,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -476,6 +497,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -530,6 +553,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -530,6 +553,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -584,6 +609,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -584,6 +609,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -638,6 +665,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -638,6 +665,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -692,6 +721,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -692,6 +721,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -746,6 +777,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -746,6 +777,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -800,6 +833,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -800,6 +833,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -854,6 +889,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -854,6 +889,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -908,6 +945,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -908,6 +945,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -962,6 +1001,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -962,6 +1001,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1016,6 +1057,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1016,6 +1057,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1070,6 +1113,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1070,6 +1113,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1124,6 +1169,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1124,6 +1169,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1178,6 +1225,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1178,6 +1225,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1232,6 +1281,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1232,6 +1281,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1286,6 +1337,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1286,6 +1337,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1340,6 +1393,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1340,6 +1393,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1394,6 +1449,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1394,6 +1449,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1448,6 +1505,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1448,6 +1505,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1502,6 +1561,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1502,6 +1561,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1556,6 +1617,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1556,6 +1617,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1610,6 +1673,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1610,6 +1673,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1664,6 +1729,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc ...@@ -1664,6 +1729,8 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_proc
// Pack results back to epi8 // Pack results back to epi8
ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
// Limit to minLLR -127
//ymm0 = _mm256_max_epi8(ymm0, *p_minLLR);
// ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
// p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
*p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
...@@ -1708,6 +1775,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1708,6 +1775,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
uint32_t cnOffsetInGroup; uint32_t cnOffsetInGroup;
uint8_t idxBnGroup = 0; uint8_t idxBnGroup = 0;
const __m256i* p_minLLR = (__m256i*) minLLR256_epi8;
// ===================================================================== // =====================================================================
// Process group with 1 CN // Process group with 1 CN
// Already done in bnProcBufPc // Already done in bnProcBufPc
...@@ -1740,7 +1809,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1740,7 +1809,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
} }
...@@ -1775,7 +1845,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1775,7 +1845,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
} }
...@@ -1810,7 +1881,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1810,7 +1881,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
} }
...@@ -1845,7 +1917,17 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1845,7 +1917,17 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
/*
mexPrintf("res: ");
nrLDPC_debug_print256i_epi8(p_res);
mexPrintf(" llrRes: ");
nrLDPC_debug_print256i_epi8(p_llrRes);
mexPrintf(" bnProcBuf: ");
nrLDPC_debug_print256i_epi8(&p_bnProcBuf[k*cnOffsetInGroup + i]);
mexPrintf("\n");
*/
p_res++; p_res++;
p_llrRes++; p_llrRes++;
} }
...@@ -1880,6 +1962,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1880,6 +1962,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -1915,6 +1999,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1915,6 +1999,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -1950,6 +2036,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1950,6 +2036,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -1985,6 +2073,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -1985,6 +2073,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2020,6 +2110,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2020,6 +2110,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2055,6 +2147,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2055,6 +2147,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2090,6 +2184,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2090,6 +2184,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2125,6 +2221,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2125,6 +2221,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2160,6 +2258,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2160,6 +2258,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2195,6 +2295,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2195,6 +2295,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2230,6 +2332,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2230,6 +2332,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2265,6 +2369,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2265,6 +2369,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2300,6 +2406,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2300,6 +2406,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2335,6 +2443,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2335,6 +2443,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2370,6 +2480,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2370,6 +2480,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2405,6 +2517,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2405,6 +2517,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2440,6 +2554,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2440,6 +2554,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2475,6 +2591,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2475,6 +2591,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2510,6 +2628,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2510,6 +2628,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2545,6 +2665,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2545,6 +2665,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2580,6 +2702,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2580,6 +2702,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2615,6 +2739,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2615,6 +2739,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2650,6 +2776,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2650,6 +2776,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2685,6 +2813,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2685,6 +2813,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
...@@ -2720,6 +2850,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu ...@@ -2720,6 +2850,8 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBu
for (i=0; i<M; i++) for (i=0; i<M; i++)
{ {
*p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]); *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
// Limit to minLLR -127
*p_res = _mm256_max_epi8(*p_res, *p_minLLR);
p_res++; p_res++;
p_llrRes++; p_llrRes++;
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include "nrLDPC_cnProc.h" #include "nrLDPC_cnProc.h"
#include "nrLDPC_bnProc.h" #include "nrLDPC_bnProc.h"
#define NR_LDPC_ENABLE_PARITY_CHECK //#define NR_LDPC_ENABLE_PARITY_CHECK
//#define NR_LDPC_PROFILER_DETAIL //#define NR_LDPC_PROFILER_DETAIL
#ifdef NR_LDPC_DEBUG_MODE #ifdef NR_LDPC_DEBUG_MODE
...@@ -96,6 +96,8 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -96,6 +96,8 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{ {
// Use LLR processing buffer as temporary output buffer // Use LLR processing buffer as temporary output buffer
p_llrOut = p_procBuf->llrProcBuf; p_llrOut = p_procBuf->llrProcBuf;
// Clear llrProcBuf
memset(p_llrOut,0, NR_LDPC_MAX_NUM_LLR*sizeof(int8_t));
} }
...@@ -232,7 +234,6 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -232,7 +234,6 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
while ( (i < (numMaxIter-1)) && (pcRes != 0) ) while ( (i < (numMaxIter-1)) && (pcRes != 0) )
{ {
//mexPrintf("Start Main Loop: i=%d, numMaxIter=%d, pcRes = %d\n",i,numMaxIter,pcRes);
// Increase iteration counter // Increase iteration counter
i++; i++;
...@@ -334,12 +335,10 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -334,12 +335,10 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{ {
pcRes = nrLDPC_cnProcPc_BG2(p_lut, p_procBuf, Z); pcRes = nrLDPC_cnProcPc_BG2(p_lut, p_procBuf, Z);
} }
//mexPrintf("End Main Loop: Iter: i=%d, pcRes=%d\n",i,pcRes);
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
stop_meas(&p_profiler->cnProcPc); stop_meas(&p_profiler->cnProcPc);
#endif #endif
#endif #endif
} }
// Last iteration // Last iteration
...@@ -347,7 +346,6 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -347,7 +346,6 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
{ {
// Increase iteration counter // Increase iteration counter
i++; i++;
//mexPrintf("Start Last Iter: i=%d, numMaxIter=%d, pcRes = %d\n",i,numMaxIter,pcRes);
// CN processing // CN processing
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
...@@ -458,11 +456,13 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP ...@@ -458,11 +456,13 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP
// If maximum number of iterations reached an PC still fails increase number of iterations // If maximum number of iterations reached an PC still fails increase number of iterations
// Thus, i > numMaxIter indicates that PC has failed // Thus, i > numMaxIter indicates that PC has failed
//mexPrintf("End: Iter: i=%d, pcRes=%d\n",i,pcRes);
#ifdef NR_LDPC_ENABLE_PARITY_CHECK
if (pcRes != 0) if (pcRes != 0)
{ {
i++; i++;
} }
#endif
// Assign results from processing buffer to output // Assign results from processing buffer to output
#ifdef NR_LDPC_PROFILER_DETAIL #ifdef NR_LDPC_PROFILER_DETAIL
......
...@@ -197,5 +197,6 @@ static const int8_t ones256_epi8[32] __attribute__ ((aligned(32))) = {1,1,1,1,1, ...@@ -197,5 +197,6 @@ static const int8_t ones256_epi8[32] __attribute__ ((aligned(32))) = {1,1,1,1,1,
static const int8_t zeros256_epi8[32] __attribute__ ((aligned(32))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; static const int8_t zeros256_epi8[32] __attribute__ ((aligned(32))) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
/** Vector of 32 '127' in int8 for application with AVX2 */ /** Vector of 32 '127' in int8 for application with AVX2 */
static const int8_t maxLLR256_epi8[32] __attribute__ ((aligned(32))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127}; static const int8_t maxLLR256_epi8[32] __attribute__ ((aligned(32))) = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
static const int8_t minLLR256_epi8[32] __attribute__ ((aligned(32))) = {-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127};
#endif #endif
...@@ -86,15 +86,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu ...@@ -86,15 +86,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = NULL; p_lut->bnPosBnProcBuf[8] = NULL;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R15; p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R15;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG2_R15; p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG2_R15;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG2_R15;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R15; p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R15;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R15; p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R15;
p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R15; p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R15;
p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R15; p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R15;
p_lut->numEdgesPerBn = lut_numEdgesPerBn_BG2_R15;
numLLR = NR_LDPC_NCOL_BG2_R15*Z; numLLR = NR_LDPC_NCOL_BG2_R15*Z;
} }
else if (R == 13) else if (R == 13)
...@@ -120,14 +118,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu ...@@ -120,14 +118,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = NULL; p_lut->bnPosBnProcBuf[8] = NULL;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R13; p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R13;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG2_R13; p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG2_R13;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG2_R13;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R13; p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R13;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R13; p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R13;
p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R13; p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R13;
p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R13; p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R13;
p_lut->numEdgesPerBn = lut_numEdgesPerBn_BG2_R13;
numLLR = NR_LDPC_NCOL_BG2_R13*Z; numLLR = NR_LDPC_NCOL_BG2_R13*Z;
} }
else if (R == 23) else if (R == 23)
...@@ -153,14 +150,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu ...@@ -153,14 +150,13 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = NULL; p_lut->bnPosBnProcBuf[8] = NULL;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R23; p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG2_R23;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG2_R23; p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG2_R23;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG2_R23;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R23; p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG2_R23;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R23; p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG2_R23;
p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R23; p_lut->startAddrBnGroups = lut_startAddrBnGroups_BG2_R23;
p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R23; p_lut->startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R23;
p_lut->numEdgesPerBn = lut_numEdgesPerBn_BG2_R23;
numLLR = NR_LDPC_NCOL_BG2_R23*Z; numLLR = NR_LDPC_NCOL_BG2_R23*Z;
} }
...@@ -883,8 +879,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu ...@@ -883,8 +879,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R13_CNG19; p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R13_CNG19;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R13; p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R13;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG1_R13; p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG1_R13;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG1_R13;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R13; p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R13;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R13; p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R13;
...@@ -916,8 +911,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu ...@@ -916,8 +911,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R23_CNG19; p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R23_CNG19;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R23; p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R23;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG1_R23; p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG1_R23;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG1_R23;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R23; p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R23;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R23; p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R23;
...@@ -949,8 +943,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu ...@@ -949,8 +943,7 @@ static inline uint32_t nrLDPC_init(t_nrLDPC_dec_params* p_decParams, t_nrLDPC_lu
p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R89_CNG19; p_lut->bnPosBnProcBuf[8] = (const uint8_t**) bnPosBnProcBuf_BG1_R89_CNG19;
p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R89; p_lut->llr2llrProcBufAddr = llr2llrProcBufAddr_BG1_R89;
p_lut->llr2llrProcBufNumBn = llr2llrProcBufNumBn_BG1_R89; p_lut->llr2llrProcBufBnPos = llr2llrProcBufBnPos_BG1_R89;
p_lut->llr2llrProcBufNumEl = &llr2llrProcBufNumEl_BG1_R89;
p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R89; p_lut->numCnInCnGroups = lut_numCnInCnGroups_BG1_R89;
p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R89; p_lut->numBnInBnGroups = lut_numBnInBnGroups_BG1_R89;
......
...@@ -951,31 +951,25 @@ static const uint8_t bnPosBnProcBuf_BG2_R23_CNG10[10][2] = {{0, 0},{0, 0},{1, 1} ...@@ -951,31 +951,25 @@ static const uint8_t bnPosBnProcBuf_BG2_R23_CNG10[10][2] = {{0, 0},{0, 0},{1, 1}
// LUT for llr2llrProcBuf // LUT for llr2llrProcBuf
// BG1 // BG1
// R13 // R13
static const uint16_t llr2llrProcBufAddr_BG1_R13[24] = {25728,25344,17664,22272,20352,16128,19200,23424,19584,18048,23808,20736,24192,22656,21120,18432,21504,24960,18816,19968,23040,24576,16512,17280}; static const uint16_t llr2llrProcBufAddr_BG1_R13[26] = {25728,25344,17664,22272,20352,16128,19200,23424,19200,17664,23424,20736,23424,22272,20736,17664,20736,20736,24960,17664,19200,22272,23424,16512,16896,16896};
static const uint8_t llr2llrProcBufNumBn_BG1_R13[24] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,2,1}; static const uint8_t llr2llrProcBufBnPos_BG1_R13[26] = {0,0,0,0,0,0,0,0,1,1,1,0,2,1,1,2,2,3,0,3,2,2,3,0,0,1};
static const uint8_t llr2llrProcBufNumEl_BG1_R13 = 24;
// R23 // R23
static const uint16_t llr2llrProcBufAddr_BG1_R23[19] = {13056,12672,5760,6912,3840,6144,7680,4224,9600,8448,4608,11136,8832,11520,6528,9216,11904,4992,3456}; static const uint16_t llr2llrProcBufAddr_BG1_R23[26] = {13056,12672,5760,6912,6912,3840,5760,6912,6912,3840,9600,9600,9600,9600,6912,3840,9600,6912,9600,5760,6912,9600,9600,3840,3840,3456};
static const uint8_t llr2llrProcBufNumBn_BG1_R23[19] = {1,1,1,2,1,1,2,1,4,1,1,1,1,1,1,1,2,2,1}; static const uint8_t llr2llrProcBufBnPos_BG1_R23[26] = {0,0,0,0,1,0,1,2,3,1,0,1,2,3,4,2,4,5,5,2,6,6,7,3,4,0};
static const uint8_t llr2llrProcBufNumEl_BG1_R23 = 19;
// R89 // R89
static const uint16_t llr2llrProcBufAddr_BG1_R89[4] = {9984,9600,1536,384}; static const uint16_t llr2llrProcBufAddr_BG1_R89[26] = {9984,9600,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,1536,384,384,384};
static const uint8_t llr2llrProcBufNumBn_BG1_R89[4] = {1,1,21,3}; static const uint8_t llr2llrProcBufBnPos_BG1_R89[26] = {0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0,1,2};
static const uint8_t llr2llrProcBufNumEl_BG1_R89 = 4;
// BG2 // BG2
// R15 // R15
static const uint16_t llr2llrProcBufAddr_BG2_R15[13] = {19200,19584,17280,14592,18432,15744,18048,15360,16128,16512,18816,16896,17664}; static const uint16_t llr2llrProcBufAddr_BG2_R15[14] = {19200,19584,17280,14592,14592,18432,15744,18048,15360,16128,16512,18816,16512,17664};
static const uint8_t llr2llrProcBufNumBn_BG2_R15[13] = {1,1,1,2,1,1,1,1,1,1,1,1,1}; static const uint8_t llr2llrProcBufBnPos_BG2_R15[14] = {0,0,0,0,1,0,0,0,0,0,0,0,1,0};
static const uint8_t llr2llrProcBufNumEl_BG2_R15 = 13;
// R13 // R13
static const uint16_t llr2llrProcBufAddr_BG2_R13[10] = {11520,11904,6912,7296,8448,10752,9216,11136,8064,10368}; static const uint16_t llr2llrProcBufAddr_BG2_R13[14] = {11520,11904,6912,7296,7296,8448,8448,10752,8448,8448,8448,11136,8064,10368};
static const uint8_t llr2llrProcBufNumBn_BG2_R13[10] = {1,1,1,2,2,1,3,1,1,1}; static const uint8_t llr2llrProcBufBnPos_BG2_R13[14] = {0,0,0,0,1,0,1,0,2,3,4,0,0,0};
static const uint8_t llr2llrProcBufNumEl_BG2_R13 = 10;
// R23 // R23
static const uint16_t llr2llrProcBufAddr_BG2_R23[12] = {6144,5376,1152,2304,4224,3072,4608,3456,4992,3840,5760,1536}; static const uint16_t llr2llrProcBufAddr_BG2_R23[14] = {6144,5376,1152,2304,2304,4224,2304,4224,2304,4224,2304,5376,1152,1152};
static const uint8_t llr2llrProcBufNumBn_BG2_R23[12] = {1,1,1,2,1,1,1,1,1,1,1,2}; static const uint8_t llr2llrProcBufBnPos_BG2_R23[14] = {0,0,0,0,1,0,2,1,3,2,4,1,1,2};
static const uint8_t llr2llrProcBufNumEl_BG2_R23 = 12;
// LUT for llr2cnProcBuf // LUT for llr2cnProcBuf
// LUTs are not rate dependent, however the number of CN in a CNG varies with rate // LUTs are not rate dependent, however the number of CN in a CNG varies with rate
......
...@@ -84,20 +84,16 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL ...@@ -84,20 +84,16 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL
{ {
uint32_t i; uint32_t i;
const uint8_t numBn2CnG1 = p_lut->numBnInBnGroups[0]; const uint8_t numBn2CnG1 = p_lut->numBnInBnGroups[0];
uint32_t colG1 = NR_LDPC_START_COL_PARITY_BG1*Z; uint32_t startColParity = (BG ==1 ) ? (NR_LDPC_START_COL_PARITY_BG1) : (NR_LDPC_START_COL_PARITY_BG2);
uint32_t colG1 = startColParity*Z;
const uint16_t* lut_llr2llrProcBufAddr = p_lut->llr2llrProcBufAddr; const uint16_t* lut_llr2llrProcBufAddr = p_lut->llr2llrProcBufAddr;
const uint8_t* lut_llr2llrProcBufNumBn = p_lut->llr2llrProcBufNumBn; const uint8_t* lut_llr2llrProcBufBnPos = p_lut->llr2llrProcBufBnPos;
const uint8_t* lut_llr2llrProcBufNumEl = p_lut->llr2llrProcBufNumEl;
uint16_t numLlr = 0; uint32_t idxBn;
int8_t* llrProcBuf = p_procBuf->llrProcBuf; int8_t* llrProcBuf = p_procBuf->llrProcBuf;
if (BG == 2)
{
colG1 = NR_LDPC_START_COL_PARITY_BG2*Z;
}
// Copy LLRs connected to 1 CN // Copy LLRs connected to 1 CN
if (numBn2CnG1 > 0) if (numBn2CnG1 > 0)
{ {
...@@ -105,11 +101,11 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL ...@@ -105,11 +101,11 @@ static inline void nrLDPC_llr2llrProcBuf(t_nrLDPC_lut* p_lut, int8_t* llr, t_nrL
} }
// First 2 columns might be set to zero directly if it's true they always belong to the groups with highest number of connected CNs... // First 2 columns might be set to zero directly if it's true they always belong to the groups with highest number of connected CNs...
for (i=0; i<(*lut_llr2llrProcBufNumEl); i++) for (i=0; i<startColParity; i++)
{ {
numLlr = lut_llr2llrProcBufNumBn[i]*Z; idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z;
memcpy(&llrProcBuf[lut_llr2llrProcBufAddr[i]], llr, numLlr); memcpy(&llrProcBuf[idxBn], llr, Z);
llr+=numLlr; llr += Z;
} }
} }
...@@ -1141,20 +1137,16 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n ...@@ -1141,20 +1137,16 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n
{ {
uint32_t i; uint32_t i;
const uint8_t numBn2CnG1 = p_lut->numBnInBnGroups[0]; const uint8_t numBn2CnG1 = p_lut->numBnInBnGroups[0];
uint32_t colG1 = NR_LDPC_START_COL_PARITY_BG1*Z; uint32_t startColParity = (BG ==1 ) ? (NR_LDPC_START_COL_PARITY_BG1) : (NR_LDPC_START_COL_PARITY_BG2);
uint32_t colG1 = startColParity*Z;
const uint16_t* lut_llr2llrProcBufAddr = p_lut->llr2llrProcBufAddr; const uint16_t* lut_llr2llrProcBufAddr = p_lut->llr2llrProcBufAddr;
const uint8_t* lut_llr2llrProcBufNumBn = p_lut->llr2llrProcBufNumBn; const uint8_t* lut_llr2llrProcBufBnPos = p_lut->llr2llrProcBufBnPos;
const uint8_t* lut_llr2llrProcBufNumEl = p_lut->llr2llrProcBufNumEl;
uint16_t numLlr = 0;
int8_t* llrRes = p_procBuf->llrRes; int8_t* llrRes = p_procBuf->llrRes;
int8_t* p_llrOut = &llrOut[0]; int8_t* p_llrOut = &llrOut[0];
uint32_t idxBn;
if (BG == 2)
{
colG1 = NR_LDPC_START_COL_PARITY_BG2*Z;
}
// Copy LLRs connected to 1 CN // Copy LLRs connected to 1 CN
if (numBn2CnG1 > 0) if (numBn2CnG1 > 0)
...@@ -1162,13 +1154,14 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n ...@@ -1162,13 +1154,14 @@ static inline void nrLDPC_llrRes2llrOut(t_nrLDPC_lut* p_lut, int8_t* llrOut, t_n
memcpy(&llrOut[colG1], llrRes, numBn2CnG1*Z); memcpy(&llrOut[colG1], llrRes, numBn2CnG1*Z);
} }
// First 2 columns might be set to zero directly if it's true they always belong to the groups with highest number of connected CNs...
for (i=0; i<(*lut_llr2llrProcBufNumEl); i++) for (i=0; i<startColParity; i++)
{ {
numLlr = lut_llr2llrProcBufNumBn[i]*Z; idxBn = lut_llr2llrProcBufAddr[i] + lut_llr2llrProcBufBnPos[i]*Z;
memcpy(p_llrOut, &llrRes[lut_llr2llrProcBufAddr[i]], numLlr); memcpy(p_llrOut, &llrRes[idxBn], Z);
p_llrOut+=numLlr; p_llrOut += Z;
} }
} }
#endif #endif
...@@ -46,16 +46,11 @@ typedef struct nrLDPC_lut { ...@@ -46,16 +46,11 @@ typedef struct nrLDPC_lut {
const uint8_t* numBnInBnGroups; /**< Number of CNs in every BN group */ const uint8_t* numBnInBnGroups; /**< Number of CNs in every BN group */
const uint32_t* startAddrBnGroups; /**< Start addresses for BN groups in BN processing buffer */ const uint32_t* startAddrBnGroups; /**< Start addresses for BN groups in BN processing buffer */
const uint16_t* startAddrBnGroupsLlr; /**< Start addresses for BN groups in LLR processing buffer */ const uint16_t* startAddrBnGroupsLlr; /**< Start addresses for BN groups in LLR processing buffer */
const uint32_t* llr2CnProcBuf; /**< LUT for input LLRs to CN processing buffer */
const uint8_t* numEdgesPerBn; /**< LUT with number of edges per BN */
const uint32_t* cn2bnProcBuf; /**< LUT for transferring CN processing results to BN processing buffer */
const uint16_t* llr2llrProcBuf; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint16_t** circShift[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */ const uint16_t** circShift[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */
const uint32_t** startAddrBnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */ const uint32_t** startAddrBnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */
const uint8_t** bnPosBnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */ const uint8_t** bnPosBnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for circular shift values for all CN groups and Z's */
const uint16_t* llr2llrProcBufAddr; /**< LUT for transferring input LLRs to LLR processing buffer */ const uint16_t* llr2llrProcBufAddr; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint8_t* llr2llrProcBufNumBn; /**< LUT for transferring input LLRs to LLR processing buffer */ const uint8_t* llr2llrProcBufBnPos; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint8_t* llr2llrProcBufNumEl; /**< LUT for transferring input LLRs to LLR processing buffer */
const uint8_t** posBnInCnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for llr2cnProcBuf */ const uint8_t** posBnInCnProcBuf[NR_LDPC_NUM_CN_GROUPS_BG1]; /**< LUT for llr2cnProcBuf */
} t_nrLDPC_lut; } t_nrLDPC_lut;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment