Commit f6bb869c authored by Raymond Knopp

Fixed an AVX2 issue in the cnProc code generator. Also cleaned up some formatting and deleted unused files.

parent aabd9c6c
This diff is collapsed.
......@@ -41,15 +41,6 @@
\param Z Lifting size
\param cshift Circular shift
*/
// Faster memcpy using "rep movsb", which is highly optimized on modern x86 processors.
// Copies n bytes from src to dst as a forward byte-by-byte copy (no memmove-style
// handling of overlapping regions) and returns the original value of dst.
void *memcpy1(void *dst, const void *src, size_t n)
{
  void *ret = dst; // saved now because the copy below advances dst
#if defined(__x86_64__) || defined(__i386__)
  // "rep movsb" advances the destination and source index registers and counts
  // the count register down to zero, so all three must be declared as
  // read-write ("+") operands; declaring them input-only lets the compiler
  // wrongly assume the registers still hold their original values afterwards.
  // __asm__/__volatile__ are used instead of asm/volatile so the code also
  // builds in strict ISO C modes (e.g. -std=c11).
  __asm__ __volatile__("rep movsb" : "+D"(dst), "+S"(src), "+c"(n) : : "cc", "memory");
#else
  // Portable fallback for targets without "rep movsb".
  unsigned char *out = dst;
  const unsigned char *in = src;
  while (n--) {
    *out++ = *in++;
  }
#endif
  return ret;
}
static inline void *nrLDPC_inv_circ_memcpy(int8_t *str1, const int8_t *str2, uint16_t Z, uint16_t cshift)
{
......
......@@ -96,7 +96,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
// for (i=0; i<M; i++,iprime++)
// {
fprintf(fd," for (int i=0;i<M;i+=2) {\n");
fprintf(fd," for (int i=0;i<M;i++) {\n");
// Abs and sign of 32 CNs (first BN)
// ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
fprintf(fd," ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][0]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment