constsimde__m256iimags=simde_mm256_slli_epi32(simde_mm256_madd_epi16(x,w_s),1);// (int32_t) .i = (x.r * w.i + x.i * w.r) << 1, since higher 16 bit of each 32 bit is taken by blend_epi16
constsimde__m256iimags=simde_mm256_slli_epi32(simde_mm256_madd_epi16(x,w_s),1);// (int32_t) .i = (x.r * w.i + x.i * w.r) << 1, since higher 16 bit of each 32 bit is taken by blend_epi16