cdot_prod.c 4.02 KB
Newer Older
1 2 3 4 5
/*
 * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The OpenAirInterface Software Alliance licenses this file to You under
6
 * the OAI Public License, Version 1.1  (the "License"); you may not use this file
7 8 9 10 11 12 13 14 15 16 17 18 19 20
 * except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.openairinterface.org/?page_id=698
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *-------------------------------------------------------------------------------
 * For more information about the OpenAirInterface (OAI) Software Alliance:
 *      contact@openairinterface.org
 */
ghaddab's avatar
ghaddab committed
21

22
#include "tools_defs.h"
23
#include "PHY/sse_intrin.h"
24

25
// returns the complex dot product of x and y
26

laurent's avatar
laurent committed
27 28 29 30 31 32 33 34
/*! \brief Complex number dot_product
*/

c32_t dot_product(const c16_t *x,//! input vector
                  const c16_t *y,//! input vector
                  const uint32_t N,//! size of the vectors
                  const int output_shift //! normalization afer int16 multiplication
                  )
35
{
laurent's avatar
laurent committed
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
  const int16_t reflip[32] __attribute__((aligned(32))) = {1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1};
  const int8_t imshuffle[64] __attribute__((aligned(32))) = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13, 18, 19, 16, 17, 22, 23, 20, 21, 26, 27, 24, 25, 30, 31, 28, 29};
  const c16_t *end = x + N;
  __m256i cumul_re = {0}, cumul_im = {0};
  while (x < end) {
    const __m256i in1 = simde_mm256_loadu_si256((__m256i *)x);
    const __m256i in2 = simde_mm256_loadu_si256((__m256i *)y);
    const __m256i tmpRe = simde_mm256_madd_epi16(in1, in2);
    cumul_re = simde_mm256_add_epi32(cumul_re, simde_mm256_srai_epi32(tmpRe, output_shift));
    const __m256i tmp1 = simde_mm256_shuffle_epi8(in2, *(__m256i *)imshuffle);
    const __m256i tmp2 = simde_mm256_sign_epi16(tmp1, *(__m256i *)reflip);
    const __m256i tmpIm = simde_mm256_madd_epi16(in1, tmp2);
    cumul_im = simde_mm256_add_epi32(cumul_im, simde_mm256_srai_epi32(tmpIm, output_shift));
    x += 8;
    y += 8;
51 52
  }

laurent's avatar
laurent committed
53 54 55 56 57 58 59 60 61 62 63 64 65 66
  // this gives Re Re Im Im Re Re Im Im
  const __m256i cumulTmp = simde_mm256_hadd_epi32(cumul_re, cumul_im);
  const __m256i cumul = simde_mm256_hadd_epi32(cumulTmp, cumulTmp);

  c32_t ret;
  ret.r = simde_mm256_extract_epi32(cumul, 0) + simde_mm256_extract_epi32(cumul, 4);
  ret.i = simde_mm256_extract_epi32(cumul, 1) + simde_mm256_extract_epi32(cumul, 5);
  if (x!=end) {
    x-=8;
    y-=8;
    for ( ; x <end; x++,y++ ) {
      ret.r += ((x->r*y->r)>>output_shift) + ((x->i*y->i)>>output_shift);
      ret.i += ((x->r*y->i)>>output_shift) - ((x->i*y->r)>>output_shift);
    }
Raymond Knopp's avatar
Raymond Knopp committed
67
  }
laurent's avatar
laurent committed
68
  return ret;
Raymond Knopp's avatar
Raymond Knopp committed
69 70
}

71
#ifdef MAIN
laurent's avatar
laurent committed
72 73 74 75 76
//gcc -DMAIN openair1/PHY/TOOLS/cdot_prod.c -Iopenair1 -I. -Iopenair2/COMMON -march=native -lm
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <math.h>
77

78 79
void main(void)
{
laurent's avatar
laurent committed
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
  const int multiply_reduction=15;
  const int arraySize=16;
  const int signalAmplitude=3000;

  c32_t result={0};
  cd_t resDouble={0};
  c16_t x[arraySize] __attribute__((aligned(16)));
  c16_t y[arraySize] __attribute__((aligned(16)));
  int fd=open("/dev/urandom",0);
  read(fd,x,sizeof(x));
  read(fd,y,sizeof(y));
  close(fd);
  for (int i=0; i<arraySize; i++) {
    x[i].r%=signalAmplitude;
    x[i].i%=signalAmplitude;
    y[i].r%=signalAmplitude;
    y[i].i%=signalAmplitude;
    resDouble.r+=x[i].r*(double)y[i].r+x[i].i*(double)y[i].i;
    resDouble.i+=x[i].r*(double)y[i].i-x[i].i*(double)y[i].r;
  }
  resDouble.r/=pow(2,multiply_reduction);
  resDouble.i/=pow(2,multiply_reduction);
102 103 104

  result = dot_product(x,y,8*2,15);

laurent's avatar
laurent committed
105
  printf("result = %d (double: %f), %d (double %f)\n", result.r, resDouble.r,  result.i, resDouble.i);
106 107
}
#endif