Commit 4922eea5 authored by Guy De Souza's avatar Guy De Souza

idft/dft 3072 fix

parent 921b7283
dev 1 128-ues 256_QAM_demod 375-syrtem-sdr-platform 408-reworked 408-ue-main-threads 445-LDPC-implementation-on-GPU 459-pusch-based-ta-updates 464-ru_beamforming_in_gpu 464-ru_beamforming_in_gpu-CPUsubfunction 472-add-pusch-dmrs-modes 481-ldpc-decoder-on-gpu 512-dataplane-bug-in-l2nfapi_nos1 5g_fapi_scf FR2_NSA Fix_SA_SIB1 LTE_TRX_on_single_port NCTU_CS_ISIP NCTU_CS_ISIP_CPU NCTU_CS_ISIP_GPU NCTU_OpinConnect_LDPC NR-PHY-MAC-IF-multi-UE NRPRACH_highSpeed_saankhya NRUE_usedlschparallel NR_10MHz NR_2port_CSIRS NR_CSIRS_tomerge NR_CSI_reporting NR_DCI_01 NR_DLUL_PF NR_DLUL_PF_4UL NR_DLUL_PF_rebased NR_DL_MIMO NR_DL_sched_fixes NR_DL_scheduler NR_F1C_F1U_extensions NR_FAPI_beamindex_SSB_RO NR_FAPI_beamindex_SSB_RO_SEMPROJ NR_FDD_FIX NR_FR2_RA NR_FR2_RRC_SSB NR_FR2_initsync_fixes NR_MAC_CE_GlobalEdge NR_MAC_Multi_Rach_GlobalEdge NR_MAC_Multi_Rach_GlobalEdge-old NR_MAC_SSB NR_MAC_SSB_RO_GlobalEdge NR_MAC_SSB_RO_UE_IDCC NR_MAC_SSB_RO_merge NR_MAC_TCI_UCI_GlobalEdge NR_MCS_BLER NR_NGAP NR_PDCP_noS1 NR_PUCCH_MultiUE NR_RA_cleanup NR_RA_updates NR_RRCConfiguragion_FR2 NR_RRCConfiguration NR_RRCConfiguration_FR2 NR_RRCConfiguration_S1U NR_RRCConfiguration_merge_develop NR_RRCConfiguration_sync_source NR_RRCConfiguration_trx_thread NR_RRC_CP_bugfix NR_RRC_PDCP NR_RRC_PRACH_procedures NR_RRC_PRACH_procedures_todevelop NR_RRC_PUSCH NR_RRC_TA NR_RRC_X2AP_AMBR_Change_Global_edge NR_RRC_X2AP_RemoveHardcodings_GlobalEdge NR_RRC_config_simplified NR_RRC_harq NR_RRC_harq_b NR_RRC_harq_hacks NR_RRC_harq_newdcipdu NR_SA_F1AP_5GRECORDS NR_SA_F1AP_5GRECORDS-USIM NR_SA_F1AP_5GRECORDS-wf-0623 NR_SA_F1AP_5GRECORDS_lts NR_SA_F1AP_RFSIMULATOR NR_SA_F1AP_RFSIMULATOR2 NR_SA_F1AP_RFSIMULATOR2_SRB NR_SA_F1AP_RFSIMULATOR3 NR_SA_F1AP_RFSIMULATOR3_tmp NR_SA_F1AP_RFSIMULATOR3_wf NR_SA_F1AP_RFSIMULATOR_w5GCN NR_SA_F1AP_dev NR_SA_NGAP_RRC NR_SA_NGAP_RRC_wk42 NR_SA_itti_sim_wk48 NR_SA_itti_sim_wk48_hs NR_SA_itti_sim_wk48_hs1 NR_SA_w5GCN_new_gtpu NR_SCHED NR_SCHED_HARQ NR_SCHED_PDCCH_PUCCH_HARQ NR_SCHED_PDCCH_PUCCH_HARQ_rebased NR_SCHED_fixes NR_SRB_Config NR_TRX_on_single_port NR_TRX_on_single_port2 NR_UE_CONFIG_REQ_FIXES NR_UE_MAC_scheduler NR_UE_PUCCH_bugfixes NR_UE_RA_fixes NR_UE_SA NR_UE_UL_DCI_improvements NR_UE_dlsch_bugfix NR_UE_enable_parallelization NR_UE_rework_test NR_UE_reworking_UCI_procedures NR_UE_stability_fixes NR_UL_FAPI_programming NR_UL_SCFDMA_100MHz NR_UL_scheduler NR_UL_scheduler_rebased NR_UL_scheduling NR_Wireshark NR_beam_simulation NR_beamforming_test NR_cleanup_PUCCH_resources NR_gNB_SCF_Indication NR_gNB_initial_MIB_fix NR_ipaccess_testing NR_mac_uci_functions_rework NR_msg2_phytest NR_multiplexing_HARQ_CSI_PUCCH NR_phytest_bugfixes NR_reworking_UL_antennaports NR_scheduling_CSIRS NR_scheduling_request NR_scheduling_request2 NR_scheduling_request3 NR_test_S1U_RRC_PRACH_procedures NR_ue_dlsch_dmrs_cdm OpInConnect_ISIP PBCHNRTCFIX PUSCH_TA_update RA_CI_test RFquality Saankhya_NRPRACH_HighSpeed Test_SA_5GREC UE_DL_DCI_hotfix add-dmrs-test add-ru-docker-image addoptions_nr_USRPdevice avxllr bandwidth-testing bch-fixes-bitmap benetel_5g_prach_fix benetel_config_file_fix benetel_dpdk20 benetel_driver_uldl_pf_merge benetel_driver_update benetel_fixes benetel_phase_rotation benetel_phase_rotation_old bsr-fix bugfix-free-ra-process bugfix-minor-remove-wrong-log bugfix-nr-bands bugfix-nr-ldpc-post-processing bugfix-nr-ldpc-size-typo bugfix-nr-pdcp-sn-size bugfix-nr-rate-matching-assertion bugfix-nr-t-reordering bugfix-x2-SgNBAdditionRequest bugfix_gnb_rt_stats_html bupt-sa-merge cce_indexing_fix cce_indexing_fix2 ci-deploy-asterix ci-deploy-docker-compose ci-fix-module-ul-iperf ci-new-docker-pipeline ci-rd-july-improvements ci-reduce-nb-vms ci-test ci-ul-iperf-from-trf-container ci_benetel_longrun_limits ci_benetel_test ci_fix_iperf_for_module ci_hotfix_module_ue_ip_address ci_improve_module_ctl ci_nsa_benetel ci_nsa_fixes ci_nsa_pipes_improve ci_nsa_test_integration_2021_wk19 ci_nsa_traces ci_nsa_uplink ci_phytest ci_quectel_support ci_sa_rfsim_test ci_solve_ul_for_module ci_test_5GREC ci_test_nsa_fix_quectel_nic ci_test_nsa_on_develop ci_test_ra_fr2 ci_testinfra_as_code ci_vm_resource_fix clean-5G-scope-round2 cleanup_softmodem_main constant_power debug-UL-5GRECORDS debug_UL_signal debug_branch_init_sync detached-w16-test develop develop-CBRA-v3 develop-CCE develop-NR_SA_F1AP_5GRECORDS develop-NR_SA_F1AP_5GRECORDS-abs develop-NR_SA_F1AP_5GRECORDS-hs develop-NR_SA_F1AP_5GRECORDS-hs1 develop-NR_SA_F1AP_5GRECORDS-lts develop-NR_SA_F1AP_5GRECORDS-lts-wf develop-NR_SA_F1AP_5GRECORDS-v3 develop-NR_SA_F1AP_5GRECORDS_100M develop-NR_SA_F1AP_5GRECORDS_LDPC_FPGA develop-NR_SA_F1AP_5GRECORDS_lfq_0607 develop-SA-CBRA develop-SA-CBRA-CUDU develop-SA-CBRA-Msg5 develop-SA-CBRA-lts develop-SA-CBRA-ulsch-lts develop-SA-RA develop-SnT develop-aw2sori develop-ci develop-nr develop-nr-adding-2018-09-asn1 develop-nr-fr2 develop-nr-fr2-rework develop-nr_cppcheck develop-oriecpriupdates develop-sib1 develop-sib1-local develop-sib1-lts develop-sib1-update develop-sib1-update-test1 develop-sib1-update-ue develop-wf-du develop_inria_ci_deployment develop_inria_ci_deployment_gp develop_integration_2020_w15 develop_integration_2020_w19 develop_integration_w08 develop_stable dfts_alternatives disable_CSI_measrep dlsch-all-dlslots dlsch_encode_mthread dlsch_parallel docker-improvements-2021-april docker-no-cache-option docupdate_tools dongzhanyi-zte-develop dongzhanyi-zte-develop1 dongzhanyi-zte-develop2 dreibh/apt-auth-fix dreibh/device-load-fix dreibh/device-load-fix-develop-branch dual-connectivity edrx enhance-rfsim episys-merge episys/nsa_baseline episys/nsa_development extend_sharedlibusage extend_sharedlibusage2 fapi_for_dmrs_and_ptrs feat-mac-sock feature-4g-sched feature-nr-4g-nfapi-modifications feature-support-clang-format feature/make-s1-mme-port-configurable feature/make-s1-mme-port-configurable-with-astyle-fixes fedora-gen-kernel-fix fembms-enb-ue fft_bench_hotfix finalize-oaicn-integration firas fix-check fix-ci-tun fix-clock-source fix-compile fix-itti-segv fix-l2-sim fix-limeSDR-compile fix-nr-pdcp-timer fix-nr-rlc-range-nack fix-physim-deploy fix-quectel fix-realtime fix-retransmission-rbg fix-softmodem-restart fix-warnings fix-x2-without-gnb fix_NR_DLUL_PF fix_NR_DLUL_PF_benchmark fix_coreset_dmrs_idx fix_do_ra_data fix_nr_ulsim fix_pdsch_low_prb fix_rb_corruption fix_reestablishment fix_rfsim_mimo fix_rrc_x2_ticking fixes-CE-RLC-PDU-size fixes-mac-sched-nfapi fixes-mac-sched-tun fixes-tun fixgtpu flexran-apps flexran-improvements flexran-repair-mme-mgmt flexran-rtc-repo-is-public fr2-hw-test fujitsu_lte_contribution fujitsu_lte_contribution-128 gNB-nrUE-USRP generate_push_ptrs git-dashboard gnb-freerun-txru gnb-n300-fixes gnb-only-test gnb-realtime-hotfix gnb-realtime-quickfix gnb-threadpool hack-bch-no-sched-sf-0 hack-exit-gnb-when-no-enb-nsa harq-hotfix hotfix-minor-remove-nr-rlc-cppcheck-error hotfix-nr-rlc-tick hotfix-ocp-executable hotfix-ue-musim-compilation hotfix_usrp_lib improve_build_nr_lte_merge improve_nr_modulation improve_ue_stability integ-w13-test-rt-issue integration-develop-nr-2019w45 integration_2020_wk15 integration_2020_wk40 integration_2020_wk41 integration_2020_wk42_2 integration_2020_wk45 integration_2020_wk45_2 integration_2020_wk46 integration_2020_wk46_2 integration_2020_wk47 integration_2020_wk48 integration_2020_wk48_2 integration_2020_wk49 integration_2020_wk50 integration_2020_wk50_1 integration_2020_wk51 integration_2020_wk51_2 integration_2021_wk02 integration_2021_wk02_wMR988 integration_2021_wk04 integration_2021_wk05 integration_2021_wk06 integration_2021_wk06_MR978 integration_2021_wk06_b integration_2021_wk06_c integration_2021_wk08 integration_2021_wk08_2 integration_2021_wk08_MR963 integration_2021_wk09 integration_2021_wk09_b integration_2021_wk10 integration_2021_wk10_b integration_2021_wk11 integration_2021_wk12 integration_2021_wk12_b integration_2021_wk13_a integration_2021_wk13_b integration_2021_wk13_b_fix_tdas integration_2021_wk13_b_fixed integration_2021_wk13_c integration_2021_wk14_a integration_2021_wk15_a integration_2021_wk16 integration_2021_wk17_a integration_2021_wk17_b integration_2021_wk18_a integration_2021_wk18_b integration_2021_wk19 integration_2021_wk20_a integration_2021_wk22 integration_2021_wk23 integration_2021_wk27 integration_w5GC_CBRA_test inter-RRU-final inter-RRU-nr inter-RRU-oairu inter-rru-UE interoperability-test isip_nr itti-enhancement l2-fixes ldpc-dec-layering ldpc-decoder-codegen ldpc-decoder-codegen2 ldpc-decoder-improvements ldpc-offload ldpc_offload_t1 ldpc_short_codeword_fixes load_gnb lte-ulsch-bugfix lte_uplink_improvement mac-fixes-wk45_2 mbms-fix-develop-nr merging-2019-w51-to-develop-nr migrate-cpp-check-container migrate-vm-pipeline-to-bionic minor-fix-doc-basic-sim mosaic5g-oai-ran mosaic5g-oai-sim msg4_phy_0303_lfq multiple_ssb_sib1_bugfix nasmesh_kernel_5.8 new-gtpu new_rlc_2020 nfapi-bugfix nfapi_nr_arch_mod nfapi_nr_develop nfapi_nr_develop_new ngap-dlul ngap-support ngap-w48-merge2 ngap-wf ngap-wf-1120 ngap-wf-1120-srb ngap-wf-1120-srb-gtp ngap-wf-1120-srb-gtp-hs ngap-wf-1120-srb-gtp-hs1 ngap-wf-1120-srb-gtp-hs2 ngap-wf-1120-srb-gtp-yhz ngap-wf-1203-yunsdr ngap-wf-liuyu ngap_lfq_1120 ngap_merge noCore nr-bsr-fix nr-coreset-bug-fix nr-dl-mimo-2layer nr-dlsch-multi-thread nr-dlsch-thread nr-dmrs-fixes nr-dual-connectivity nr-interdigital-test nr-ip-uplink-noS1 nr-mac-pdu-wireshark nr-mac-remove-ue-list nr-multiple-ssb nr-pdcp nr-pdcp-benchmarking nr-pdcp-improvements nr-pdcp-nea2-security nr-pdcp-nia2-integrity nr-pdcp-small-bugfixes nr-pdcp-srb-integrity nr-pdsch-extraction-bugfix nr-physim-update nr-ra-fix nr-rlc-am-bugfix-w44 nr-rlc-bugfix-w44 nr-ssb-measurements nr-stats-print nr-timing-measurement nr-timing-measurement-merge nr-ue-buffer-status nr-ue-slot-based nr-uldci nrPBCHTCFix nrPbchTcFix nrUE nrUE-hs nrUE-upper-layer nr_beamforming nr_bsr nr_ci_dlsim nr_csi_newbranch nr_dci_procedures nr_demo_wsa2019 nr_dl_dmrs_type2 nr_dl_pf nr_dl_pf2 nr_dl_ul_ptrs nr_dlsch_parallel_measurements nr_dlsim_plot nr_fapi_for_push_tmp nr_fdd_if_fix nr_fix_easycppcheck nr_flexible_NRBDL nr_improve_build_procedures nr_improve_chanest nr_increase_tp nr_mib_vsa_test nr_pdcch_testing nr_pdcch_updates nr_pdsch_integration nr_polar_decoder_improvement nr_power_measurement_fixes nr_prach nr_prach_fr2 nr_pucch nr_pucch2 nr_segmentation_fixes nr_sim_fix nr_tdd_configuration nr_ue_msg3 nr_ue_pdcp_fix nr_ue_tti_cleanup nr_ul_pf nr_ul_scfdma nr_vcd nrue-multi-thread nrue_msg2_reception nsa-ue nsa_remove_band_hardcodings oai-sim oai-ubuntu-docker oai-ubuntu-docker-for-lmssdr oairu oairu-dockerfile-support oc-docker-october-improvements openxg/develop pdcp-benchmark pdsch-ch-est phy-asan-fixes physim-build-deploy physim-deploy-handle-error-cases polar8 prb_based_dl_channel_estimation ptrs_rrc_config pusch-mthread-scaling-fix pusch-retrans-fix-ue ra-dl-ul recursive-cmake reduce_memory_footprint remove-ci-workaround remove_nos1_hack_pdcp remove_x2_gnb_hardcoding repair-TA revert-f5c94279 revert_memcpy rh-ci-add-ue-parallelization rh_ci_add_runtime_stats rh_ci_add_uldlharq_stats rh_ci_fix_autoterminate rh_ci_fr1_update rh_ci_gsheet_rt_monitoring rh_ci_nsa2jenkins rh_ci_nsa_test_n310 rh_ci_oc rh_ci_phy_test_improve rh_ci_py rh_ci_ra_fr2 rh_ci_rfsim_ra rh_ci_test_benetel rh_ci_test_nsa rh_ci_test_nsa_wk16 rh_ci_test_nsa_wk17_b rh_ci_test_nsa_wk17b rh_ci_test_rfsim_sa rh_ci_ue_parallel rh_doc_update_3 rh_fr1_newjenkins rh_fr1_update rh_gnb_compile_fix rh_wk50_debug rlc-v2-bugfix-status-reporting rlc-v2-tick rlc_v2_coverity_fixes rohan_ulsim2RxFix rrc-enb-phy-testmode ru-parallel-beamforming runel runel-reverse-test s1-subnormal_rewrite s1_subnormal s1_subnormal-robert s1ap-bugfix-rab_setup sa-demo sa-demo-hs sa-merge-rrc-srb sa-msg4 sa-msg4-rrc sa-msg4-rrc-yihz sa-msg4-rrc-yihz-hs sa_rrc_yihz sanitize-address sanitize-v1 sanitize-v1-tmp sarma_pvnp_oai scs_60_iisc sim-channels small-bugfixes-w40 small-config-change small_nr_bugfixes smallcleanup softmodem_cleanup split73 t-gnb-tracer test-5GREC test-nsa-benetel test-panos test-x310-perf test_nsa_gtpu_fix test_rt-fix_phy-test testing_2symb_pdcch testing_with_external_txdata thread-pool tools_5Gadapt tp-ota-test trx_thread_param trx_write_thread ue-csi ue-dci-false-detection ue-fixes ue-fixes-ota ue-pdsch-pusch-parallel ue-race-fix ue-updates-runel-test ue_adjust_gain ue_beam_selection ue_dlsch-multi-threading ue_dlsch_decoding_ldpc_offload ue_nfapi_mch uhd_priority_set_cleanup ul-freq-iq-samps-to-file ul_dl_dci_same_slot ul_harq ulsch_decode_mthread ulsim_changes update-to-2019-march-june-release usrp_fix_adc_shift_and_pps_sync usrp_gpio_test usrp_stop_cleanly usrp_x400 wf-sa-rrc wf_testc wireshark-T-hack-ueid wireshark-log-scheduling-requests wk11-with-phytest x2-endc-processing x2_handle_sctp_shutdown xiangwab xiangwan xw2 yihongzheng_srb zzs 2021.wk14_a 2021.wk13_d 2021.wk13_c 2021.w27 2021.w26 2021.w25 2021.w24 2021.w23 2021.w22 2021.w20 2021.w19 2021.w18_b 2021.w18_a 2021.w17_b 2021.w16 2021.w15 2021.w14 2021.w13_a 2021.w12 2021.w11 2021.w10 2021.w09 2021.w08 2021.w06 2021.w05 2021.w04 2021.w02 2020.w51_2 2020.w51 2020.w50 2020.w49 2020.w48_2 2020.w48 2020.w47 2020.w46_2 2020.w46 2020.w45_2 2020.w45 2020.w44 2020.w42_2 2020.w42 2020.w41 2020.w39 2020.w38 2020.w37 2020.w36 2020.w34 2020.w33 2020.w31 2020.w30 2020.w29 2020.w28 2020.w26 2020.w25 2020.w24 2020.w23 2020.w22 2020.w19 2020.w17 2020.w16 2020.w15 2020.w11 2020.w09 2020.w06 2020.w05 2020.w04 2020.w03 osa-etsi-ws-ue osa-etsi-ws-try2 osa-etsi-ws-try1 osa-etsi-ws-gNB oai_nr_sync nr-ip-over-lte nr-ip-over-lte-v.1.5 nr-ip-over-lte-v.1.4 nr-ip-over-lte-v.1.3 nr-ip-over-lte-v.1.2 nr-ip-over-lte-v.1.1 nr-ip-over-lte-v.1.0 develop-nr-pdcch develop-nr-2020w03 develop-nr-2020w02 develop-nr-2019w51 develop-nr-2019w50 develop-nr-2019w48 develop-nr-2019w47 develop-nr-2019w45 develop-nr-2019w43 develop-nr-2019w42 develop-nr-2019w40 develop-nr-2019w28 develop-nr-2019w23 benetel_phase_rotation benetel_gnb_rel_2.0 benetel_gnb_rel_1.0 benetel_enb_rel_2.0 benetel_enb_rel_1.0
No related merge requests found
...@@ -5526,11 +5526,13 @@ void dft1536(int16_t *input, int16_t *output, int scale) ...@@ -5526,11 +5526,13 @@ void dft1536(int16_t *input, int16_t *output, int scale)
#include "twiddle3072.h" #include "twiddle3072.h"
// 1024 x 3 // 1024 x 3
void idft3072(int16_t *input, int16_t *output) void idft3072(int16_t *input, int16_t *output, int scale)
{ {
int i,i2,j; int i,i2,j;
uint32_t tmp[3][1024] __attribute__((aligned(32))); uint32_t tmp[3][1024] __attribute__((aligned(32)));
uint32_t tmpo[3][1024] __attribute__((aligned(32))); uint32_t tmpo[3][1024] __attribute__((aligned(32)));
simd_q15_t *y128p=(simd_q15_t*)output;
simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
for (i=0,j=0; i<1024; i++) { for (i=0,j=0; i<1024; i++) {
tmp[0][i] = ((uint32_t *)input)[j++]; tmp[0][i] = ((uint32_t *)input)[j++];
...@@ -5561,15 +5563,39 @@ void idft3072(int16_t *input, int16_t *output) ...@@ -5561,15 +5563,39 @@ void idft3072(int16_t *input, int16_t *output)
(simd_q15_t*)(twa3072+i),(simd_q15_t*)(twb3072+i)); (simd_q15_t*)(twa3072+i),(simd_q15_t*)(twb3072+i));
} }
if (scale==1) {
for (i=0; i<48; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128);
y128p+=16;
}
}
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
} }
void dft3072(int16_t *input, int16_t *output) void dft3072(int16_t *input, int16_t *output, int scale)
{ {
int i,i2,j; int i,i2,j;
uint32_t tmp[3][1024] __attribute__((aligned(32))); uint32_t tmp[3][1024] __attribute__((aligned(32)));
uint32_t tmpo[3][1024] __attribute__((aligned(32))); uint32_t tmpo[3][1024] __attribute__((aligned(32)));
simd_q15_t *y128p=(simd_q15_t*)output;
simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
for (i=0,j=0; i<1024; i++) { for (i=0,j=0; i<1024; i++) {
tmp[0][i] = ((uint32_t *)input)[j++]; tmp[0][i] = ((uint32_t *)input)[j++];
...@@ -5598,17 +5624,41 @@ void dft3072(int16_t *input, int16_t *output) ...@@ -5598,17 +5624,41 @@ void dft3072(int16_t *input, int16_t *output)
(simd_q15_t*)(twa3072+i),(simd_q15_t*)(twb3072+i)); (simd_q15_t*)(twa3072+i),(simd_q15_t*)(twb3072+i));
} }
if (scale==1) {
for (i=0; i<48; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128);
y128p+=16;
}
}
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
} }
#include "twiddle6144.h" #include "twiddle6144.h"
void idft6144(int16_t *input, int16_t *output) void idft6144(int16_t *input, int16_t *output, int scale)
{ {
int i,i2,j; int i,i2,j;
uint32_t tmp[3][2048] __attribute__((aligned(32))); uint32_t tmp[3][2048] __attribute__((aligned(32)));
uint32_t tmpo[3][2048] __attribute__((aligned(32))); uint32_t tmpo[3][2048] __attribute__((aligned(32)));
simd_q15_t *y128p=(simd_q15_t*)output;
simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
for (i=0,j=0; i<2048; i++) { for (i=0,j=0; i<2048; i++) {
tmp[0][i] = ((uint32_t *)input)[j++]; tmp[0][i] = ((uint32_t *)input)[j++];
...@@ -5638,6 +5688,28 @@ void idft6144(int16_t *input, int16_t *output) ...@@ -5638,6 +5688,28 @@ void idft6144(int16_t *input, int16_t *output)
(simd_q15_t*)(twa6144+i),(simd_q15_t*)(twb6144+i)); (simd_q15_t*)(twa6144+i),(simd_q15_t*)(twb6144+i));
} }
if (scale==1) {
for (i=0; i<96; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128);
y128p+=16;
}
}
// write_output("out.m","out",output,6144,1,1); // write_output("out.m","out",output,6144,1,1);
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
...@@ -5645,11 +5717,13 @@ void idft6144(int16_t *input, int16_t *output) ...@@ -5645,11 +5717,13 @@ void idft6144(int16_t *input, int16_t *output)
} }
void dft6144(int16_t *input, int16_t *output) void dft6144(int16_t *input, int16_t *output, int scale)
{ {
int i,i2,j; int i,i2,j;
uint32_t tmp[3][2048] __attribute__((aligned(32))); uint32_t tmp[3][2048] __attribute__((aligned(32)));
uint32_t tmpo[3][2048] __attribute__((aligned(32))); uint32_t tmpo[3][2048] __attribute__((aligned(32)));
simd_q15_t *y128p=(simd_q15_t*)output;
simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
for (i=0,j=0; i<2048; i++) { for (i=0,j=0; i<2048; i++) {
tmp[0][i] = ((uint32_t *)input)[j++]; tmp[0][i] = ((uint32_t *)input)[j++];
...@@ -5677,6 +5751,28 @@ void dft6144(int16_t *input, int16_t *output) ...@@ -5677,6 +5751,28 @@ void dft6144(int16_t *input, int16_t *output)
(simd_q15_t*)(twa6144+i),(simd_q15_t*)(twb6144+i)); (simd_q15_t*)(twa6144+i),(simd_q15_t*)(twb6144+i));
} }
if (scale==1) {
for (i=0; i<96; i++) {
y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128);
y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128);
y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128);
y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128);
y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128);
y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128);
y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128);
y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128);
y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128);
y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128);
y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128);
y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128);
y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128);
y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128);
y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128);
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128);
y128p+=16;
}
}
_mm_empty(); _mm_empty();
_m_empty(); _m_empty();
...@@ -5769,9 +5865,9 @@ void dft18432(int16_t *input, int16_t *output) { ...@@ -5769,9 +5865,9 @@ void dft18432(int16_t *input, int16_t *output) {
tmp[2][i] = ((uint32_t *)input)[j++]; tmp[2][i] = ((uint32_t *)input)[j++];
} }
dft6144((int16_t*)(tmp[0]),(int16_t*)(tmpo[0])); dft6144((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
dft6144((int16_t*)(tmp[1]),(int16_t*)(tmpo[1])); dft6144((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
dft6144((int16_t*)(tmp[2]),(int16_t*)(tmpo[2])); dft6144((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);
for (i=0,i2=0; i<12288; i+=8,i2+=4) { for (i=0,i2=0; i<12288; i+=8,i2+=4) {
bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]),
...@@ -5795,9 +5891,9 @@ void idft18432(int16_t *input, int16_t *output) { ...@@ -5795,9 +5891,9 @@ void idft18432(int16_t *input, int16_t *output) {
tmp[2][i] = ((uint32_t *)input)[j++]; tmp[2][i] = ((uint32_t *)input)[j++];
} }
idft6144((int16_t*)(tmp[0]),(int16_t*)(tmpo[0])); idft6144((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
idft6144((int16_t*)(tmp[1]),(int16_t*)(tmpo[1])); idft6144((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
idft6144((int16_t*)(tmp[2]),(int16_t*)(tmpo[2])); idft6144((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);
for (i=0,i2=0; i<12288; i+=8,i2+=4) { for (i=0,i2=0; i<12288; i+=8,i2+=4) {
ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]),
...@@ -19286,7 +19382,7 @@ int main(int argc, char**argv) ...@@ -19286,7 +19382,7 @@ int main(int argc, char**argv)
for (i=0; i<10000; i++) { for (i=0; i<10000; i++) {
start_meas(&ts); start_meas(&ts);
idft3072((int16_t *)x,(int16_t *)y); idft3072((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts); stop_meas(&ts);
} }
...@@ -19339,7 +19435,7 @@ int main(int argc, char**argv) ...@@ -19339,7 +19435,7 @@ int main(int argc, char**argv)
for (i=0; i<10000; i++) { for (i=0; i<10000; i++) {
start_meas(&ts); start_meas(&ts);
idft6144((int16_t *)x,(int16_t *)y); idft6144((int16_t *)x,(int16_t *)y,1);
stop_meas(&ts); stop_meas(&ts);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment