From b29fc0c84788962a3438eded15e1c0c9552c708a Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Wed, 3 Jun 2026 14:45:56 +0000 Subject: [PATCH] Backport HYGON C86-4G series processors support from GCC upstream i386: Support HYGON c86-4g series processors (https://gcc.gnu.org/g:2a64a63d982) i386: Adjust some c86-4g*.md modeling to reduce build time (https://gcc.gnu.org/g:c776dcd5f86) i386: Refine c86-4g fdiv scheduling model (https://gcc.gnu.org/g:dd682ea0414) --- ...pport-HYGON-c86-4g-series-processors.patch | 6778 +++++++++++++++++ ...-c86-4g-.md-modeling-to-reduce-build.patch | 588 ++ ...-Refine-c86-4g-fdiv-scheduling-model.patch | 1410 ++++ gcc.spec | 13 +- 4 files changed, 8788 insertions(+), 1 deletion(-) create mode 100644 HYGON-0001-i386-Support-HYGON-c86-4g-series-processors.patch create mode 100644 HYGON-0002-i386-Adjust-some-c86-4g-.md-modeling-to-reduce-build.patch create mode 100644 HYGON-0003-i386-Refine-c86-4g-fdiv-scheduling-model.patch diff --git a/HYGON-0001-i386-Support-HYGON-c86-4g-series-processors.patch b/HYGON-0001-i386-Support-HYGON-c86-4g-series-processors.patch new file mode 100644 index 0000000..dd54109 --- /dev/null +++ b/HYGON-0001-i386-Support-HYGON-c86-4g-series-processors.patch @@ -0,0 +1,6778 @@ +From e6d78e1c374add4a2704cfb8dc08513fd260935e Mon Sep 17 00:00:00 2001 +From: Xin Liu +Date: Wed, 27 May 2026 20:50:03 +0000 +Subject: [PATCH 1/3] i386: Support HYGON c86-4g series processors + +Upstream reference: https://gcc.gnu.org/g:2a64a63d982 + +This patch enables new x86 CPU vendor HYGON ID detection +and adds c86-4g series c86-4g-m{4,6,7} processor supports. +Without such support, if users use -march=native option on +HYGON machines, they can get some old arch like core2, it +would be suboptimal. It also enables -m{arch,tune}=c86-4g +-m{4,6,7} supports. Based on the hardware characteristics, +appropriate cost models and tuning parameters are provided. 
+ +New machine description files are introduced: c86-4g.md is +used to describe the pipeline for c86-4g-m4 and c86-4g-m6, +while c86-4g-m7.md describes the pipeline for c86-4g-m7. +To better model some pipeline information, it introduces +new attrs c86_attr and c86_decode by following existing +practice. + +Bootstrapped and regtested on hygon c86-4g-m4 and c86-4g-m7 +machine, as well as a cfarm x86-64 machine. + +Co-authored-by: Zhaoling Bao +Signed-off-by: Xin Liu +Signed-off-by: Zhaoling Bao + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_hygon_cpu): Detect the specific + type of HYGON CPU and return HYGON CPU name. + (cpu_indicator_init): Handle HYGON CPU. + * common/config/i386/i386-common.cc (processor_names): Add HYGON + C86-4G processors c86-4g-m{4,6,7}. + (processor_alias_table): Add hygon, hygonfam18h and c86-4g-m{4,6,7} + entries. + (ARRAY_SIZE): Update as new entries added. + * common/config/i386/i386-cpuinfo.h (enum processor_vendor): Add + VENDOR_HYGON. + (enum processor_types): Add HYGONFAM18H. + (enum processor_subtypes): Add HYGONFAM18H_C86_4G_M{4,6,7}. + * config.gcc: Add support for c86_4g_m{4,6,7}. + * config/i386/cpuid.h (signature_HYGON_ebx): Add signature for HYGON. + (signature_HYGON_ecx): Ditto. + (signature_HYGON_edx): Ditto. + * config/i386/driver-i386.cc (host_detect_local_cpu): Support HYGON + c86-4g-m{4,6,7} processors. + * config/i386/i386-c.cc (ix86_target_macros_internal): Ditto. + * config/i386/i386-options.cc (m_C86_4G_M4): New definition. + (m_C86_4G_M6): Ditto. + (m_C86_4G_M7): Ditto. + (m_C86_4G): Ditto. + (processor_cost_table): Add cost entries for c86-4g-m{4,6,7}. + * config/i386/i386.cc (ix86_reassociation_width): Add handlings for + PROCESSOR_C86_4G_M{4,6,7}. + * config/i386/i386.h (enum processor_type): Define + PROCESSOR_C86_4G_M{4,6,7}. + (PTA_C86_4G_M4): New define. + (PTA_C86_4G_M6): Ditto. + (PTA_C86_4G_M7): Ditto. + * config/i386/x86-tune-costs.h (c86_4g_m4_memcpy): New stringop_algs. 
+ (c86_4g_m4_cost): New processor_costs. + (c86_4g_m6_cost): Ditto. + (c86_4g_m7_cost): Ditto. + * config/i386/x86-tune-sched.cc (ix86_issue_rate): Handle + PROCESSOR_C86_4G_M{4,6,7}. + (ix86_adjust_cost): Ditto. + * config/i386/x86-tune.def (X86_TUNE_SCHEDULE): Handle m_C86_4G. + (X86_TUNE_PARTIAL_REG_DEPENDENCY): Ditto. + (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Ditto. + (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): Ditto. + (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Ditto. + (X86_TUNE_MEMORY_MISMATCH_STALL): Ditto. + (X86_TUNE_FUSE_CMP_AND_BRANCH_32): Ditto. + (X86_TUNE_FUSE_CMP_AND_BRANCH_64): Ditto. + (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS): Ditto. + (X86_TUNE_USE_LEAVE): Ditto. + (X86_TUNE_PUSH_MEMORY): Ditto. + (X86_TUNE_INTEGER_DFMODE_MOVES): Ditto. + (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Ditto. + (X86_TUNE_USE_SAHF): Ditto. + (X86_TUNE_USE_BT): Ditto. + (X86_TUNE_AVOID_MFENCE): Ditto. + (X86_TUNE_USE_FFREEP): Ditto. + (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Ditto. + (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL): Ditto. + (X86_TUNE_SSE_TYPELESS_STORES): Ditto. + (X86_TUNE_SSE_LOAD0_BY_PXOR): Ditto. + (X86_TUNE_USE_GATHER_2PARTS): Ditto. + (X86_TUNE_USE_GATHER_4PARTS): Ditto. + (X86_TUNE_USE_GATHER_8PARTS): Ditto. + (X86_TUNE_AVOID_128FMA_CHAINS): Ditto. + (X86_TUNE_AVOID_256FMA_CHAINS): Ditto. + (X86_TUNE_USE_RCR): Ditto. + (X86_TUNE_AVX256_MOVE_BY_PIECES): Handle m_C86_4G_M{4,6}. + (X86_TUNE_USE_SCATTER_2PARTS): Handle m_C86_4G_M7. + (X86_TUNE_USE_SCATTER_4PARTS): Ditto. + (X86_TUNE_USE_SCATTER_8PARTS): Ditto. + (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF): Ditto. + (X86_TUNE_AVX512_SPLIT_REGS): Ditto. + (X86_TUNE_AVX512_MOVE_BY_PIECES): Ditto. + (X86_TUNE_AVX512_MASKED_EPILOGUES): Ditto. + * doc/extend.texi: Document about hygonfam18h and c86-4g-m{4,6,7}. + * doc/invoke.texi: Document about c86-4g-m{4,6,7}. + * config/i386/c86-4g-m7.md: New file for c86-4g-m7 scheduling model + information. 
+ * config/i386/c86-4g.md: New file for c86-4g-m{4,6} scheduling model + information. + * config/i386/i386.md (cpu attr): Add c86_4g_m{4,6,7}. + (c86-4g.md): New include. + (c86-4g-m7.md): Ditto. + (*cmpixf_i387): Set attr c86_decode. + (*cmpi): Ditto. + (swap): Ditto. + (*swap): Ditto. + (extendhisi2): Ditto. + (floathi2): Ditto. + (floatxf2): Ditto. + (*float2): Ditto. + (*floatdi2_i387): Ditto. + (*anddi_1_bt): Ditto. + (*iordi_1_bts): Ditto. + (*xordi_1_btc): Ditto. + (*): Ditto. + (*btr): Ditto. + (*btsq_imm): Ditto. + (*btrq_imm): Ditto. + (*btcq_imm): Ditto. + (*tzcnt_1): Ditto. + (*tzcnt_1_falsedep): Ditto. + (*bsf_1): Ditto. + (*ctz2_falsedep): Ditto. + (*ctzsi2_zext): Ditto. + (*ctzsi2_zext_falsedep): Ditto. + (bsr_rex64): Ditto. + (bsr_rex64_1): Ditto. + (bsr_rex64_1_zext): Ditto. + (bsr): Ditto. + (bsr_1): Ditto. + (bsr_zext_1): Ditto. + (*bswaphi2_movbe): Ditto. + (*bswaphi2): Ditto. + (bswaphisi2_lowpart): Ditto. + (fpremxf4_i387): Ditto. + (fprem1xf4_i387): Ditto. + (xf2): Ditto. + (sincosxf3): Ditto. + (fptanxf4_i387): Ditto. + (atan2xf3): Ditto. + (fyl2xxf3_i387): Ditto. + (fyl2xp1xf3_i387): Ditto. + (fxtractxf3_i387): Ditto. + (*f2xm1xf2_i387): Ditto. + (fscalexf4_i387): Ditto. + (rintxf2): Ditto. + (*movxi_internal_avx512f): Set attr c86_attr. + (*movoi_internal_avx): Ditto. + (*movti_internal): Ditto. + (*movdi_internal): Ditto. + (*movsi_internal): Ditto. + (*movhi_internal): Ditto. + (*movtf_internal): Ditto. + (*movdf_internal): Ditto. + (*movsf_internal): Ditto. + (*zero_extendsidi2): Ditto. + (sqrtxf2): Ditto. + (3): Ditto. + (*ieee_s3): Ditto. + * config/i386/mmx.md (*mmx_maskmovq): Set attr c86_decode. + (*mmx_maskmovq): Ditto. + (sse_movntq): Set attr c86_attr. + (*mmx_blendps): Ditto. + (mmx_blendvps): Ditto. + (*mmx_pmaddwd): Ditto. + (mmx_pblendvb_v8qi): Ditto. + (mmx_pblendvb_): Ditto. + (sse4_1_v4qiv4hi2): Ditto. + (sse4_1_v2hiv2si2): Ditto. + (sse4_1_v2qiv2si2): Ditto. + (sse4_1_v2qiv2hi2): Ditto. + (*mmx_pinsrd): Ditto. 
+ (*mmx_pinsrw): Ditto. + (*mmx_pinsrb): Ditto. + (*mmx_pextrw): Ditto. + (*mmx_pextrw): Ditto. + (*mmx_pextrw_zext): Ditto. + (*mmx_pextrb): Ditto. + (*mmx_pextrb_zext): Ditto. + (*mmx_pblendw64): Ditto. + (*mmx_pblendw32): Ditto. + (*vec_extractv2si_1): Ditto. + (*vec_extractv2si_1_zext): Ditto. + (*pinsrw): Ditto. + (*pinsrb): Ditto. + (*pextrw): Ditto. + (*pextrw): Ditto. + (*pextrw_zext): Ditto. + (*pextrb): Ditto. + (*pextrb_zext): Ditto. + (*mmx_psadbw): Ditto. + * config/i386/sse.md (ktest): Set attr c86_decode. + (*kortest): Ditto. + (sse_cvtsi2ss): Ditto. + (sse2_cvtsi2sd): Ditto. + (sse2_maskmovdqu): Ditto. + (*_dp): Ditto. + (*_mpsadbw): Ditto. + (pclmulqdq): Ditto. + (conflict): Ditto. + (_blendm): Set attr c86_attr. + (sse2_movnti): Ditto. + (_movnt): Ditto. + (_movnt): Ditto. + (_rcp2): Ditto. + (sse_vmrcpv4sf2): Ditto. + (rcp14): Ditto. + (srcp14): Ditto. + (srcp14_mask): Ditto. + (_sqrt2): Ditto. + (_vmsqrt2): Ditto. + (*_vmsqrt2): Ditto. + (rsqrt14): Ditto. + (rsqrt14): Ditto. + (rsqrt14__mask"): Ditto. + (*3): Ditto. + (ieee_3): Ditto. + (*_vm3): + Ditto. + (_ieee_vm3 + ): Ditto. + (*ieee_3): Ditto. + (avx_hv4df3): Ditto. + (*sse3_haddv2df3): Ditto. + (sse3_hsubv2df3): Ditto. + (*sse3_haddv2df3_low): Ditto. + (*sse3_hsubv2df3_low): Ditto. + (avx_hv8sf3): Ditto. + (sse3_hv4sf3): Ditto. + (*reducep): Ditto. + (reduces): Ditto. + (*_eq3_1): Ditto. + (_andnot3): Ditto. + (*3): Ditto. + (*andnot3): Ditto. + (3): Ditto. + (*tf3): Ditto. + (vec_set_0): Ditto. + (@vec_set_0): Ditto. + (*sse4_1_extractps): Ditto. + (vec_extract): Ditto. + (_align): Ditto. + (avx512bw_pmaddwd512): Ditto. + (*avx2_pmaddw): Ditto. + (*sse2_pmaddwd): Ditto. + (*avx2_3): Ditto. + (*avx512f_3): Ditto. + (*avx512bw_3): Ditto. + (*sse4_1_3): Ditto. + (*v8hi3): Ditto. + (*v16qi3): Ditto. + (*andnot3_mask): Ditto. + (*3): Ditto. + (v1ti3): Ditto. + (_pinsr): Ditto. + (*_vinsert_0): Ditto. + (_vinsert + _1): Ditto. + (vec_set_lo_): Ditto. + (vec_set_hi_): Ditto. 
+ (avx512dq_shuf_64x2_1): Ditto. + (avx512f_shuf_64x2_1): Ditto. + (*avx512f_shuf_64x2_1_1): Ditto. + (avx512vl_shuf_32x4_1): Ditto. + (avx512f_shuf_32x4_1): Ditto. + (*avx512f_shuf_32x4_1_1): Ditto. + (*vec_extract): Ditto. + (*vec_extract_zext): Ditto. + (*vec_extractv16qi_zext): Ditto. + (*vec_extractv4si): Ditto. + (*vec_extractv4si_zext): Ditto. + (*vec_extractv2di_1): Ditto. + (*vec_concatv2si_sse4_1): Ditto. + (vec_concatv2di): Ditto. + (*_uavg3): Ditto. + (*_psadbw): Ditto. + (_movmsk): Ditto. + (*_movmsk_ext): Ditto. + (_pmovmskb): Ditto. + (*_pmovmskb_zext): Ditto. + (*sse2_maskmovdqu): Ditto. + (avx2_phwv16hi3): Ditto. + (ssse3_phwv8hi3): Ditto. + (ssse3_phdv4si3): Ditto. + (avx2_phdv8si3): Ditto. + (avx2_pmaddubsw256): Ditto. + (avx512bw_pmaddubsw512): Ditto. + (ssse3_pmaddubsw128): Ditto. + (_psign3): Ditto. + (ssse3_psign3): Ditto. + (*abs2): Ditto. + (abs2_mask): Ditto. + (abs2_mask): Ditto. + (sse4a_movnt): Ditto. + (sse4a_vmmovnt): Ditto. + (_blend): Ditto. + (_blendv): Ditto. + (sse4_1_blendv): Ditto. + (_movntdqa): Ditto. + (_pblendvb): Ditto. + (sse4_1_pblend): Ditto. + (*avx2_pblend): Ditto. + (avx2_pblendd): Ditto. + (avx2_v16qiv16hi2): Ditto. + (avx512bw_v32qiv32hi2): Ditto. + (sse4_1_v8qiv8hi2): Ditto. + (*sse4_1_v8qiv8hi2_1): Ditto. + (avx512f_v16qiv16si2): Ditto. + (avx2_v8qiv8si2): Ditto. + (*avx2_v8qiv8si2_1): Ditto. + (sse4_1_v4qiv4si2): Ditto. + (*sse4_1_v4qiv4si2_1): Ditto. + (avx512f_v16hiv16si2): Ditto. + (avx2_v8hiv8si2): Ditto. + (sse4_1_v4hiv4si2): Ditto. + (*sse4_1_v4hiv4si2_1): Ditto. + (avx512f_v8qiv8di2): Ditto. + (*avx512f_v8qiv8di2_1): Ditto. + (avx2_v4qiv4di2): Ditto. + (*avx2_v4qiv4di2_1): Ditto. + (sse4_1_v2qiv2di2): Ditto. + (*sse4_1_v2qiv2di2_1): Ditto. + (avx512f_v8hiv8di2): Ditto. + (avx2_v4hiv4di2): Ditto. + (*avx2_v4hiv4di2_1): Ditto. + (sse4_1_v2hiv2di2): Ditto. + (*sse4_1_v2hiv2di2_1): Ditto. + (avx512f_v8siv8di2): Ditto. + (avx2_v4siv4di2): Ditto. + (sse4_1_v2siv2di2): Ditto. + (*sse4_1_v2siv2di2_1): Ditto. 
+ (sse4_1_round): Ditto. + (*sse4_1_round"): Ditto. + (sse4_2_pcmpestri): Ditto. + (sse4_2_pcmpestrm): Ditto. + (sse4_2_pcmpestr_cconly): Ditto. + (sse4_2_pcmpistri): Ditto. + (sse4_2_pcmpistrm): Ditto. + (sse4_2_pcmpistr_cconly): Ditto. + (xop_phaddbw): Ditto. + (xop_phaddbd): Ditto. + (xop_phaddbq): Ditto. + (xop_phaddwd): Ditto. + (xop_phaddwq): Ditto. + (xop_phadddq): Ditto. + (xop_phsubbw): Ditto. + (xop_phsubwd): Ditto. + (xop_phsubdq): Ditto. + (aesenc): Ditto. + (aesenclast): Ditto. + (aesdec): Ditto. + (aesdeclast): Ditto. + (aesimc): Ditto. + (aeskeygenassist): Ditto. + (_permvar): Ditto. + (avx2_perm_1): Ditto. + (_permvar): Ditto. + (avx512f_perm_1): Ditto. + (avx512f_broadcast): Ditto. + (avx_vbroadcastf128_): Ditto. + (avx512vl_broadcast_1): Ditto. + (avx512dq_broadcast_1): Ditto. + (*_vpermi2var3_mask): Ditto. + (_vpermt2var3): Ditto. + (_vpermt2var3_mask): Ditto. + (*avx_vperm2f128_nozero): Ditto. + (vec_set_lo_): Ditto. + (vec_set_hi_): Ditto. + (vec_set_lo_): Ditto. + (vec_set_hi_): Ditto. + (vec_set_lo_v32qi): Ditto. + (_maskload): Ditto. + (_maskstore): Ditto. + (avx_vec_concat): Ditto. + (_compress_mask): Ditto. + (compress_mask): Ditto. + (_compressstore_mask): Ditto. + (compressstore_mask): Ditto. + (expand_mask): Ditto. + (avx512bw_dbpsadbw): Ditto. + (clz2): Ditto. + (vpmadd52v8di): Ditto. + (vpmadd52): Ditto. + (vpmadd52_maskz_1): Ditto. + (vpmadd52_mask): Ditto. + (vaesdec_): Ditto. + (vaesdeclast_): Ditto. + (vaesenc_): Ditto. + (vaesenclast_): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/builtin_target.c: Add handling for HYGON CPUs by + validating the vendor and invoking HYGON-specific CPU detection. + * gcc.target/i386/funcspec-56.inc: Test function target attribute on + {arch,tune}=c86-4g-m{4,6,7}. + * g++.target/i386/mv33.C: New test. 
+--- + gcc/common/config/i386/cpuinfo.h | 57 + + gcc/common/config/i386/i386-common.cc | 20 +- + gcc/common/config/i386/i386-cpuinfo.h | 5 + + gcc/config.gcc | 26 +- + gcc/config/i386/c86-4g-m7.md | 1983 +++++++++++++++++ + gcc/config/i386/c86-4g.md | 1204 ++++++++++ + gcc/config/i386/cpuid.h | 4 + + gcc/config/i386/driver-i386.cc | 18 + + gcc/config/i386/i386-c.cc | 21 + + gcc/config/i386/i386-options.cc | 9 +- + gcc/config/i386/i386.cc | 5 +- + gcc/config/i386/i386.h | 18 + + gcc/config/i386/i386.md | 39 +- + gcc/config/i386/mmx.md | 25 + + gcc/config/i386/sse.md | 246 +- + gcc/config/i386/x86-tune-costs.h | 276 +++ + gcc/config/i386/x86-tune-sched.cc | 6 + + gcc/config/i386/x86-tune.def | 84 +- + gcc/doc/extend.texi | 12 + + gcc/doc/invoke.texi | 21 + + gcc/testsuite/g++.target/i386/mv33.C | 42 + + .../gcc.target/i386/builtin_target.c | 6 + + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 6 + + 23 files changed, 4076 insertions(+), 57 deletions(-) + create mode 100644 gcc/config/i386/c86-4g-m7.md + create mode 100644 gcc/config/i386/c86-4g.md + create mode 100644 gcc/testsuite/g++.target/i386/mv33.C + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index f2b4b9593b7..9ec5d9f4750 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -315,6 +315,48 @@ get_amd_cpu (struct __processor_model *cpu_model, + return cpu; + } + ++/* Get the specific type of HYGON CPU and return HYGON CPU name. Return ++ NULL for unknown HYGON CPU. 
*/ ++ ++static inline const char * ++get_hygon_cpu (struct __processor_model *cpu_model, ++ struct __processor_model2 *cpu_model2, ++ unsigned int *cpu_features2 __attribute__((unused))) ++{ ++ const char *cpu = NULL; ++ unsigned int family = cpu_model2->__cpu_family; ++ unsigned int model = cpu_model2->__cpu_model; ++ ++ switch (family) ++ { ++ case 0x18: ++ cpu_model->__cpu_type = HYGONFAM18H; ++ if (model == 0x4) ++ { ++ cpu = "c86-4g-m4"; ++ CHECK___builtin_cpu_is ("c86-4g-m4"); ++ cpu_model->__cpu_subtype = HYGONFAM18H_C86_4G_M4; ++ } ++ else if (model == 0x6) ++ { ++ cpu = "c86-4g-m6"; ++ CHECK___builtin_cpu_is ("c86-4g-m6"); ++ cpu_model->__cpu_subtype = HYGONFAM18H_C86_4G_M6; ++ } ++ else if (model == 0x7) ++ { ++ cpu = "c86-4g-m7"; ++ CHECK___builtin_cpu_is ("c86-4g-m7"); ++ cpu_model->__cpu_subtype = HYGONFAM18H_C86_4G_M7; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ return cpu; ++} ++ + /* Get the specific type of Intel CPU and return Intel CPU name. Return + NULL for unknown Intel CPU. */ + +@@ -1042,6 +1084,21 @@ cpu_indicator_init (struct __processor_model *cpu_model, + cpu_model->__cpu_vendor = VENDOR_CYRIX; + else if (vendor == signature_NSC_ebx) + cpu_model->__cpu_vendor = VENDOR_NSC; ++ else if (vendor == signature_HYGON_ebx) ++ { ++ /* Adjust model and family for HYGON CPUS. */ ++ if (family == 0x0f) ++ { ++ family += extended_family; ++ model += extended_model; ++ } ++ cpu_model2->__cpu_family = family; ++ cpu_model2->__cpu_model = model; ++ ++ /* Get CPU type. 
*/ ++ get_hygon_cpu (cpu_model, cpu_model2, cpu_features2); ++ cpu_model->__cpu_vendor = VENDOR_HYGON; ++ } + else + cpu_model->__cpu_vendor = VENDOR_OTHER; + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index dff43c9c786..e2877e0d547 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1834,7 +1834,10 @@ const char *const processor_names[] = + "znver1", + "znver2", + "znver3", +- "znver4" ++ "znver4", ++ "c86-4g-m4", ++ "c86-4g-m6", ++ "c86-4g-m7" + }; + + /* Guarantee that the array is aligned with enum processor_type. */ +@@ -2091,6 +2094,15 @@ const pta processor_alias_table[] = + | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT, + M_CPU_TYPE (AMD_BTVER2), P_PROC_BMI}, ++ {"c86-4g-m4", PROCESSOR_C86_4G_M4, CPU_C86_4G_M4, ++ PTA_C86_4G_M4, ++ M_CPU_SUBTYPE (HYGONFAM18H_C86_4G_M4), P_PROC_AVX2}, ++ {"c86-4g-m6", PROCESSOR_C86_4G_M6, CPU_C86_4G_M6, ++ PTA_C86_4G_M6, ++ M_CPU_SUBTYPE (HYGONFAM18H_C86_4G_M6), P_PROC_AVX2}, ++ {"c86-4g-m7", PROCESSOR_C86_4G_M7, CPU_C86_4G_M7, ++ PTA_C86_4G_M7, ++ M_CPU_SUBTYPE (HYGONFAM18H_C86_4G_M7), P_PROC_AVX512F}, + + {"generic", PROCESSOR_GENERIC, CPU_GENERIC, + PTA_64BIT +@@ -2111,10 +2123,14 @@ const pta processor_alias_table[] = + M_CPU_TYPE (AMDFAM10H_SHANGHAI), P_NONE}, + {"istanbul", PROCESSOR_GENERIC, CPU_GENERIC, 0, + M_CPU_TYPE (AMDFAM10H_ISTANBUL), P_NONE}, ++ {"hygon", PROCESSOR_GENERIC, CPU_GENERIC, 0, ++ M_VENDOR (VENDOR_HYGON), P_NONE}, ++ {"hygonfam18h", PROCESSOR_GENERIC, CPU_GENERIC, 0, ++ M_CPU_TYPE (HYGONFAM18H), P_NONE}, + }; + + /* NB: processor_alias_table stops at the "generic" entry. */ +-unsigned int const pta_size = ARRAY_SIZE (processor_alias_table) - 7; ++unsigned int const pta_size = ARRAY_SIZE (processor_alias_table) - 9; + unsigned int const num_arch_names = ARRAY_SIZE (processor_alias_table); + + /* Provide valid option values for -march and -mtune options. 
*/ +diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index cd1fa29b3a2..99254b72e1b 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -30,6 +30,7 @@ enum processor_vendor + VENDOR_INTEL = 1, + VENDOR_AMD, + VENDOR_ZHAOXIN, ++ VENDOR_HYGON, + VENDOR_OTHER, + VENDOR_CENTAUR, + VENDOR_CYRIX, +@@ -60,6 +61,7 @@ enum processor_types + INTEL_TREMONT, + AMDFAM19H, + ZHAOXIN_FAM7H, ++ HYGONFAM18H, + CPU_TYPE_MAX, + BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX + }; +@@ -97,6 +99,9 @@ enum processor_subtypes + AMDFAM19H_ZNVER4, + ZHAOXIN_FAM7H_YONGFENG, + ZHAOXIN_FAM7H_SHIJIDADAO, ++ HYGONFAM18H_C86_4G_M4, ++ HYGONFAM18H_C86_4G_M6, ++ HYGONFAM18H_C86_4G_M7, + CPU_SUBTYPE_MAX + }; + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index e0e6e6f9a5a..35641a74d68 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -672,7 +672,7 @@ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ + skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ + sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ + nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 lujiazui yongfeng \ +-shijidadao native" ++shijidadao c86-4g-m4 c86-4g-m6 c86-4g-m7 native" + + # Additional x86 processors supported by --with-cpu=. Each processor + # MUST be separated by exactly one space. 
+@@ -3877,6 +3877,18 @@ case ${target} in + cpu=pentiumpro + arch_without_sse2=yes + ;; ++ c86_4g_m4-*) ++ arch=c86-4g-m4 ++ cpu=c86-4g-m4 ++ ;; ++ c86_4g_m6-*) ++ arch=c86-4g-m6 ++ cpu=c86-4g-m6 ++ ;; ++ c86_4g_m7-*) ++ arch=c86-4g-m7 ++ cpu=c86-4g-m7 ++ ;; + *) + arch=pentiumpro + cpu=generic +@@ -3971,6 +3983,18 @@ case ${target} in + arch=corei7 + cpu=corei7 + ;; ++ c86_4g_m4-*) ++ arch=c86-4g-m4 ++ cpu=c86-4g-m4 ++ ;; ++ c86_4g_m6-*) ++ arch=c86-4g-m6 ++ cpu=c86-4g-m6 ++ ;; ++ c86_4g_m7-*) ++ arch=c86-4g-m7 ++ cpu=c86-4g-m7 ++ ;; + *) + arch=x86-64 + cpu=generic +diff --git a/gcc/config/i386/c86-4g-m7.md b/gcc/config/i386/c86-4g-m7.md +new file mode 100644 +index 00000000000..214c45d1468 +--- /dev/null ++++ b/gcc/config/i386/c86-4g-m7.md +@@ -0,0 +1,1983 @@ ++;; Copyright (C) 2026 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++;; ++ ++;; HYGON c86-4g-m7 Scheduling ++;; Modeling automatons for decoders, integer execution pipes, ++;; AGU pipes, branch, floating point execution and fp store units. ++(define_automaton "c86_4g_m7, c86_4g_m7_ieu, c86_4g_m7_agu, c86_4g_m7_fpu") ++ ++;; Decoders unit has 4 decoders and all of them can decode fast path ++;; and vector type instructions. 
++(define_cpu_unit "c86-4g-m7-decode0" "c86_4g_m7") ++(define_cpu_unit "c86-4g-m7-decode1" "c86_4g_m7") ++(define_cpu_unit "c86-4g-m7-decode2" "c86_4g_m7") ++(define_cpu_unit "c86-4g-m7-decode3" "c86_4g_m7") ++ ++;; Currently blocking all decoders for vector path instructions as ++;; they are dispatched separetely as microcode sequence. ++(define_reservation "c86-4g-m7-vector" "c86-4g-m7-decode0+c86-4g-m7-decode1+c86-4g-m7-decode2+c86-4g-m7-decode3") ++ ++;; Direct instructions can be issued to any of the four decoders. ++(define_reservation "c86-4g-m7-direct" "c86-4g-m7-decode0|c86-4g-m7-decode1|c86-4g-m7-decode2|c86-4g-m7-decode3") ++ ++;; Fix me: Need to revisit this later to simulate fast path double behavior. ++(define_reservation "c86-4g-m7-double" "c86-4g-m7-direct") ++ ++;; Integer unit 4 ALU pipes. ++(define_cpu_unit "c86-4g-m7-ieu0" "c86_4g_m7_ieu") ++(define_cpu_unit "c86-4g-m7-ieu1" "c86_4g_m7_ieu") ++(define_cpu_unit "c86-4g-m7-ieu2" "c86_4g_m7_ieu") ++(define_cpu_unit "c86-4g-m7-ieu3" "c86_4g_m7_ieu") ++ ++;; c86-4g-m7 has an additional branch unit. ++(define_cpu_unit "c86-4g-m7-bru0" "c86_4g_m7_ieu") ++(define_reservation "c86-4g-m7-ieu" "c86-4g-m7-ieu0|c86-4g-m7-ieu1|c86-4g-m7-ieu2|c86-4g-m7-ieu3") ++ ++;; 3 AGU pipes in c86-4g-m7 ++(define_cpu_unit "c86-4g-m7-agu0" "c86_4g_m7_agu") ++(define_cpu_unit "c86-4g-m7-agu1" "c86_4g_m7_agu") ++(define_cpu_unit "c86-4g-m7-agu2" "c86_4g_m7_agu") ++(define_reservation "c86-4g-m7-agu-reserve" "c86-4g-m7-agu0|c86-4g-m7-agu1|c86-4g-m7-agu2") ++ ++;; Load is 4 cycles. We do not model reservation of load unit. ++(define_reservation "c86-4g-m7-load" "c86-4g-m7-agu-reserve") ++(define_reservation "c86-4g-m7-store" "c86-4g-m7-agu-reserve") ++ ++;; vectorpath (microcoded) instructions are single issue instructions. ++;; So, they occupy all the integer units. 
++(define_reservation "c86-4g-m7-ivector" "c86-4g-m7-ieu0+c86-4g-m7-ieu1 ++ +c86-4g-m7-ieu2+c86-4g-m7-ieu3+c86-4g-m7-bru0 ++ +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2") ++ ++;; Floating point unit 4 FP pipes. ++(define_cpu_unit "c86-4g-m7-fpu0" "c86_4g_m7_fpu") ++(define_cpu_unit "c86-4g-m7-fpu1" "c86_4g_m7_fpu") ++(define_cpu_unit "c86-4g-m7-fpu2" "c86_4g_m7_fpu") ++(define_cpu_unit "c86-4g-m7-fpu3" "c86_4g_m7_fpu") ++(define_reservation "c86-4g-m7-fpu" "c86-4g-m7-fpu0|c86-4g-m7-fpu1|c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++(define_reservation "c86-4g-m7-fpu_0_2" "c86-4g-m7-fpu0|c86-4g-m7-fpu2") ++(define_reservation "c86-4g-m7-fpu_1_3" "c86-4g-m7-fpu1|c86-4g-m7-fpu3") ++(define_reservation "c86-4g-m7-fpu_0_1" "c86-4g-m7-fpu0|c86-4g-m7-fpu1") ++(define_reservation "c86-4g-m7-fpu_0_2x2" "c86-4g-m7-fpu0*2|c86-4g-m7-fpu2*2") ++(define_reservation "c86-4g-m7-fpu_0_2x4" "c86-4g-m7-fpu0*4|c86-4g-m7-fpu2*4") ++(define_reservation "c86-4g-m7-fvector" "c86-4g-m7-fpu0+c86-4g-m7-fpu1 ++ +c86-4g-m7-fpu2+c86-4g-m7-fpu3 ++ +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2") ++ ++;; IMOV/IMOVX ++(define_insn_reservation "c86_4g_m7_imov_xchg" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "imov") ++ (and (eq_attr "c86_decode" "vector") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct") ++ ++(define_insn_reservation "c86_4g_m7_imov_xchg_load" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "imov") ++ (and (eq_attr "c86_decode" "vector") ++ (eq_attr "memory" "!none")))) ++ "c86-4g-m7-direct,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_imovx_cwde" 2 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "imovx") ++ (and (eq_attr "c86_decode" "double") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu") ++ ++(define_insn_reservation "c86_4g_m7_imov" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "imov,imovx") ++ (and (eq_attr "c86_decode" "direct") ++ (eq_attr "memory" "none")))) ++ 
"c86-4g-m7-direct,c86-4g-m7-ieu") ++ ++(define_insn_reservation "c86_4g_m7_imov_load" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "imov,imovx") ++ (and (eq_attr "c86_decode" "!vector") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu") ++ ++(define_insn_reservation "c86_4g_m7_imov_store" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "imov,imovx") ++ (and (eq_attr "c86_decode" "!vector") ++ (eq_attr "memory" "store")))) ++ "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-ieu") ++ ++;; PUSH ++(define_insn_reservation "c86_4g_m7_push" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "push,sse") ++ (eq_attr "memory" "store"))) ++ "c86-4g-m7-direct,c86-4g-m7-store") ++ ++(define_insn_reservation "c86_4g_m7_push_mem" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "both"))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-store") ++ ++;; POP ++(define_insn_reservation "c86_4g_m7_pop" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-direct,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_pop_mem" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "both"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-store") ++ ++;; IMUL/IMULX ++(define_insn_reservation "c86_4g_m7_imul" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "imul,imulx") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu1") ++ ++(define_insn_reservation "c86_4g_m7_imul_load" 7 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "imul") ++ (eq_attr "memory" "!none"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu1") ++ ++;; IDIV ++(define_insn_reservation "c86_4g_m7_idiv_DI" 41 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-double,c86-4g-m7-ieu3*41") ++ 
++(define_insn_reservation "c86_4g_m7_idiv_SI" 25 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "SI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-double,c86-4g-m7-ieu3*25") ++ ++(define_insn_reservation "c86_4g_m7_idiv_HI" 17 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-double,c86-4g-m7-ieu3*17") ++ ++(define_insn_reservation "c86_4g_m7_idiv_QI" 15 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu3*15") ++ ++(define_insn_reservation "c86_4g_m7_idiv_DI_load" 45 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3*41") ++ ++(define_insn_reservation "c86_4g_m7_idiv_SI_load" 29 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "SI") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3*25") ++ ++(define_insn_reservation "c86_4g_m7_idiv_HI_load" 21 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3*17") ++ ++(define_insn_reservation "c86_4g_m7_idiv_QI_load" 19 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu3*15") ++ ++;; Integer/genaral Instructions ++(define_insn_reservation "c86_4g_m7_insn" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "alu,negnot,rotate1,ishift1,test,incdec,icmp, ++ rotate,rotatex,ishift,ishiftx,icmov") ++ (eq_attr "memory" "none,unknown"))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu") ++ ++(define_insn_reservation 
"c86_4g_m7_insn_load" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "alu,incdec,icmp,test,ishift, ++ ishiftx,icmov,rotate,rotatex") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu") ++ ++(define_insn_reservation "c86_4g_m7_insn_store" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ishift1,rotate1,rotate,incdec, ++ alu,icmov,ishift,negnot,alu1") ++ (eq_attr "memory" "store"))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu,c86-4g-m7-store") ++ ++(define_insn_reservation "c86_4g_m7_insn2_store" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "icmp") ++ (eq_attr "memory" "store"))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu,c86-4g-m7-store") ++ ++(define_insn_reservation "c86_4g_m7_insn_both" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "alu,negnot,rotate1,ishift1,incdec,rotate, ++ rotatex,ishift,ishiftx,icmov") ++ (eq_attr "memory" "both"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu,c86-4g-m7-store") ++ ++(define_insn_reservation "c86_4g_m7_setcc" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "setcc") ++ (eq_attr "memory" "none,unknown"))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu0|c86-4g-m7-ieu3") ++ ++(define_insn_reservation "c86_4g_m7_setcc_load" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "setcc") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu0|c86-4g-m7-ieu3") ++ ++(define_insn_reservation "c86_4g_m7_setcc_store" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "setcc") ++ (eq_attr "memory" "store"))) ++ "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-ieu0|c86-4g-m7-ieu3") ++ ++;; ALU1 ++(define_insn_reservation "c86_4g_m7_alu1_double" 2 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "alu1") ++ (and (eq_attr "c86_decode" "double") ++ (eq_attr "memory" "none,unknown")))) ++ "c86-4g-m7-double,c86-4g-m7-ieu") ++ ++(define_insn_reservation "c86_4g_m7_alu1_double_load" 6 ++ (and (eq_attr "cpu" 
"c86_4g_m7") ++ (and (eq_attr "type" "alu1") ++ (and (eq_attr "c86_decode" "double") ++ (eq_attr "memory" "both")))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-store,c86-4g-m7-ieu") ++ ++(define_insn_reservation "c86_4g_m7_alu1_vector" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "alu1") ++ (and (eq_attr "c86_decode" "vector") ++ (eq_attr "memory" "none,unknown")))) ++ "c86-4g-m7-vector,c86-4g-m7-ivector*3") ++ ++(define_insn_reservation "c86_4g_m7_alu1_vector_load" 7 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "alu1") ++ (and (eq_attr "c86_decode" "vector") ++ (eq_attr "memory" "both")))) ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-store,c86-4g-m7-ivector*3") ++ ++(define_insn_reservation "c86_4g_m7_alu1_direct" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "alu1") ++ (and (eq_attr "c86_decode" "direct") ++ (eq_attr "memory" "none,unknown")))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu") ++ ++(define_insn_reservation "c86_4g_m7_alu1_direct_load" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "alu1") ++ (and (eq_attr "c86_decode" "direct") ++ (eq_attr "memory" "both")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-store,c86-4g-m7-ieu") ++ ++;; CALL/CALLV ++(define_insn_reservation "c86_4g_m7_call" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (eq_attr "type" "call,callv")) ++ "c86-4g-m7-double,c86-4g-m7-ieu0|c86-4g-m7-bru0,c86-4g-m7-store") ++ ++;; IBR ++(define_insn_reservation "c86_4g_m7_branch" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-direct,c86-4g-m7-ieu0|c86-4g-m7-bru0") ++ ++(define_insn_reservation "c86_4g_m7_branch_load" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu0|c86-4g-m7-bru0") ++ ++;; LEA ++(define_insn_reservation "c86_4g_m7_lea" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (eq_attr "type" "lea")) ++ 
"c86-4g-m7-direct,c86-4g-m7-ieu") ++ ++;; LEAVE ++(define_insn_reservation "c86_4g_m7_leave" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (eq_attr "type" "leave")) ++ "c86-4g-m7-double,c86-4g-m7-ieu,c86-4g-m7-store") ++ ++;; STR ++(define_insn_reservation "c86_4g_m7_str" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "str") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-vector,c86-4g-m7-ivector*3") ++ ++(define_insn_reservation "c86_4g_m7_str_load" 7 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "str") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-ivector*3") ++ ++ ++(define_insn_reservation "c86_4g_m7_ieu_vector" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "other,multi") ++ (and (eq_attr "unit" "!i387") ++ (eq_attr "memory" "none,unknown")))) ++ "c86-4g-m7-vector,c86-4g-m7-ivector*5") ++ ++(define_insn_reservation "c86_4g_m7_ieu_vector_load" 9 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "other,multi") ++ (and (eq_attr "unit" "!i387") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-ivector*5") ++ ++;; SSEINS ++(define_insn_reservation "c86_4g_m7_sse_insertimm" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseins") ++ (and (eq_attr "memory" "none") ++ (eq_attr "length_immediate" "2")))) ++ "c86-4g-m7-double,c86-4g-m7-fpu0|c86-4g-m7-fpu3,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_sse_insert" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseins") ++ (and (eq_attr "memory" "none") ++ (eq_attr "length_immediate" "!2")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1") ++ ++;; FCMOV ++(define_insn_reservation "c86_4g_m7_fp_cmov" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (eq_attr "type" "fcmov")) ++ "c86-4g-m7-vector,c86-4g-m7-fvector*3") ++ ++;; FLD ++(define_insn_reservation "c86_4g_m7_fp_mov_direct_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ 
(eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") ++ ++;; FST ++(define_insn_reservation "c86_4g_m7_fp_mov_direct_store" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "store")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1,c86-4g-m7-store") ++ ++;; FILD ++(define_insn_reservation "c86_4g_m7_fp_mov_double_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "c86_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") ++ ++;; FIST ++(define_insn_reservation "c86_4g_m7_fp_mov_double_store" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "c86_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "store")))) ++ "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-store") ++ ++(define_insn_reservation "c86_4g_m7_fp_mov_direct" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1") ++ ++;; FSQRT ++(define_insn_reservation "c86_4g_m7fp_sqrt" 22 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fpspc") ++ (eq_attr "c86_attr" "sqrt"))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1*22") ++ ++;; FPSPC ++(define_insn_reservation "c86_4g_m7_fp_spc_direct" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fpspc") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "store"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_fp_spc" 6 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fpspc") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-vector,c86-4g-m7-fvector*6") ++ ++(define_insn_reservation "c86_4g_m7_fp_op_mul" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fop,fmul") ++ (eq_attr "memory" 
"none"))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_fp_op_mul_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fop,fmul") ++ (and (eq_attr "fp_int_src" "false") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_fp_op_imul_load" 16 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fmul") ++ (and (eq_attr "fp_int_src" "true") ++ (eq_attr "memory" "!none")))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu0,c86-4g-m7-fpu_0_2") ++ ++;; FDIV ++(define_insn_reservation "c86_4g_m7_fp_div" 15 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fdiv") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1*7") ++ ++(define_insn_reservation "c86_4g_m7_fp_div_load" 22 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fdiv") ++ (and (eq_attr "fp_int_src" "false") ++ (eq_attr "memory" "!none")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1*7") ++ ++(define_insn_reservation "c86_4g_m7_fp_idiv_load" 26 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fdiv") ++ (and (eq_attr "fp_int_src" "true") ++ (eq_attr "memory" "!none")))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1*7") ++ ++(define_insn_reservation "c86_4g_m7_fp_fsgn" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (eq_attr "type" "fsgn")) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++;; FCMP ++(define_insn_reservation "c86_4g_m7_fp_fcmp" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fcmp") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-double,c86-4g-m7-fpu0,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_fp_fcmp_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "fcmp") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu0,c86-4g-m7-fpu1") ++ ++;; MMX ++(define_insn_reservation "c86_4g_m7_fp_mmx" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (eq_attr "type" 
"mmx")) ++ "c86-4g-m7-direct") ++ ++(define_insn_reservation "c86_4g_m7_mmx_add_cmp" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxadd,mmxcmp") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_mmx_add_cmp_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxadd,mmxcmp") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_mmx_cvt" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxcvt") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_mmx_cvt_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxcvt") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_mmx_shift" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxshft") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_mmx_shift_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxshft") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_mmx_shift_avg" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxshft") ++ (and (eq_attr "c86_attr" "avg") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_mmx_shift_avg_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxshft") ++ (and (eq_attr "c86_attr" "avg") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") ++ ++;; SADBW ++(define_insn_reservation "c86_4g_m7_mmx_shift_sadbw" 3 ++ (and (eq_attr 
"cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxshft") ++ (and (eq_attr "c86_attr" "sadbw") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0") ++ ++(define_insn_reservation "c86_4g_m7_mmx_shift_sadbw_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxshft") ++ (and (eq_attr "c86_attr" "sadbw") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0") ++ ++(define_insn_reservation "c86_4g_m7_mmx_mov" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxmov") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_mmx_mov_store" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxmov") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "store")))) ++ "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_mmx_mov_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxmov") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_mmx_mul" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxmul") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0") ++ ++(define_insn_reservation "c86_4g_m7_mmx_mul_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mmxmul") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0") ++ ++;; PINSR ++(define_insn_reservation "c86_4g_m7_sse_pinsr_reg" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,mmxcvt") ++ (and (eq_attr "c86_attr" "insr") ++ (and (eq_attr "prefix" "orig") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-double,c86-4g-m7-ieu2,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_sse_pinsr_reg_load" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,mmxcvt") ++ (and (eq_attr "c86_attr" "insr") ++ (and (eq_attr "prefix" "orig") ++ 
(eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_avx_vpinsr_reg" 2 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "insr") ++ (and (eq_attr "prefix" "!orig") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu2*2") ++ ++(define_insn_reservation "c86_4g_m7_avx_vpinsr_reg_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "insr") ++ (and (eq_attr "prefix" "!orig") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ ++;; PERM ++(define_insn_reservation "c86_4g_m7_avx512_perm_xmm" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (ior (and (eq_attr "c86_attr" "perm2") ++ (eq_attr "mode" "V4SF,V2DF,TI")) ++ (and (eq_attr "c86_attr" "perm") ++ (eq_attr "mode" "V8SF,V4DF,TI,OI"))) ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_perm_xmm_opload" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (ior (and (eq_attr "c86_attr" "perm2") ++ (eq_attr "mode" "V4SF,V2DF,TI")) ++ (and (eq_attr "c86_attr" "perm") ++ (eq_attr "mode" "V8SF,V4DF,TI,OI"))) ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_permi2_ymm" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm2") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_permi2_zmm" 16 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm2") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr 
"memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_permi2_ymm_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm2") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_avx512_permi2_zmm_load" 23 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm2") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (match_operand 2 "immediate_operand") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x4") ++ ++(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (match_operand 2 "immediate_operand") ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4") ++ ++(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_noimm" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (match_operand 2 "nonimmediate_operand") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_sse_perm_zmm_noimm_load" 15 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (match_operand 2 "nonimmediate_operand") ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation 
"c86_4g_m7_avx_perm_ymm" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx_perm_ymem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "perm") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++;; VINSERT ++(define_insn_reservation "c86_4g_m7_avx512_insertx_ymm" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,sselog1") ++ (and (eq_attr "c86_attr" "insertx") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2,c86-4g-m7-fpu_0_2x2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_insertx_ymem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,sselog1") ++ (and (eq_attr "c86_attr" "insertx") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load,both")))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2,c86-4g-m7-fpu_0_2x2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_insertx_zxmm" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "insertx") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (match_test "GET_MODE_SIZE (GET_MODE (operands[2]))==16") ++ (match_operand 2 "register_operand")))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu_0_2x4,c86-4g-m7-fpu_0_2x4") ++ ++(define_insn_reservation "c86_4g_m7_avx512_insertx_zxmem" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "insertx") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (match_test "GET_MODE_SIZE (GET_MODE (operands[2]))==16") ++ (match_operand 2 "memory_operand")))))) ++ 
"c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4,c86-4g-m7-fpu_0_2x4") ++ ++(define_insn_reservation "c86_4g_m7_avx512_insertx_zymm" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "insertx") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (match_test "GET_MODE_SIZE (GET_MODE (operands[2]))==32") ++ (match_operand 2 "register_operand")))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_insertx_zymem" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "insertx") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (match_test "GET_MODE_SIZE (GET_MODE (operands[2]))==32") ++ (match_operand 2 "memory_operand")))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx_insertx_ymm" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,sselog1") ++ (and (eq_attr "c86_attr" "insertx") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0*2") ++ ++(define_insn_reservation "c86_4g_m7_avx_insertx_ymem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,sselog1") ++ (and (eq_attr "c86_attr" "insertx") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0*2") ++ ++;; SHUF/MULTISHIFTQB ++(define_insn_reservation "c86_4g_m7_avx512_shuf_xymm" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "shufx") ++ (and (not (eq_attr "mode" "V8DF,V16SF,XI")) ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_shuf_zmm" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "shufx") ++ (and (eq_attr "mode" "V8DF,V16SF,XI") ++ (eq_attr "memory" "none"))))) ++ 
"c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_shuf_xymem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "shufx") ++ (and (not (eq_attr "mode" "V8DF,V16SF,XI")) ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_shuf_zmem" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "shufx") ++ (and (eq_attr "mode" "V8DF,V16SF,XI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++;; SSELOGIC ++(define_insn_reservation "c86_4g_m7_sselogic_xymm" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,sselog1") ++ (and (eq_attr "c86_attr" "sselogic") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_sselogic_xymm_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,sselog1") ++ (and (eq_attr "c86_attr" "sselogic") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") ++ ++;; CMPESTR ++(define_insn_reservation "c86_4g_m7_avx512_cmpestr" 6 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "cmpestr") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_cmpestr_load" 13 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "cmpestr") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++;; SSELOG ++(define_insn_reservation "c86_4g_m7_avx512_log" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_log_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr 
"type" "sselog,sselog1,sseshuf,sseshuf1") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") ++ ++;; SSELOG1 ++;; VDBPSADBW ++(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_xymm" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "sadbw") ++ (and (eq_attr "mode" "OI,TI") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_xymem" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "sadbw") ++ (and (eq_attr "mode" "OI,TI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_zmm" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "sadbw") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_zmem" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "sadbw") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++;; ABS ++(define_insn_reservation "c86_4g_m7_avx512_abs" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1,sse") ++ (and (eq_attr "c86_attr" "abs") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_avx512_abs_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1,sse") ++ (and (eq_attr "c86_attr" "abs") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load,both"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") ++ ++;; SIGN ++(define_insn_reservation "c86_4g_m7_avx_sign" 1 ++ (and (eq_attr 
"cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "sign") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0|c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_avx_sign_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "sign") ++ (eq_attr "memory" "!none")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0|c86-4g-m7-fpu3") ++ ++;; BLEND/ABS/AES ++(define_insn_reservation "c86_4g_m7_avx_blend" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "abs,blend,aes") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_avx_blend_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "abs,blend,aes") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_avx512_aes" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1,ssecvt,sse") ++ (and (eq_attr "c86_attr" "aes") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_aes_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1,ssecvt,sse") ++ (and (eq_attr "c86_attr" "aes") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx_aes" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "aes") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0|c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_avx_aes_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and 
(eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "aes") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0|c86-4g-m7-fpu1") ++ ++;; EXTR ++(define_insn_reservation "c86_4g_m7_extr" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1,sselog,mmxcvt") ++ (and (eq_attr "c86_attr" "extr") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_extr_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sselog1,sselog,mmxcvt") ++ (and (eq_attr "c86_attr" "extr") ++ (eq_attr "memory" "!none")))) ++ "c86-4g-m7-double,c86-4g-m7-store,c86-4g-m7-fpu1,c86-4g-m7-fpu_0_1") ++ ++;; SSECOMI ++(define_insn_reservation "c86_4g_m7_avx_ssecomi_comi" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecomi") ++ (and (eq_attr "prefix_extra" "0") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-double,c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_avx_ssecomi_comi_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecomi") ++ (and (eq_attr "prefix_extra" "0") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_avx_ssecomi_test" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecomi") ++ (and (eq_attr "prefix_extra" "1") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1|c86-4g-m7-fpu2") ++ ++(define_insn_reservation "c86_4g_m7_avx_ssecomi_test_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecomi") ++ (and (eq_attr "prefix_extra" "1") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu2") ++ ++;; SSEIMUL ++(define_insn_reservation "c86_4g_m7_avx512_imul" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "prefix" "evex") ++ 
(eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_imul_mem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx_imul" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0|c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_avx_imul_mem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0|c86-4g-m7-fpu3") ++ ++;; SSEMOV ++(define_insn_reservation "c86_4g_m7_avx512_mov_vmov" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov,sseiadd") ++ (and (eq_attr "c86_attr" "other,blend,maxmin") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_avx512_mov_vmov_store" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "store")))) ++ "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_avx512_mov_vmov_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov,sseiadd") ++ (and (eq_attr "c86_attr" "other,blend,maxmin") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_avx512_vpmovx_y" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "vpmovx") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "OI,V8SF,V4DF") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2") ++ ++(define_insn_reservation 
"c86_4g_m7_avx512_vpmovx_y_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov,sselog1") ++ (and (eq_attr "c86_attr" "vpmovx") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "OI,V8SF,V4DF") ++ (eq_attr "memory" "load,both")))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_vpmovx_z" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "vpmovx") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x4") ++ ++(define_insn_reservation "c86_4g_m7_avx512_vpmovx_z_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "vpmovx") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4") ++ ++(define_insn_reservation "c86_4g_m7_avx512_vpmovx_x" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "vpmovx") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "TI,SI") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_vpmovx_x_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "vpmovx") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "TI,SI") ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx_vpmovx_xx" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "vpmovx") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1|c86-4g-m7-fpu2") ++ ++(define_insn_reservation "c86_4g_m7_avx_vpmovx_xx_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "vpmovx") ++ (and 
(eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load,both"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu2") ++ ++;; EXPAND ++(define_insn_reservation "c86_4g_m7_avx512_expand" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "expand,compress") ++ (and (not (eq_attr "mode" "XI,V16SF,V8DF")) ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_expand_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "expand,compress") ++ (and (not (eq_attr "mode" "XI,V16SF,V8DF")) ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_expand_z" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "expand,compress") ++ (and (eq_attr "mode" "XI,V16SF,V8DF") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_expand_z_load" 17 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "expand,compress") ++ (and (eq_attr "mode" "XI,V16SF,V8DF") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++;; MOVNT ++(define_insn_reservation "c86_4g_m7_avx512_movnt_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "movnt") ++ (and (eq_attr "mode" "XI,V16SF,V8DF") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-double,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_avx512_movnt_store" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "movnt") ++ (and (eq_attr "mode" "XI,V16SF,V8DF") ++ (eq_attr "memory" "store"))))) ++ "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1*2") ++ 
++(define_insn_reservation "c86_4g_m7_sse_movnt_store" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov,mmxmov") ++ (and (eq_attr "c86_attr" "movnt") ++ (and (not (eq_attr "mode" "XI,V16SF,V8DF")) ++ (eq_attr "memory" "!none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_sse_movnt_xy" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "movnt") ++ (and (not (eq_attr "mode" "XI,V16SF,V8DF")) ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++;; BLENDV ++(define_insn_reservation "c86_4g_m7_avx512_blendv" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "blendv") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_avx512_blendv_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "c86_attr" "blendv") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ ++;; SSEISHFT ++(define_insn_reservation "c86_4g_m7_avx512_sseishft_aligr" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseishft") ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sseishft_aligr_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseishft") ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sseishft_vshift" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseishft") ++ (and (eq_attr "prefix_extra" "!1") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation 
"c86_4g_m7_avx512_sseishft_vshift_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseishft") ++ (and (eq_attr "prefix_extra" "!1") ++ (eq_attr "memory" "!none")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") ++ ++ ++;; SSEADD ++(define_insn_reservation "c86_4g_m7_avx512_sseadd_maxmin_xy" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd,sse") ++ (and (eq_attr "c86_attr" "maxmin") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "memory" "none") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sseadd_maxmin_xy_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd,sse") ++ (and (eq_attr "c86_attr" "maxmin") ++ (and (eq_attr "prefix" "evex") ++ (ior (eq_attr "memory" "load") ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx_sseadd_maxmin" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd,sse") ++ (and (eq_attr "c86_attr" "maxmin") ++ (and (eq_attr "prefix" "vex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_avx_sseadd_maxmin_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd,sse") ++ (and (eq_attr "c86_attr" "maxmin") ++ (and (eq_attr "prefix" "vex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_sse_sseadd_maxmin" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd,sse") ++ (and (eq_attr "c86_attr" "maxmin") ++ (and (eq_attr "prefix" "orig") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_sse_sseadd_maxmin_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd,sse") ++ (and (eq_attr "c86_attr" "maxmin") ++ (and (eq_attr 
"prefix" "orig") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ ++;; SUB/ADD ++(define_insn_reservation "c86_4g_m7_avx512_sseadd_xy" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sseadd_xy_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3") ++ ++;; HADD/HSUB ++(define_insn_reservation "c86_4g_m7_avx_sseadd_hplus" 7 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd,sseadd1") ++ (and (eq_attr "c86_attr" "hplus") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx_sseadd_hplus_load" 14 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseadd,sseadd1") ++ (and (eq_attr "c86_attr" "hplus") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++;; SSEIADD ++(define_insn_reservation "c86_4g_m7_avx512_sseiadd_madd" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "sadbw,madd") ++ (and (ior (eq_attr "prefix" "evex") ++ (eq_attr "mode" "XI")) ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sseiadd_madd_mem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "sadbw,madd") ++ (and (ior (eq_attr "prefix" "evex") ++ (eq_attr "mode" "XI")) ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx_sseiadd_sadbw" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "sadbw") ++ (and (eq_attr "prefix" 
"vex,maybe_evex") ++ (and (eq_attr "mode" "TI,OI") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_avx_sseiadd_sadbw_mem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "sadbw") ++ (and (eq_attr "prefix" "vex,maybe_evex") ++ (and (eq_attr "mode" "TI,OI") ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_sse_sseiadd_sadbw" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "sadbw") ++ (and (eq_attr "prefix" "orig") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0|c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_sse_sseiadd_sadbw_mem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "sadbw") ++ (and (eq_attr "prefix" "orig") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0|c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_sse_sseiadd_madd" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "madd") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0") ++ ++(define_insn_reservation "c86_4g_m7_sse_sseiadd_madd_mem" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "madd") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0") ++ ++;; AVG ++(define_insn_reservation "c86_4g_m7_avx512_sseiadd_avg" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "avg") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sseiadd_avg_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and 
(eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "avg") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_avx_sseiadd_hplus" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd,sseiadd1") ++ (and (eq_attr "c86_attr" "hplus") ++ (and (eq_attr "prefix" "vex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx_sseiadd_hplus_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd,sseiadd1") ++ (and (eq_attr "c86_attr" "hplus") ++ (and (eq_attr "prefix" "vex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_sse_sseiadd_hplus" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd,sseiadd1") ++ (and (eq_attr "c86_attr" "hplus") ++ (and (eq_attr "prefix" "orig") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector,c86-4g-m7-fpu0*2") ++ ++(define_insn_reservation "c86_4g_m7_sse_sseiadd_hplus_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sseiadd,sseiadd1") ++ (and (eq_attr "c86_attr" "hplus") ++ (and (eq_attr "prefix" "orig") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu0*2") ++ ++;; SSEMUL ++(define_insn_reservation "c86_4g_m7_avx512_ssemul" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "none"))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu0") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssemul_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "load"))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0") ++ ++;; SSEDIV ++(define_insn_reservation "c86_4g_m7_avx512_ssediv" 13 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssediv") ++ (and (not (eq_attr "mode" "V16SF,V8DF")) ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu3*7") ++ 
++(define_insn_reservation "c86_4g_m7_avx512_ssediv_mem" 20 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssediv") ++ (and (not (eq_attr "mode" "V16SF,V8DF")) ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3*7") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssediv_z" 24 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-double,c86-4g-m7-fpu3*7") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssediv_zmem" 31 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu3*7") ++ ++;; SSECMP ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "V2DF,V4DF,V8SF,V4SF,SF,DF") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "V2DF,V4DF,V8SF,V4SF,SF,DF") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_z" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_z_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation 
"c86_4g_m7_avx512_ssecmp_vp" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "TI,OI") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "TI,OI") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx_ssecmp_vp" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu") ++ ++(define_insn_reservation "c86_4g_m7_avx_ssecmp_vp_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") ++ ++;; VPTEST ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test" 6 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "TI,OI") ++ (and (eq_attr "c86_attr" "ptest") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_load" 13 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "TI,OI") ++ (and (eq_attr "c86_attr" "ptest") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_z" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "XI") ++ (and (eq_attr "c86_attr" "ptest") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ 
++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_z_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "XI") ++ (and (eq_attr "c86_attr" "ptest") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++;; SSECVT. Insns whose operand 1 is V8DI/V8DF belong to the *_y_z entries, so the *_xy entries exclude them; the load form tests nonimmediate_operand so that a memory source is excluded too. ++(define_insn_reservation "c86_4g_m7_avx512_ssecvt_xy" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "TI,V4SF,V2DF,OI,V8SF,V4DF") ++ (and (not (ior (match_operand:V8DI 1 "register_operand") ++ (match_operand:V8DF 1 "register_operand"))) ++ (eq_attr "memory" "none"))))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecvt_xy_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "mode" "TI,V4SF,V2DF,OI,V8SF,V4DF") ++ (and (not (ior (match_operand:V8DI 1 "nonimmediate_operand") ++ (match_operand:V8DF 1 "nonimmediate_operand"))) ++ (eq_attr "memory" "!none"))))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecvt_y_z" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "OI,V8SF,V4DF") ++ (and (eq_attr "c86_attr" "other") ++ (and (ior (match_operand:V8DI 1 "register_operand") ++ (match_operand:V8DF 1 "register_operand")) ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecvt_y_z_load" 15 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "OI,V8SF,V4DF") ++ (and (eq_attr "c86_attr" "other") ++ (and (ior (match_operand:V8DI 1 "memory_operand") ++ (match_operand:V8DF 1 "memory_operand")) ++ (eq_attr "memory" "!none")))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_1_3") ++
++(define_insn_reservation "c86_4g_m7_avx512_ssecvt_z" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "mode" "XI,V16SF,V8DF") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecvt_z_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "mode" "XI,V16SF,V8DF") ++ (eq_attr "memory" "!none"))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx_ssecvt" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "prefix" "!evex") ++ (and (eq_attr "mmx_isa" "base") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ ++(define_insn_reservation "c86_4g_m7_avx_ssecvt_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "prefix" "!evex") ++ (and (eq_attr "mmx_isa" "base") ++ (eq_attr "memory" "!none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ ++;; CVTPI ++(define_insn_reservation "c86_4g_m7_sse_ssecvt_pspi" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "SF,DI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_sse_ssecvt_pspi_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "SF,DI") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") ++ ++(define_insn_reservation "c86_4g_m7_sse_ssecvt_pi" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (not (eq_attr "mode" "SF,DI")) ++ (and (eq_attr "mmx_isa" "native") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation 
"c86_4g_m7_sse_ssecvt_pi_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecvt") ++ (and (not (eq_attr "mode" "SF,DI")) ++ (and (eq_attr "mmx_isa" "native") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fpu_0_1") ++ ++;; SSEMULADD ++(define_insn_reservation "c86_4g_m7_avx512_muladd" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemuladd") ++ (and (eq_attr "c86_attr" "other") ++ (and (not (eq_attr "isa" "fma,fma4")) ++ (eq_attr "mode" "V32HF,V16SF,V8DF,XI") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_muladd_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemuladd") ++ (and (eq_attr "c86_attr" "other") ++ (and (not (eq_attr "isa" "fma,fma4")) ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_muladd_madd" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemuladd,sse") ++ (and (eq_attr "c86_attr" "madd,rcp") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_muladd_madd_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemuladd,sse") ++ (and (eq_attr "c86_attr" "madd,rcp") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_fma_muladd" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemuladd") ++ (and (eq_attr "isa" "fma,fma4") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_fma_muladd_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssemuladd") ++ (and (eq_attr "isa" "fma,fma4") ++ (eq_attr "memory" "load")))) ++ 
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ ++;; SSE ++(define_insn_reservation "c86_4g_m7_avx512_sse_range" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "length_immediate" "!1") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "c86_decode" "direct") ++ (eq_attr "memory" "none"))))))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_range_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "length_immediate" "!1") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_x" 2 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_decode" "vector") ++ (and (eq_attr "mode" "TI") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_x_load" 9 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_decode" "vector") ++ (and (eq_attr "mode" "TI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_decode" "vector") ++ (and (eq_attr "mode" "OI") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_decode" "vector") ++ (and (eq_attr "mode" "OI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" 
"sse") ++ (and (eq_attr "c86_decode" "vector") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z_load" 15 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_decode" "vector") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_class" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "length_immediate" "1") ++ (and (not (eq_attr "mode" "V32HF,V16SF,V8DF")) ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_class_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "length_immediate" "1") ++ (and (not (eq_attr "mode" "V32HF,V16SF,V8DF")) ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_class_z" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "length_immediate" "1") ++ (and (eq_attr "mode" "V32HF,V16SF,V8DF") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_class_z_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "other") ++ (and (eq_attr "length_immediate" "1") ++ (and (eq_attr "mode" "V32HF,V16SF,V8DF") ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-vector,c86-4g-m7-load") ++ ++(define_insn_reservation "c86_4g_m7_avx_sse" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "rcp,other") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "none"))))) 
++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_avx_sse_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "rcp,other") ++ (and (eq_attr "prefix" "!evex") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt" 16 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu1*7|c86-4g-m7-fpu3*7") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_load" 23 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1*7|c86-4g-m7-fpu3*7") ++ ++;; MSKLOG/MSKMOV ++(define_insn_reservation "c86_4g_m7_avx512_msklog" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "msklog") ++ (eq_attr "c86_decode" "direct"))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_msklog_vector" 4 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "msklog") ++ (eq_attr "c86_decode" "vector"))) ++ "c86-4g-m7-vector") ++ ++(define_insn_reservation "c86_4g_m7_avx512_mskmov_reg_k" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mskmov") ++ (and (match_operand 0 "register_operand" "r") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-double,c86-4g-m7-fpu3,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_mskmov_xy_k" 2 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mskmov") ++ (ior (match_operand:V2DI 0 "register_operand" "v") ++ (match_operand:V4DI 0 "register_operand" "v")))) ++ "c86-4g-m7-double,c86-4g-m7-fpu3,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_mskmov_z_k" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mskmov") ++ (match_operand:V8DI 0 
"register_operand" "v"))) ++ "c86-4g-m7-vector,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_mskmov_k_k" 1 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mskmov") ++ (and (match_operand 0 "register_operand" "k") ++ (match_operand 1 "register_operand" "k")))) ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_mskmov_k_reg" 3 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mskmov") ++ (and (match_operand 0 "register_operand" "k") ++ (match_operand 1 "register_operand" "r")))) ++ "c86-4g-m7-double,c86-4g-m7-fpu1*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") ++ ++(define_insn_reservation "c86_4g_m7_avx512_mskmov_k_m" 8 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "mskmov") ++ (and (match_operand 0 "register_operand" "k") ++ (match_operand 1 "memory_operand")))) ++ "c86-4g-m7-direct,c86-4g-m7-load") +diff --git a/gcc/config/i386/c86-4g.md b/gcc/config/i386/c86-4g.md +new file mode 100644 +index 00000000000..66c4e2cf744 +--- /dev/null ++++ b/gcc/config/i386/c86-4g.md +@@ -0,0 +1,1204 @@ ++;; Copyright (C) 2026 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . 
++;; ++ ++;; c86_decode: decode path of an insn (direct, vector or double); defaults to direct. ++(define_attr "c86_decode" "direct,vector,double" ++ (const_string "direct")) ++;; c86_attr: finer-grained insn class tested by the reservation conditions; defaults to other. ++(define_attr "c86_attr" "other,abs,sqrt,maxmin,blend,blendv,rcp,movnt,avg, ++ sign,sadbw,insr,perm2,perm,insertx,shufx,madd, ++ compress,sselogic,cmpestr,extr,vpmovx,expand,aes, ++ hplus,ptest" ++ (const_string "other")) ++ ++;; HYGON Scheduling ++;; Modeling automatons for decoders, integer execution pipes, ++;; AGU pipes and floating point execution units. ++(define_automaton "c86_4g, c86_4g_ieu, c86_4g_fp, c86_4g_agu") ++ ++;; The decoder unit has 4 decoders and all of them can decode fast path ++;; and vector type instructions. ++(define_cpu_unit "c86-4g-decode0" "c86_4g") ++(define_cpu_unit "c86-4g-decode1" "c86_4g") ++(define_cpu_unit "c86-4g-decode2" "c86_4g") ++(define_cpu_unit "c86-4g-decode3" "c86_4g") ++ ++;; Currently blocking all decoders for vector path instructions as ++;; they are dispatched separately as a microcode sequence. ++;; Fix me: Need to revisit this. ++(define_reservation "c86-4g-vector" "c86-4g-decode0+c86-4g-decode1+c86-4g-decode2+c86-4g-decode3") ++ ++;; Direct instructions can be issued to any of the four decoders. ++(define_reservation "c86-4g-direct" "c86-4g-decode0|c86-4g-decode1|c86-4g-decode2|c86-4g-decode3") ++ ++;; Fix me: Need to revisit this later to simulate fast path double behavior. ++(define_reservation "c86-4g-double" "c86-4g-direct") ++ ++ ++;; Integer unit: 4 ALU pipes. ++(define_cpu_unit "c86-4g-ieu0" "c86_4g_ieu") ++(define_cpu_unit "c86-4g-ieu1" "c86_4g_ieu") ++(define_cpu_unit "c86-4g-ieu2" "c86_4g_ieu") ++(define_cpu_unit "c86-4g-ieu3" "c86_4g_ieu") ++(define_reservation "c86-4g-ieu" "c86-4g-ieu0|c86-4g-ieu1|c86-4g-ieu2|c86-4g-ieu3") ++ ++;; 2 AGU pipes in c86_4g ++;; According to CPU diagram last AGU unit is used only for stores. NOTE(review): the load and store reservations in this file both accept either AGU, so this restriction looks unmodeled -- confirm. ++(define_cpu_unit "c86-4g-agu0" "c86_4g_agu") ++(define_cpu_unit "c86-4g-agu1" "c86_4g_agu") ++(define_reservation "c86-4g-agu-reserve" "c86-4g-agu0|c86-4g-agu1") ++ ++;; Load is 4 cycles.
We do not model reservation of load unit. ++;;(define_reservation "c86-4g-load" "c86-4g-agu-reserve, nothing, nothing, nothing") ++(define_reservation "c86-4g-load" "c86-4g-agu-reserve") ++(define_reservation "c86-4g-store" "c86-4g-agu-reserve") ++ ++;; vectorpath (microcoded) instructions are single issue instructions. ++;; So, they occupy all the integer units. ++(define_reservation "c86-4g-ivector" "c86-4g-ieu0+c86-4g-ieu1 ++ +c86-4g-ieu2+c86-4g-ieu3 ++ +c86-4g-agu0+c86-4g-agu1") ++ ++;; Floating point unit 4 FP pipes. ++(define_cpu_unit "c86-4g-fp0" "c86_4g_fp") ++(define_cpu_unit "c86-4g-fp1" "c86_4g_fp") ++(define_cpu_unit "c86-4g-fp2" "c86_4g_fp") ++(define_cpu_unit "c86-4g-fp3" "c86_4g_fp") ++ ++(define_reservation "c86-4g-fpu" "c86-4g-fp0|c86-4g-fp1|c86-4g-fp2|c86-4g-fp3") ++ ++(define_reservation "c86-4g-fvector" "c86-4g-fp0+c86-4g-fp1 ++ +c86-4g-fp2+c86-4g-fp3 ++ +c86-4g-agu0+c86-4g-agu1") ++ ++;; Call instruction ++(define_insn_reservation "c86_4g_call" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "type" "call,callv")) ++ "c86-4g-double,c86-4g-store,c86-4g-ieu0+c86-4g-ieu3") ++ ++;; General instructions ++(define_insn_reservation "c86_4g_push" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "store"))) ++ "c86-4g-direct,c86-4g-store") ++ ++(define_insn_reservation "c86_4g_push_load" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "both"))) ++ "c86-4g-direct,c86-4g-load+c86-4g-store") ++ ++(define_insn_reservation "c86_4g_pop" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load") ++ ++(define_insn_reservation "c86_4g_pop_mem" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "both"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-store") ++ ++;; Leave ++(define_insn_reservation "c86_4g_leave" 1 ++ (and 
(eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "type" "leave")) ++ "c86-4g-double,c86-4g-ieu+c86-4g-store") ++ ++;; Integer Instructions or General instructions ++;; Multiplications ++;; Reg operands ++(define_insn_reservation "c86_4g_imul" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "imul") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-ieu1") ++ ++(define_insn_reservation "c86_4g_imul_mem" 7 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "imul") ++ (eq_attr "memory" "!none"))) ++ "c86-4g-direct,c86-4g-load, c86-4g-ieu1") ++ ++;; Divisions ++;; Reg operands ++(define_insn_reservation "c86_4g_idiv_DI" 41 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-ieu2*41") ++ ++(define_insn_reservation "c86_4g_idiv_SI" 25 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "SI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-ieu2*25") ++ ++(define_insn_reservation "c86_4g_idiv_HI" 17 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-ieu2*17") ++ ++(define_insn_reservation "c86_4g_idiv_QI" 15 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-ieu2*15") ++ ++;; Mem operands ++(define_insn_reservation "c86_4g_idiv_mem_DI" 45 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-ieu2*41") ++ ++(define_insn_reservation "c86_4g_idiv_mem_SI" 29 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "SI") ++ (eq_attr "memory" "load")))) ++ 
"c86-4g-double,c86-4g-load,c86-4g-ieu2*25") ++ ++(define_insn_reservation "c86_4g_idiv_mem_HI" 21 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-ieu2*17") ++ ++(define_insn_reservation "c86_4g_idiv_mem_QI" 19 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-ieu2*15") ++ ++;; STR ISHIFT which are micro coded. ++;; Fix me: Latency need to be rechecked. ++(define_insn_reservation "c86_4g_str_ishift" 6 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "str,ishift") ++ (eq_attr "memory" "both,store"))) ++ "c86-4g-vector,c86-4g-ivector") ++ ++;; MOV - integer moves ++(define_insn_reservation "c86_4g_load_imov_double" 2 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "double") ++ (and (eq_attr "type" "imovx") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-ieu") ++ ++(define_insn_reservation "c86_4g_load_imov_direct" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "!double") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-ieu") ++ ++(define_insn_reservation "c86_4g_load_imov_double_store" 2 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "double") ++ (and (eq_attr "type" "imovx") ++ (eq_attr "memory" "store")))) ++ "c86-4g-double,c86-4g-ieu,c86-4g-store") ++ ++(define_insn_reservation "c86_4g_load_imov_direct_store" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "!double") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr "memory" "store")))) ++ "c86-4g-direct,c86-4g-ieu,c86-4g-store") ++ ++(define_insn_reservation "c86_4g_load_imov_double_load" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" 
"double") ++ (and (eq_attr "type" "imovx") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-ieu") ++ ++(define_insn_reservation "c86_4g_load_imov_direct_load" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "!double") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load") ++ ++;; INTEGER/GENERAL instructions ++;; register/imm operands only: ALU, ICMP, NEG, NOT, ROTATE, ISHIFT, TEST ++(define_insn_reservation "c86_4g_insn" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") ++ (eq_attr "memory" "none,unknown"))) ++ "c86-4g-direct,c86-4g-ieu") ++ ++(define_insn_reservation "c86_4g_insn_load" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-ieu") ++ ++;; FIXME: The instructions matched here has only two operands, which means memory type can only be none, load or both. ++;; Store memory type handling should never take effect here? ++(define_insn_reservation "c86_4g_insn_store" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") ++ (eq_attr "memory" "store"))) ++ "c86-4g-direct,c86-4g-ieu,c86-4g-store") ++ ++(define_insn_reservation "c86_4g_insn_both" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") ++ (eq_attr "memory" "both"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-ieu,c86-4g-store") ++ ++;; Special latency for multi type. 
++(define_insn_reservation "c86_4g_fp_fcomp" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "unit" "i387") ++ (eq_attr "type" "multi")))) ++ "c86-4g-double,c86-4g-fp0|c86-4g-fp2") ++ ++;; Fix me: Other vector type insns keeping latency 6 as of now. ++(define_insn_reservation "c86_4g_ieu_vector" 6 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "unit" "!i387") ++ (eq_attr "type" "other,str,multi")))) ++ "c86-4g-vector,c86-4g-ivector") ++ ++;; ALU1 register operands. ++(define_insn_reservation "c86_4g_alu1_vector" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "vector") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "none,unknown")))) ++ "c86-4g-vector,c86-4g-ivector") ++ ++(define_insn_reservation "c86_4g_alu1_double" 2 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "double") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "none,unknown")))) ++ "c86-4g-double,c86-4g-ieu") ++ ++(define_insn_reservation "c86_4g_alu1_direct" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "none,unknown")))) ++ "c86-4g-direct,c86-4g-ieu") ++ ++;; Branches : Fix me need to model conditional branches. ++(define_insn_reservation "c86_4g_branch" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct") ++ ++;; Indirect branches check latencies. ++(define_insn_reservation "c86_4g_indirect_branch_mem" 6 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "load"))) ++ "c86-4g-vector,c86-4g-ivector") ++ ++;; LEA executes in ALU units with 1 cycle latency. 
++(define_insn_reservation "c86_4g_lea" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "type" "lea")) ++ "c86-4g-direct,c86-4g-ieu") ++ ++;; Floating point ++(define_insn_reservation "c86_4g_fp_cmov" 6 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "type" "fcmov")) ++ "c86-4g-vector,c86-4g-fvector") ++ ++ ++(define_insn_reservation "c86_4g_fp_mov_direct_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_fp_mov_direct_store" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "store")))) ++ "c86-4g-direct,c86-4g-fp2|c86-4g-fp3,c86-4g-store") ++ ++(define_insn_reservation "c86_4g_fp_mov_double" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_fp_mov_double_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_fp_mov_direct" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fp1") ++ ++;; SQRT ++(define_insn_reservation "c86_4g_fp_sqrt" 22 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "fpspc") ++ (eq_attr "c86_attr" "sqrt"))) ++ "c86-4g-direct,c86-4g-fp1*22") ++ ++(define_insn_reservation "c86_4g_sse_sqrt_sf" 14 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "SF,V4SF,V8SF") ++ (and (eq_attr "memory" "none,unknown") ++ (and (eq_attr "c86_attr" "sqrt") 
++ (eq_attr "type" "sse"))))) ++ "c86-4g-direct,c86-4g-fp1*14") ++ ++(define_insn_reservation "c86_4g_sse_sqrt_sf_mem" 21 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "SF,V4SF,V8SF") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "type" "sse"))))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1*14") ++ ++(define_insn_reservation "c86_4g_sse_sqrt_df" 20 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "DF,V2DF,V4DF") ++ (and (eq_attr "memory" "none,unknown") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "type" "sse"))))) ++ "c86-4g-direct,c86-4g-fp1*20") ++ ++(define_insn_reservation "c86_4g_sse_sqrt_df_mem" 27 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "DF,V2DF,V4DF") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "type" "sse"))))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1*20") ++ ++;; RCP ++(define_insn_reservation "c86_4g_sse_rcp" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V4SF,V8SF,SF") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "c86_attr" "rcp") ++ (eq_attr "type" "sse"))))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_sse_rcp_mem" 12 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V4SF,V8SF,SF") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "c86_attr" "rcp") ++ (eq_attr "type" "sse"))))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") ++ ++;; TODO: AGU? 
++(define_insn_reservation "c86_4g_fp_spc_direct" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_decode" "direct") ++ (and (eq_attr "type" "fpspc") ++ (eq_attr "memory" "store")))) ++ "c86-4g-direct,c86-4g-fp3") ++ ++;; FABS ++(define_insn_reservation "c86_4g_fp_absneg" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "type" "fsgn")) ++ "c86-4g-direct,c86-4g-fp1|c86-4g-fp3") ++ ++;; FCMP ++(define_insn_reservation "c86_4g_fp_fcmp" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "c86_decode" "double") ++ (eq_attr "type" "fcmp")))) ++ "c86-4g-double,c86-4g-fp0,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_fp_fcmp_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "c86_decode" "double") ++ (eq_attr "type" "fcmp")))) ++ "c86-4g-double,c86-4g-load, c86-4g-fp0,c86-4g-fp1") ++ ++;;FADD FSUB FMUL ++(define_insn_reservation "c86_4g_fp_op_mul" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "fop,fmul") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_fp_op_mul_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "fop,fmul") ++ (and (eq_attr "fp_int_src" "false") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_fp_op_imul_load" 16 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "fop,fmul") ++ (and (eq_attr "fp_int_src" "true") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp0,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_fp_op_div" 15 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "fdiv") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp1*15") ++ ++(define_insn_reservation "c86_4g_fp_op_div_load" 22 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and 
(eq_attr "type" "fdiv") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1*15") ++ ++(define_insn_reservation "c86_4g_fp_op_idiv_load" 27 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "fdiv") ++ (and (eq_attr "fp_int_src" "true") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp1*19") ++ ++;; MMX, SSE, SSEn.n, AVX, AVX2 instructions ++(define_insn_reservation "c86_4g_fp_insn" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "type" "mmx")) ++ "c86-4g-direct,c86-4g-fpu") ++ ++(define_insn_reservation "c86_4g_mmx_add" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxadd") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1|c86-4g-fp3") ++ ++(define_insn_reservation "c86_4g_mmx_add_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxadd") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1|c86-4g-fp3") ++ ++(define_insn_reservation "c86_4g_mmx_hadd" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseadd1") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_mmx_hadd_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseadd1") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_mmx_cmp" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxcmp") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp3") ++ ++(define_insn_reservation "c86_4g_mmx_cmp_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxcmp") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp3") ++ ++(define_insn_reservation "c86_4g_mmx_cvt_pck_shuf" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") ++ (eq_attr "memory"
"none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_mmx_cvt_pck_shuf_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_mmx_shift" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxshft") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_mmx_move" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxmov") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_mmx_shift_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxshft") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_mmx_move_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxmov") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_mmx_move_store" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxshft,mmxmov") ++ (eq_attr "memory" "store,both"))) ++ "c86-4g-direct,c86-4g-fp2,c86-4g-store") ++ ++(define_insn_reservation "c86_4g_mmx_mul" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxmul") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_mmx_mul_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "mmxmul") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0") ++ ++;; sseabs ++(define_insn_reservation "c86_4g_sse_abs" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_attr" "abs") ++ (and (eq_attr "type" "sselog1") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-fpu") ++ ++(define_insn_reservation
"c86_4g_sse_pinsr_reg" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "insr") ++ (and (match_operand 2 "register_operand") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-direct,c86-4g-ieu2,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_pinsr" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "insr") ++ (and (not (match_operand 2 "register_operand")) ++ (eq_attr "memory" "none"))))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_log" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_log_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_sign" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "sign") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fpu") ++ ++(define_insn_reservation "c86_4g_sse_sign_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "sign") ++ (eq_attr "memory" "!none")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fpu") ++ ++ ++(define_insn_reservation "c86_4g_sse_log1" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_log1_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "!none")))) ++ 
"c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_extrq" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "memory" "none") ++ (eq_attr "prefix_data16" "1")))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_movsdup" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "memory" "none") ++ (eq_attr "prefix" "vex")))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_alignr" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "prefix_extra" "1")) ++ (and (eq_attr "type" "sseishft") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_ishift" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "prefix_extra" "!1")) ++ (and (eq_attr "type" "sseishft") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_ishift_load" 8 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "prefix_extra" "!1")) ++ (and (eq_attr "type" "sseishft") ++ (eq_attr "memory" "!none"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_insertimm" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseins") ++ (and (eq_attr "memory" "none") ++ (eq_attr "length_immediate" "2")))) ++ "c86-4g-direct,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_sse_insert" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseins") ++ (and (eq_attr "memory" "none") ++ (eq_attr "length_immediate" "!2")))) ++ "c86-4g-direct,c86-4g-fpu,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_sse_comi" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF") ++ (and (eq_attr "prefix" "!vex") ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" 
"ssecomi") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_comi_load" 12 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_comi_double" 2 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V4SF,V2DF,TI")) ++ (and (eq_attr "prefix" "vex") ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-double,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_comi_double_load" 10 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V4SF,V2DF,TI")) ++ (and (eq_attr "prefix" "vex") ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp0|c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_test" 4 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_test_load" 11 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_avx256_test" 8 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V8SF,V4DF,OI")) ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") ++ 
++(define_insn_reservation "c86_4g_avx256_test_load" 15 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V8SF,V4DF,OI")) ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") ++ ++;; SSE moves ++;; Fix me: Need to revist this again some of the moves may be restricted ++;; to some fpu pipes. ++ ++;; movnt doesn't touch cache, so latency modeling has little impact. ++(define_insn_reservation "c86_4g_sse_movnt_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_attr" "movnt") ++ (and (eq_attr "type" "ssemov,mmxmov,ssecvt") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load") ++ ++(define_insn_reservation "c86_4g_sse_movnt_store" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "c86_attr" "movnt") ++ (and (eq_attr "type" "ssemov,mmxmov,ssecvt") ++ (eq_attr "memory" "store")))) ++ "c86-4g-direct,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_sse_mov" 2 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "SI") ++ (and (eq_attr "isa" "avx") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-direct,c86-4g-ieu0") ++ ++(define_insn_reservation "c86_4g_avx_mov" 2 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "TI") ++ (and (eq_attr "isa" "avx") ++ (and (eq_attr "type" "ssemov") ++ (and (match_operand:SI 1 "register_operand") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-direct,c86-4g-ieu2") ++ ++(define_insn_reservation "c86_4g_sseavx_mov" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fpu") ++ ++(define_insn_reservation "c86_4g_sseavx_blend" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr 
"type" "ssemov,sselog1") ++ (and (eq_attr "c86_attr" "blend,blendv") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_sseavx_mov_store" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "store")))) ++ "c86-4g-direct,c86-4g-fpu,c86-4g-store") ++ ++(define_insn_reservation "c86_4g_sseavx_mov_load" 8 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fpu") ++ ++(define_insn_reservation "c86_4g_avx256_mov" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-fpu") ++ ++(define_insn_reservation "c86_4g_avx256_mov_store" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "store")))) ++ "c86-4g-double,c86-4g-fpu,c86-4g-store") ++ ++(define_insn_reservation "c86_4g_avx256_mov_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fpu") ++ ++;; SSE max & min ++(define_insn_reservation "c86_4g_sse_maxmin" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V8SF,V2DF,V4DF,TI")) ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "memory" "none") ++ (eq_attr "c86_attr" "maxmin")))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_sse_maxmin_load" 8 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V8SF,V2DF,V4DF,TI")) ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "memory" "load") ++ (eq_attr "c86_attr" "maxmin")))) ++ 
"c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_sse_pmaxmin" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "TI,OI")) ++ (and (eq_attr "type" "mmxadd,sseiadd") ++ (and (eq_attr "memory" "none") ++ (eq_attr "c86_attr" "maxmin")))) ++ "c86-4g-direct,c86-4g-fpu") ++ ++(define_insn_reservation "c86_4g_sse_pmaxmin_load" 8 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "TI,OI")) ++ (and (eq_attr "type" "mmxadd,sseiadd") ++ (and (eq_attr "memory" "load") ++ (eq_attr "c86_attr" "maxmin")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fpu") ++ ++;; SSE avg ++(define_insn_reservation "c86_4g_sse_avg" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "c86_attr" "avg")) ++ (and (eq_attr "type" "sseiadd,mmxshft") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fpu") ++ ++(define_insn_reservation "c86_4g_sse_avg_load" 8 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "c86_attr" "avg")) ++ (and (eq_attr "type" "sseiadd,mmxshft") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp3") ++ ++;;MMX sadbw ++(define_insn_reservation "c86_4g_sse_sadbw" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseiadd,mmxshft") ++ (and (eq_attr "c86_attr" "sadbw") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_sse_sadbw_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseiadd,mmxshft") ++ (and (eq_attr "c86_attr" "sadbw") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0") ++ ++;; SSE add ++(define_insn_reservation "c86_4g_sse_add" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "memory" "none") ++ (eq_attr "c86_attr" "other")))) ++ "c86-4g-direct,c86-4g-fp1|c86-4g-fp3") ++ ++(define_insn_reservation "c86_4g_sse_add_load" 10 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") 
++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "memory" "load") ++ (eq_attr "c86_attr" "!maxmin")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1|c86-4g-fp3") ++ ++(define_insn_reservation "c86_4g_sse_fma" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "ssemuladd") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_sse_fma_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "ssemuladd") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_sse_iadd" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fpu") ++ ++(define_insn_reservation "c86_4g_sse_iadd_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "sseiadd") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fpu") ++ ++;; SSE conversions. 
++(define_insn_reservation "c86_4g_ssecvtsf_si_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "SI") ++ (and (eq_attr "type" "sseicvt") ++ (and (match_operand:SF 1 "memory_operand") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp3,c86-4g-ieu0") ++ ++(define_insn_reservation "c86_4g_ssecvtdf_si" 5 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "SI") ++ (and (match_operand:DF 1 "register_operand") ++ (and (eq_attr "type" "sseicvt") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-double,c86-4g-fp3,c86-4g-ieu0") ++ ++(define_insn_reservation "c86_4g_ssecvtdf_si_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "SI") ++ (and (eq_attr "type" "sseicvt") ++ (and (match_operand:DF 1 "memory_operand") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp3,c86-4g-ieu0") ++ ++;; All other used ssecvt fp3 pipes ++;; Check: Need to revisit this again. ++;; Some SSE converts may use different pipe combinations. 
++(define_insn_reservation "c86_4g_ssecvt" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "none")))) ++ "c86-4g-direct,c86-4g-fp1") ++ ++(define_insn_reservation "c86_4g_ssecvt_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "c86_attr" "other") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1") ++ ++;; SSE div ++(define_insn_reservation "c86_4g_ssediv_ss_ps" 10 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V4SF,SF")) ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp1*10") ++ ++(define_insn_reservation "c86_4g_ssediv_ss_ps_load" 17 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V4SF,SF")) ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1*10") ++ ++(define_insn_reservation "c86_4g_ssediv_sd_pd" 13 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V2DF,DF")) ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp1*13") ++ ++(define_insn_reservation "c86_4g_ssediv_sd_pd_load" 20 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V2DF,DF")) ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1*13") ++ ++ ++(define_insn_reservation "c86_4g_ssediv_avx256_ps" 10 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssediv")))) ++ "c86-4g-double,c86-4g-fp1*10") ++ ++(define_insn_reservation "c86_4g_ssediv_avx256_ps_load" 17 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp1*10") ++ ++(define_insn_reservation 
"c86_4g_ssediv_avx256_pd" 13 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-fp1*13") ++ ++(define_insn_reservation "c86_4g_ssediv_avx256_pd_load" 20 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp1*13") ++;; SSE MUL ++(define_insn_reservation "c86_4g_ssemul_ss_ps" 3 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V8SF,V4SF,SF")) ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_ssemul_ss_ps_load" 10 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V8SF,V4SF,SF")) ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_ssemul_sd_pd" 4 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V4DF,V2DF,DF")) ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_ssemul_sd_pd_load" 11 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "V4DF,V2DF,DF")) ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") ++ ++;;SSE imul ++(define_insn_reservation "c86_4g_sseimul" 3 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "TI")) ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_sseimul_avx256" 4 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-fp0") ++ ++(define_insn_reservation 
"c86_4g_sseimul_load" 10 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "TI")) ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_sseimul_avx256_load" 11 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_sseimul_di" 3 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "DI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseimul")))) ++ "c86-4g-direct,c86-4g-fp0") ++ ++(define_insn_reservation "c86_4g_sseimul_load_di" 10 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "DI") ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "load")))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0") ++ ++;; SSE compares ++(define_insn_reservation "c86_4g_sse_cmp" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_sse_cmp_load" 8 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") ++ ++ ++(define_insn_reservation "c86_4g_sse_cmp_avx256" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation "c86_4g_sse_cmp_avx256_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fp0|c86-4g-fp2") ++ ++(define_insn_reservation 
"c86_4g_sse_icmp" 1 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "QI,HI,SI,DI,TI")) ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "none"))) ++ "c86-4g-direct,c86-4g-fpu") ++ ++ ++(define_insn_reservation "c86_4g_sse_icmp_load" 8 ++ (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (eq_attr "mode" "QI,HI,SI,DI,TI")) ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "load"))) ++ "c86-4g-direct,c86-4g-load,c86-4g-fpu") ++ ++ ++(define_insn_reservation "c86_4g_sse_icmp_avx256" 1 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "none")))) ++ "c86-4g-double,c86-4g-fpu") ++ ++ ++(define_insn_reservation "c86_4g_sse_icmp_avx256_load" 8 ++ (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "load")))) ++ "c86-4g-double,c86-4g-load,c86-4g-fpu") +diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h +index a4c2fed7eda..6ab68edb311 100644 +--- a/gcc/config/i386/cpuid.h ++++ b/gcc/config/i386/cpuid.h +@@ -208,6 +208,10 @@ + #define signature_SHANGHAI_ecx 0x20206961 + #define signature_SHANGHAI_edx 0x68676e61 + ++#define signature_HYGON_ebx 0x6f677948 ++#define signature_HYGON_ecx 0x656e6975 ++#define signature_HYGON_edx 0x6e65476e ++ + #ifndef __x86_64__ + /* At least one cpu (Winchip 2) does not set %ebx and %ecx + for cpuid leaf 1. 
Forcibly zero the two registers before +diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc +index 2670c90b288..d17ad2515ea 100644 +--- a/gcc/config/i386/driver-i386.cc ++++ b/gcc/config/i386/driver-i386.cc +@@ -496,6 +496,16 @@ const char *host_detect_local_cpu (int argc, const char **argv) + else + processor = PROCESSOR_PENTIUM; + } ++ else if (vendor == VENDOR_HYGON) ++ { ++ processor = PROCESSOR_GENERIC; ++ if (model == 4) ++ processor = PROCESSOR_C86_4G_M4; ++ else if (model == 6) ++ processor = PROCESSOR_C86_4G_M6; ++ else if (model >= 7) ++ processor = PROCESSOR_C86_4G_M7; ++ } + else if (vendor == VENDOR_CENTAUR) + { + processor = PROCESSOR_GENERIC; +@@ -802,6 +812,14 @@ const char *host_detect_local_cpu (int argc, const char **argv) + break; + case PROCESSOR_SHIJIDADAO: + cpu = "shijidadao"; + break; ++ case PROCESSOR_C86_4G_M4: ++ cpu = "c86-4g-m4"; ++ break; ++ case PROCESSOR_C86_4G_M6: ++ cpu = "c86-4g-m6"; ++ break; ++ case PROCESSOR_C86_4G_M7: ++ cpu = "c86-4g-m7"; ++ break; + + default: +diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index aae6840337e..1505963032f 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -262,6 +262,18 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__rocketlake"); + def_or_undef (parse_in, "__rocketlake__"); + break; ++ case PROCESSOR_C86_4G_M4: ++ def_or_undef (parse_in, "__c86_4g_m4"); ++ def_or_undef (parse_in, "__c86_4g_m4__"); ++ break; ++ case PROCESSOR_C86_4G_M6: ++ def_or_undef (parse_in, "__c86_4g_m6"); ++ def_or_undef (parse_in, "__c86_4g_m6__"); ++ break; ++ case PROCESSOR_C86_4G_M7: ++ def_or_undef (parse_in, "__c86_4g_m7"); ++ def_or_undef (parse_in, "__c86_4g_m7__"); ++ break; + /* use PROCESSOR_max to not set/unset the arch macro.
*/ + case PROCESSOR_max: + break; +@@ -440,6 +452,15 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + case PROCESSOR_ROCKETLAKE: + def_or_undef (parse_in, "__tune_rocketlake__"); + break; ++ case PROCESSOR_C86_4G_M4: ++ def_or_undef (parse_in, "__tune_c86_4g_m4__"); ++ break; ++ case PROCESSOR_C86_4G_M6: ++ def_or_undef (parse_in, "__tune_c86_4g_m6__"); ++ break; ++ case PROCESSOR_C86_4G_M7: ++ def_or_undef (parse_in, "__tune_c86_4g_m7__"); ++ break; + case PROCESSOR_INTEL: + case PROCESSOR_GENERIC: + break; +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 5bf08f394b3..98cf03eaa15 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -170,6 +170,10 @@ along with GCC; see the file COPYING3. If not see + #define m_ZNVER (m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4) + #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \ + | m_ZNVER) ++#define m_C86_4G_M4 (HOST_WIDE_INT_1U<3" + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") ++ (set_attr "c86_attr" "maxmin") + (set_attr "type" "sseadd") + (set_attr "mode" "")]) + +@@ -20988,6 +21024,7 @@ (define_insn "*ieee_s3" + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,maybe_evex") ++ (set_attr "c86_attr" "maxmin") + (set_attr "type" "sseadd") + (set_attr "mode" "")]) + +diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md +index 197f19e4b1a..bbf0ed9b92f 100644 +--- a/gcc/config/i386/mmx.md ++++ b/gcc/config/i386/mmx.md +@@ -543,6 +543,7 @@ (define_insn "sse_movntq" + [(set_attr "isa" "*,x64") + (set_attr "mmx_isa" "native,*") + (set_attr "type" "mmxmov,ssemov") ++ (set_attr "c86_attr" "movnt") + (set_attr "mode" "DI")]) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@@ -1130,6 +1131,7 @@ (define_insn "mmx_blendvps" + vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr 
"type" "ssemov") ++ (set_attr "c86_attr" "blendv") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") +@@ -2030,6 +2032,7 @@ (define_insn "*mmx_pmaddwd" + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxmul,sseiadd,sseiadd") ++ (set_attr "c86_attr" "madd") + (set_attr "mode" "DI,TI,TI")]) + + (define_expand "mmx_pmulhrwv4hi3" +@@ -2702,6 +2705,7 @@ (define_insn "mmx_pblendvb_v8qi" + vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blendv") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") +@@ -2722,6 +2726,7 @@ (define_insn "mmx_pblendvb_" + vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blendv") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") +@@ -3223,6 +3228,7 @@ (define_insn "sse4_1_v4qiv4hi2" + "%vpmovbw\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -3237,6 +3243,7 @@ (define_insn "sse4_1_v2hiv2si2" + "%vpmovwd\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -3251,6 +3258,7 @@ (define_insn "sse4_1_v2qiv2hi2" + "%vpmovbw\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -3360,6 +3368,7 @@ (define_insn "*mmx_pinsrd" + (set_attr "prefix_data16" 
"1") + (set_attr "prefix_extra" "1") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "insr") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "TI")]) +@@ -3411,6 +3420,7 @@ (define_insn "*mmx_pinsrw" + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxcvt,sselog,sselog") ++ (set_attr "c86_attr" "insr") + (set_attr "length_immediate" "1") + (set_attr "mode" "DI,TI,TI")]) + +@@ -3444,6 +3454,7 @@ (define_insn "*mmx_pinsrb" + } + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "insr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -3464,6 +3475,7 @@ (define_insn "*mmx_pextrw" + [(set_attr "isa" "*,sse2,sse4") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxcvt,sselog1,sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,maybe_vex,maybe_vex") + (set_attr "mode" "DI,TI,TI")]) +@@ -3482,6 +3494,7 @@ (define_insn "*mmx_pextrw_zext" + [(set_attr "isa" "*,sse2") + (set_attr "mmx_isa" "native,*") + (set_attr "type" "mmxcvt,sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "DI,TI")]) +@@ -3496,6 +3509,7 @@ (define_insn "*mmx_pextrb" + %vpextrb\t{%2, %1, %k0|%k0, %1, %2} + %vpextrb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -3638,6 +3652,7 @@ (define_insn "*mmx_pblendw64" + vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,orig,vex") +@@ -3656,6 +3671,7 @@ (define_insn "*mmx_pblendw32" + vpblendw\t{%3, %2, %1, %0|%0, 
%1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,orig,vex") +@@ -3824,6 +3840,7 @@ (define_insn "*vec_extractv2si_1" + [(set_attr "isa" "*,sse4,sse2,noavx,*,*,*") + (set_attr "mmx_isa" "native,*,*,*,native,*,*") + (set_attr "type" "mmxcvt,ssemov,sseshuf1,sseshuf1,mmxmov,ssemov,imov") ++ (set_attr "c86_attr" "*,extr,*,*,*,*,*") + (set (attr "length_immediate") + (if_then_else (eq_attr "alternative" "1,2,3") + (const_string "1") +@@ -3850,6 +3867,7 @@ (define_insn "*vec_extractv2si_1_zext" + && TARGET_64BIT && TARGET_SSE4_1" + "%vpextrd\t{$1, %1, %k0|%k0, %1, 1}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") +@@ -3992,6 +4010,7 @@ (define_insn "*pinsrw" + } + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "insr") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +@@ -4025,6 +4044,7 @@ (define_insn "*pinsrb" + } + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "insr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -4042,6 +4062,7 @@ (define_insn "*pextrw" + %vpextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,sse4") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) +@@ -4055,6 +4076,7 @@ (define_insn "*pextrw_zext" + "TARGET_SSE2" + "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) +@@ -4069,6 +4091,7 @@ (define_insn "*pextrb" + %vpextrb\t{%2, %1, %k0|%k0, %1, %2} + %vpextrb\t{%2, %1, %0|%0, %1, 
%2}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -4084,6 +4107,7 @@ (define_insn "*pextrb_zext" + "TARGET_SSE4_1" + "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -4419,6 +4443,7 @@ (define_insn "mmx_psadbw" + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxshft,sseiadd,sseiadd") ++ (set_attr "c86_attr" "sadbw") + (set_attr "mode" "DI,TI,TI")]) + + (define_expand "reduc_plus_scal_v8qi" +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index f25dd5f2bc4..4d13cd75010 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -1523,6 +1523,11 @@ (define_insn "_blendm" + } + } + [(set_attr "type" "ssemov") ++ (set (attr "c86_attr") ++ (if_then_else (and (match_test "REG_P (operands[1])") ++ (match_test "REGNO (operands[1]) != REGNO (operands[0])")) ++ (const_string "blend") ++ (const_string "*"))) + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -1551,6 +1556,7 @@ (define_insn "_blendm" + vmovdqu\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2} + vpblendmw\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "*,blend") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -1763,6 +1769,7 @@ (define_insn "sse2_movnti" + "TARGET_SSE2" + "movnti\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set_attr "prefix_data16" "0") + (set_attr "mode" "")]) + +@@ -1774,6 +1781,7 @@ (define_insn "_movnt" + "TARGET_SSE" + "%vmovnt\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -1789,6 +1797,7 @@ (define_insn "_movnt" + (match_test "TARGET_AVX") + (const_string 
"*") + (const_string "1"))) ++ (set_attr "c86_attr" "movnt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -2415,6 +2424,7 @@ (define_insn "_rcp2" + [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "btver2_sse_attr" "rcp") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -2433,6 +2443,7 @@ (define_insn "sse_vmrcpv4sf2" + (set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "btver2_sse_attr" "rcp") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "SF")]) + +@@ -2452,6 +2463,7 @@ (define_insn "*sse_vmrcpv4sf2" + (set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "btver2_sse_attr" "rcp") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "SF")]) + +@@ -2501,6 +2513,7 @@ (define_insn "rcp14" + "TARGET_AVX512F" + "vrcp14\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -2515,6 +2528,7 @@ (define_insn "srcp14" + "TARGET_AVX512F" + "vrcp14\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "type" "sse") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -2532,6 +2546,7 @@ (define_insn "srcp14_mask" + "TARGET_AVX512F" + "vrcp14\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %1}" + [(set_attr "type" "sse") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -2567,6 +2582,7 @@ (define_insn "_sqrt2" + (set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") + (set_attr "btver2_sse_attr" "sqrt") ++ (set_attr "c86_attr" "sqrt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -2586,6 +2602,7 @@ (define_insn "_vmsqrt2" + (set_attr "atom_sse_attr" "sqrt") + (set_attr "prefix" "") + (set_attr "btver2_sse_attr" "sqrt") ++ (set_attr "c86_attr" "sqrt") + (set_attr "mode" "")]) + + (define_insn "*_vmsqrt2" +@@ -2605,6 +2622,7 @@ 
(define_insn "*_vmsqrt2" + (set_attr "atom_sse_attr" "sqrt") + (set_attr "prefix" "") + (set_attr "btver2_sse_attr" "sqrt") ++ (set_attr "c86_attr" "sqrt") + (set_attr "mode" "")]) + + (define_expand "rsqrt2" +@@ -2653,6 +2671,7 @@ (define_insn "rsqrt14" + "TARGET_AVX512F" + "vrsqrt14\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -2667,6 +2686,7 @@ (define_insn "rsqrt14" + "TARGET_AVX512F" + "vrsqrt14\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "type" "sse") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -2684,6 +2704,7 @@ (define_insn "rsqrt14__mask" + "TARGET_AVX512F" + "vrsqrt14\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %1}" + [(set_attr "type" "sse") ++ (set_attr "c86_attr" "rcp") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -2806,6 +2827,7 @@ (define_insn "*3" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "btver2_sse_attr" "maxmin") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -2830,6 +2852,7 @@ (define_insn "ieee_3" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "btver2_sse_attr" "maxmin") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -2854,6 +2877,7 @@ (define_insn "*ieee_3" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "btver2_sse_attr" "maxmin") ++ (set_attr "c86_attr" "maxmin") + (set (attr "prefix") + (cond [(eq_attr "alternative" "0") + (const_string "orig") +@@ -2881,6 +2905,7 @@ (define_insn "_vm3 + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse") + (set_attr "btver2_sse_attr" "maxmin") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -3057,6 +3082,7 @@ (define_insn "avx_hv4df3" + "TARGET_AVX" + "vhpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") ++ (set_attr "c86_attr" "hplus") + (set_attr 
"prefix" "vex") + (set_attr "mode" "V4DF")]) + +@@ -3100,6 +3126,7 @@ (define_insn "*sse3_haddv2df3" + vhaddpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + +@@ -3122,6 +3149,7 @@ (define_insn "sse3_hsubv2df3" + vhsubpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + +@@ -3141,6 +3169,7 @@ (define_insn "*sse3_haddv2df3_low" + vhaddpd\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd1") ++ (set_attr "c86_attr" "hplus") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + +@@ -3159,6 +3188,7 @@ (define_insn "*sse3_hsubv2df3_low" + vhsubpd\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd1") ++ (set_attr "c86_attr" "hplus") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + +@@ -3202,6 +3232,7 @@ (define_insn "avx_hv8sf3" + "TARGET_AVX" + "vhps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "prefix" "vex") + (set_attr "mode" "V8SF")]) + +@@ -3232,6 +3263,7 @@ (define_insn "sse3_hv4sf3" + vhps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "atom_unit" "complex") + (set_attr "prefix" "orig,vex") + (set_attr "prefix_rep" "1,*") +@@ -3396,6 +3428,7 @@ (define_insn "reducep" + "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (mode))" + "vreduce\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sse") ++ (set_attr "c86_attr" "aes") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -3412,6 +3445,7 @@ (define_insn "reduces" + "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (mode))" + "vreduce\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse") ++ (set_attr "c86_attr" 
"aes") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -3927,6 +3961,7 @@ (define_insn "*_eq3_1" + vpcmpeq\t{%2, %1, %0|%0, %1, %2} + vptestnm\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "type" "ssecmp") ++ (set_attr "c86_attr" "*,ptest") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -4025,6 +4060,7 @@ (define_insn "*_eq3_1" + vpcmpeq\t{%2, %1, %0|%0, %1, %2} + vptestnm\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "type" "ssecmp") ++ (set_attr "c86_attr" "*,ptest") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -4622,6 +4658,7 @@ (define_insn "_andnot3" + } + [(set_attr "isa" "noavx,avx,avx512dq,avx512f") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set_attr "prefix" "orig,maybe_vex,evex,evex") + (set (attr "mode") + (cond [(and (match_test "") +@@ -4669,6 +4706,7 @@ (define_insn "_andnot3" + return ""; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set_attr "prefix" "evex") + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512DQ") +@@ -4743,6 +4781,7 @@ (define_insn "*3" + } + [(set_attr "isa" "noavx,avx,avx512dq,avx512f") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set_attr "prefix" "orig,maybe_evex,evex,evex") + (set (attr "mode") + (cond [(and (match_test "") +@@ -4790,6 +4829,7 @@ (define_insn "*3" + return ""; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set_attr "prefix" "evex") + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512DQ") +@@ -4904,6 +4944,7 @@ (define_insn "*andnot3" + } + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set_attr "prefix" "orig,vex,evex,evex") + (set (attr "mode") + (cond [(eq_attr "alternative" "2") +@@ -5030,6 +5071,7 @@ (define_insn "3" + } + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + 
(set_attr "prefix" "orig,vex,evex,evex") + (set (attr "mode") + (cond [(eq_attr "alternative" "2") +@@ -5092,6 +5134,7 @@ (define_insn "*tf3" + } + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set (attr "prefix_data16") + (if_then_else + (and (eq_attr "alternative" "0") +@@ -10575,6 +10618,10 @@ (define_insn "vec_set_0" + (const_string "fmov") + ] + (const_string "ssemov"))) ++ (set (attr "c86_attr") ++ (if_then_else (eq_attr "alternative" "8,9,10") ++ (const_string "insr") ++ (const_string "*"))) + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "8,9,10") + (const_string "1") +@@ -10642,6 +10689,13 @@ (define_insn "vec_set_0" + (if_then_else (eq_attr "alternative" "0,1,2,5,6,9") + (const_string "ssemov") + (const_string "sselog"))) ++ (set (attr "c86_attr") ++ (cond [(eq_attr "alternative" "5,6,9") ++ (const_string "blend") ++ (eq_attr "alternative" "3,4,7,8,10,11") ++ (const_string "insr") ++ ] ++ (const_string "*"))) + (set (attr "prefix_data16") + (if_then_else (eq_attr "alternative" "3,4") + (const_string "1") +@@ -10910,6 +10964,7 @@ (define_insn_and_split "*sse4_1_extractps" + } + [(set_attr "isa" "noavx,noavx,avx,noavx,avx") + (set_attr "type" "sselog,sselog,sselog,*,*") ++ (set_attr "c86_attr" "extr,extr,extr,*,*") + (set_attr "prefix_data16" "1,1,1,*,*") + (set_attr "prefix_extra" "1,1,1,*,*") + (set_attr "length_immediate" "1,1,1,*,*") +@@ -11821,6 +11876,7 @@ (define_insn "*vec_extracthf" + } + [(set_attr "isa" "*,sse4,noavx,avx") + (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1") ++ (set_attr "c86_attr" "extr,extr,other,other") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "TI")]) + +@@ -12526,6 +12582,7 @@ (define_insn "_align" + "TARGET_AVX512F" + "valign\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + [(set_attr "prefix" "evex") ++ (set_attr "c86_attr" "shufx") + (set_attr "mode" "")]) + + (define_mode_attr vec_extract_imm_predicate +@@ -15161,6 +15218,7 @@ 
(define_insn "avx512bw_pmaddwd512" + "TARGET_AVX512BW && " + "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"; + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "madd") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +@@ -15232,6 +15290,7 @@ (define_insn "*avx2_pmaddwd" + "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "madd") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +@@ -15290,6 +15349,7 @@ (define_insn "*sse2_pmaddwd" + vpmaddwd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "madd") + (set_attr "atom_unit" "simul") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") +@@ -15964,6 +16024,7 @@ (define_insn "*avx2_3" + "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) +@@ -16005,6 +16066,7 @@ (define_insn "*avx512f_3" + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "")]) +@@ -16017,6 +16079,7 @@ (define_insn "3" + "TARGET_AVX512BW" + "vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -16115,6 +16178,7 @@ (define_insn "*sse4_1_3" + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) +@@ -16130,6 +16194,7 @@ (define_insn "*v8hi3" + vpw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + 
(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "*,1") + (set_attr "prefix" "orig,vex") +@@ -16199,6 +16264,7 @@ (define_insn "*sse4_1_3" + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) +@@ -16214,6 +16280,7 @@ (define_insn "*v16qi3" + vpb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "maxmin") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "*,1") + (set_attr "prefix" "orig,vex") +@@ -16831,6 +16898,7 @@ (define_insn "*andnot3" + } + [(set_attr "isa" "noavx,avx,avx") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set (attr "prefix_data16") + (if_then_else + (and (eq_attr "alternative" "0") +@@ -16896,6 +16964,7 @@ (define_insn "*andnot3_mask" + "TARGET_AVX512F" + "vpandn\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -17017,6 +17086,7 @@ (define_insn "*3" + } + [(set_attr "isa" "noavx,avx,avx") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set (attr "prefix_data16") + (if_then_else + (and (eq_attr "alternative" "0") +@@ -17110,6 +17180,7 @@ (define_insn "*3" + } + [(set_attr "isa" "noavx,avx,avx") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set (attr "prefix_data16") + (if_then_else + (and (eq_attr "alternative" "0") +@@ -17145,6 +17216,7 @@ (define_insn "v1ti3" + (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "sselogic") + (set_attr "mode" "TI")]) + + (define_expand "one_cmplv1ti2" +@@ -17893,6 +17965,7 @@ (define_insn "_pinsr" + } + [(set_attr "isa" 
"noavx,noavx,avx,avx,,,avx2") + (set_attr "type" "sselog") ++ (set_attr "c86_attr" "insr") + (set (attr "prefix_rex") + (if_then_else + (and (not (match_test "TARGET_AVX")) +@@ -18002,6 +18075,7 @@ (define_insn "*_vinsert_0" + } + } + [(set_attr "type" "sselog,ssemov,ssemov") ++ (set_attr "c86_attr" "insertx,*,*") + (set_attr "length_immediate" "1,0,0") + (set_attr "prefix" "evex,vex,evex") + (set_attr "mode" ",,")]) +@@ -18034,6 +18108,7 @@ (define_insn "_vinsert_1\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -18072,6 +18147,7 @@ (define_insn "vec_set_lo_" + "TARGET_AVX512DQ" + "vinsert32x8\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -18089,6 +18165,7 @@ (define_insn "vec_set_hi_" + "TARGET_AVX512DQ" + "vinsert32x8\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -18104,6 +18181,7 @@ (define_insn "vec_set_lo_" + "TARGET_AVX512F" + "vinsert64x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) +@@ -18119,6 +18197,7 @@ (define_insn "vec_set_hi_" + "TARGET_AVX512F" + "vinsert64x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) +@@ -18166,6 +18245,7 @@ (define_insn "avx512dq_shuf_64x2_1" + return "vshuf64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "shufx") + (set_attr "length_immediate" 
"1") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) +@@ -18228,6 +18308,7 @@ (define_insn "avx512f_shuf_64x2_1" + return "vshuf64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "shufx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -18264,6 +18345,7 @@ (define_insn "*avx512f_shuf_64x2_1_1" + return "vshuf64x2\t{%2, %1, %1, %0|%0, %1, %1, %2}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "shufx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -18324,6 +18406,7 @@ (define_insn "avx512vl_shuf_32x4_1" + return "vshuf32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "shufx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -18410,6 +18493,7 @@ (define_insn "avx512f_shuf_32x4_1" + return "vshuf32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "shufx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -18462,6 +18546,7 @@ (define_insn "*avx512f_shuf_32x4_1_1" + return "vshuf32x4\t{%2, %1, %1, %0|%0, %1, %1, %2}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "shufx") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -19040,6 +19125,7 @@ (define_insn "*vec_extract" + %vpextr\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,sse4") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "prefix_data16" "1") + (set (attr "prefix_extra") + (if_then_else +@@ -19062,6 +19148,7 @@ (define_insn "*vec_extract_zext" + "TARGET_SSE2" + "%vpextr\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "prefix_data16" "1") + (set (attr "prefix_extra") + (if_then_else +@@ -19082,6 +19169,7 @@ (define_insn 
"*vec_extractv16qi_zext" + "TARGET_SSE4_1" + "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -19215,6 +19303,7 @@ (define_insn "*vec_extractv4si" + } + [(set_attr "isa" "*,avx512dq,noavx,noavx,avx") + (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1") ++ (set_attr "c86_attr" "extr,extr,*,*,*") + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "0,1") + (const_string "1") +@@ -19233,6 +19322,7 @@ (define_insn "*vec_extractv4si_zext" + "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,avx512dq") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "extr") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") +@@ -19302,6 +19392,10 @@ (define_insn "*vec_extractv2di_1" + (const_string "imov") + ] + (const_string "sselog1"))) ++ (set (attr "c86_attr") ++ (if_then_else (eq_attr "alternative" "0,1") ++ (const_string "extr") ++ (const_string "other"))) + (set (attr "length_immediate") + (if_then_else (eq_attr "alternative" "0,1,3,4,5") + (const_string "1") +@@ -19461,6 +19555,10 @@ (define_insn "*vec_concatv2si_sse4_1" + (const_string "mmxmov") + ] + (const_string "sselog"))) ++ (set (attr "c86_attr") ++ (if_then_else (eq_attr "alternative" "0,1,2,3") ++ (const_string "insr") ++ (const_string "other"))) + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "0,1,2,3") + (const_string "1") +@@ -19561,6 +19659,10 @@ (define_insn "vec_concatv2di" + (eq_attr "alternative" "0,1,2,3,4,5") + (const_string "sselog") + (const_string "ssemov"))) ++ (set (attr "c86_attr") ++ (if_then_else (eq_attr "alternative" "0,1,2,3") ++ (const_string "insr") ++ (const_string "other"))) + (set (attr "prefix_rex") + (if_then_else (eq_attr "alternative" "0,1,2,3") + (const_string "1") +@@ -19762,6 +19864,7 @@ (define_insn "*_uavg3" + 
vpavg\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "avg") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,") + (set_attr "mode" "")]) +@@ -19780,6 +19883,7 @@ (define_insn "_psadbw" + vpsadbw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "sadbw") + (set_attr "atom_unit" "simul") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,maybe_evex") +@@ -19793,6 +19897,7 @@ (define_insn "_movmsk" + "TARGET_SSE" + "%vmovmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -19805,6 +19910,7 @@ (define_insn "*_movmsk_ext" + "TARGET_64BIT && TARGET_SSE" + "%vmovmsk\t{%1, %k0|%k0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -19888,6 +19994,7 @@ (define_insn "_pmovmskb" + "TARGET_SSE2" + "%vpmovmskb\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") +@@ -19905,6 +20012,7 @@ (define_insn "*_pmovmskb_zext" + "TARGET_64BIT && TARGET_SSE2" + "%vpmovmskb\t{%1, %k0|%k0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") +@@ -19922,6 +20030,7 @@ (define_insn "*sse2_pmovmskb_ext" + "TARGET_64BIT && TARGET_SSE2" + "%vpmovmskb\t{%1, %k0|%k0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") +@@ -20093,6 +20202,7 @@ (define_insn "*sse2_maskmovdqu" + return "%vmaskmovdqu\t{%2, %1|%1, %2}"; + } + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blend") + (set_attr "prefix_data16" "1") + (set (attr "length_address") + (symbol_ref ("Pmode != word_mode"))) +@@ -20189,6 
+20299,7 @@ (define_insn "avx2_phwv16hi3" + "TARGET_AVX2" + "vphw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) +@@ -20214,6 +20325,7 @@ (define_insn "ssse3_phwv8hi3" + vphw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "atom_unit" "complex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") +@@ -20278,6 +20390,7 @@ (define_insn "avx2_phdv8si3" + "TARGET_AVX2" + "vphd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) +@@ -20301,6 +20414,7 @@ (define_insn "ssse3_phdv4si3" + vphd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "atom_unit" "complex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") +@@ -20340,6 +20454,7 @@ (define_insn_and_split "ssse3_phdv2si3" + } + [(set_attr "mmx_isa" "native,sse_noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "hplus") + (set_attr "atom_unit" "complex") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) +@@ -20395,6 +20510,7 @@ (define_insn "avx2_pmaddubsw256" + "TARGET_AVX2" + "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "madd") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) +@@ -20410,6 +20526,7 @@ (define_insn "avx512bw_pmaddubsw512" + "TARGET_AVX512BW" + "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"; + [(set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "madd") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +@@ -20485,6 +20602,7 @@ (define_insn "ssse3_pmaddubsw128" + vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" 
+ [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "madd") + (set_attr "atom_unit" "simul") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") +@@ -20522,6 +20640,7 @@ (define_insn "ssse3_pmaddubsw" + [(set_attr "isa" "*,noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "sseiadd") ++ (set_attr "c86_attr" "madd") + (set_attr "atom_unit" "simul") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) +@@ -20805,6 +20924,7 @@ (define_insn "_psign3" + vpsign\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "sign") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") +@@ -20824,6 +20944,7 @@ (define_insn "ssse3_psign3" + [(set_attr "isa" "*,noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "sign") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) + (set_attr "mode" "DI,TI,TI")]) +@@ -20952,6 +21073,7 @@ (define_insn "*abs2" + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_vex") ++ (set_attr "c86_attr" "abs") + (set_attr "mode" "")]) + + (define_insn "abs2_mask" +@@ -20964,6 +21086,7 @@ (define_insn "abs2_mask" + "TARGET_AVX512F" + "vpabs\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "abs") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -20977,6 +21100,7 @@ (define_insn "abs2_mask" + "TARGET_AVX512BW" + "vpabs\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "abs") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -21009,6 +21133,7 @@ (define_insn "sse4a_movnt" + "TARGET_SSE4A" + "movnt\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set_attr 
"mode" "")]) + + (define_insn "sse4a_vmmovnt" +@@ -21021,6 +21146,7 @@ (define_insn "sse4a_vmmovnt" + "TARGET_SSE4A" + "movnt\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set_attr "mode" "")]) + + (define_insn "sse4a_extrqi" +@@ -21097,6 +21223,7 @@ (define_insn "_blend" + vblend\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blend") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") +@@ -21117,6 +21244,7 @@ (define_insn "_blendv" + vblendv\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blendv") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") +@@ -21149,6 +21277,7 @@ (define_insn "sse4_1_blendv" + } + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blendv") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") +@@ -21287,6 +21416,7 @@ (define_insn "_movntdqa" + "%vmovntdqa\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "movnt") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "")]) +@@ -21344,6 +21474,7 @@ (define_insn "_pblendvb" + vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blendv") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") +@@ -21437,6 +21568,7 @@ (define_insn "sse4_1_pblend" + vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr 
"prefix" "orig,orig,vex") +@@ -21508,6 +21640,7 @@ (define_insn "*avx2_pblend" + return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -21522,6 +21655,7 @@ (define_insn "avx2_pblendd" + "TARGET_AVX2" + "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -21546,6 +21680,7 @@ (define_insn "avx2_v16qiv16hi2" + "TARGET_AVX2 && && " + "vpmovbw\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "OI")]) +@@ -21600,6 +21735,7 @@ (define_insn "avx512bw_v32qiv32hi2" + "TARGET_AVX512BW" + "vpmovbw\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) +@@ -21660,6 +21796,7 @@ (define_insn "sse4_1_v8qiv8hi2" + "%vpmovbw\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -21672,6 +21809,7 @@ (define_insn "*sse4_1_v8qiv8hi2_1" + "%vpmovbw\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -21784,6 +21922,7 @@ (define_insn "avx512f_v16qiv16si2" + "TARGET_AVX512F" + "vpmovbd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +@@ -21805,6 +21944,7 @@ (define_insn "avx2_v8qiv8si2" + "TARGET_AVX2 && " + 
"vpmovbd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "OI")]) +@@ -21816,6 +21956,7 @@ (define_insn "*avx2_v8qiv8si2_1" + "TARGET_AVX2 && " + "%vpmovbd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "OI")]) +@@ -21866,6 +22007,7 @@ (define_insn "sse4_1_v4qiv4si2" + "%vpmovbd\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -21878,6 +22020,7 @@ (define_insn "*sse4_1_v4qiv4si2_1" + "%vpmovbd\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -21926,6 +22069,7 @@ (define_insn "avx512f_v16hiv16si2" + "TARGET_AVX512F" + "vpmovwd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +@@ -21979,6 +22123,7 @@ (define_insn "avx2_v8hiv8si2" + "TARGET_AVX2 && " + "vpmovwd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "OI")]) +@@ -22038,6 +22183,7 @@ (define_insn "sse4_1_v4hiv4si2" + "%vpmovwd\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -22050,6 +22196,7 @@ (define_insn "*sse4_1_v4hiv4si2_1" + "%vpmovwd\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr 
"c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -22161,6 +22308,7 @@ (define_insn "avx512f_v8qiv8di2" + "TARGET_AVX512F" + "vpmovbq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +@@ -22171,6 +22319,7 @@ (define_insn "*avx512f_v8qiv8di2_1" + "TARGET_AVX512F" + "vpmovbq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +@@ -22218,6 +22367,7 @@ (define_insn "avx2_v4qiv4di2" + "TARGET_AVX2 && " + "vpmovbq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "OI")]) +@@ -22229,6 +22379,7 @@ (define_insn "*avx2_v4qiv4di2_1" + "TARGET_AVX2 && " + "vpmovbq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "OI")]) +@@ -22280,6 +22431,7 @@ (define_insn "sse4_1_v2qiv2di2" + "%vpmovbq\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -22303,6 +22455,7 @@ (define_insn "avx512f_v8hiv8di2" + "TARGET_AVX512F" + "vpmovwq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +@@ -22322,6 +22475,7 @@ (define_insn "avx2_v4hiv4di2" + "TARGET_AVX2 && " + "vpmovwq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "OI")]) +@@ -22333,6 +22487,7 @@ (define_insn "*avx2_v4hiv4di2_1" + "TARGET_AVX2 && " + "vpmovwq\t{%1, %0|%0, %1}" 
+ [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "OI")]) +@@ -22380,6 +22535,7 @@ (define_insn "sse4_1_v2hiv2di2" + "%vpmovwq\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -22392,6 +22548,7 @@ (define_insn "*sse4_1_v2hiv2di2_1" + "%vpmovwq\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -22439,6 +22596,7 @@ (define_insn "avx512f_v8siv8di2" + "TARGET_AVX512F" + "vpmovdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +@@ -22490,6 +22648,7 @@ (define_insn "avx2_v4siv4di2" + "TARGET_AVX2 && " + "vpmovdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix" "maybe_evex") + (set_attr "prefix_extra" "1") + (set_attr "mode" "OI")]) +@@ -22545,6 +22704,7 @@ (define_insn "sse4_1_v2siv2di2" + "%vpmovdq\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -22557,6 +22717,7 @@ (define_insn "*sse4_1_v2siv2di2_1" + "%vpmovdq\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "c86_attr" "vpmovx") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) +@@ -22823,6 +22984,7 @@ (define_insn "sse4_1_round" + vrndscale\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx,avx512f") + (set_attr "type" "ssecvt") ++ 
(set_attr "c86_attr" "aes") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*,*") + (set_attr "prefix_extra" "1") +@@ -22847,6 +23009,7 @@ (define_insn "*sse4_1_round" + vrndscale\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx,avx512f") + (set_attr "type" "ssecvt") ++ (set_attr "c86_attr" "aes") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*,*") + (set_attr "prefix_extra" "1") +@@ -23021,6 +23184,7 @@ (define_insn "sse4_2_pcmpestri" + "TARGET_SSE4_2" + "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "cmpestr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_vex") +@@ -23049,6 +23213,7 @@ (define_insn "sse4_2_pcmpestrm" + "TARGET_SSE4_2" + "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "cmpestr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -23075,6 +23240,7 @@ (define_insn "sse4_2_pcmpestr_cconly" + %vpcmpestri\t{%6, %4, %2|%2, %4, %6} + %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "cmpestr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -23150,6 +23316,7 @@ (define_insn "sse4_2_pcmpistri" + "TARGET_SSE4_2" + "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "cmpestr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -23174,6 +23341,7 @@ (define_insn "sse4_2_pcmpistrm" + "TARGET_SSE4_2" + "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "cmpestr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -23198,6 +23366,7 @@ (define_insn "sse4_2_pcmpistr_cconly" + %vpcmpistri\t{%4, %3, %2|%2, %3, %4} + 
%vpcmpistri\t{%4, %3, %2|%2, %3, %4}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "cmpestr") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") +@@ -23609,7 +23778,8 @@ (define_insn "xop_phaddbw" + (const_int 13) (const_int 15)])))))] + "TARGET_XOP" + "vphaddbw\t{%1, %0|%0, %1}" +- [(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + (define_insn "xop_phaddbd" + [(set (match_operand:V4SI 0 "register_operand" "=x") +@@ -23638,7 +23808,8 @@ (define_insn "xop_phaddbd" + (const_int 11) (const_int 15)]))))))] + "TARGET_XOP" + "vphaddbd\t{%1, %0|%0, %1}" +- [(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + (define_insn "xop_phaddbq" + [(set (match_operand:V2DI 0 "register_operand" "=x") +@@ -23683,7 +23854,8 @@ (define_insn "xop_phaddbq" + (parallel [(const_int 7) (const_int 15)])))))))] + "TARGET_XOP" + "vphaddbq\t{%1, %0|%0, %1}" +- [(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + (define_insn "xop_phaddwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") +@@ -23700,7 +23872,8 @@ (define_insn "xop_phaddwd" + (const_int 5) (const_int 7)])))))] + "TARGET_XOP" + "vphaddwd\t{%1, %0|%0, %1}" +- [(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + (define_insn "xop_phaddwq" + [(set (match_operand:V2DI 0 "register_operand" "=x") +@@ -23725,7 +23898,8 @@ (define_insn "xop_phaddwq" + (parallel [(const_int 3) (const_int 7)]))))))] + "TARGET_XOP" + "vphaddwq\t{%1, %0|%0, %1}" +- [(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + (define_insn "xop_phadddq" + [(set (match_operand:V2DI 0 "register_operand" "=x") +@@ -23740,7 +23914,8 @@ (define_insn "xop_phadddq" + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_XOP" + "vphadddq\t{%1, %0|%0, %1}" +- 
[(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + (define_insn "xop_phsubbw" + [(set (match_operand:V8HI 0 "register_operand" "=x") +@@ -23761,7 +23936,8 @@ (define_insn "xop_phsubbw" + (const_int 13) (const_int 15)])))))] + "TARGET_XOP" + "vphsubbw\t{%1, %0|%0, %1}" +- [(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + (define_insn "xop_phsubwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") +@@ -23778,7 +23954,8 @@ (define_insn "xop_phsubwd" + (const_int 5) (const_int 7)])))))] + "TARGET_XOP" + "vphsubwd\t{%1, %0|%0, %1}" +- [(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + (define_insn "xop_phsubdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") +@@ -23793,7 +23970,8 @@ (define_insn "xop_phsubdq" + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_XOP" + "vphsubdq\t{%1, %0|%0, %1}" +- [(set_attr "type" "sseiadd1")]) ++ [(set_attr "type" "sseiadd1") ++ (set_attr "c86_attr" "hplus")]) + + ;; XOP permute instructions + (define_insn "xop_pperm" +@@ -24575,6 +24753,7 @@ (define_insn "aesenc" + vaesenc\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "aes") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") + (set_attr "btver2_decode" "double,double") +@@ -24591,6 +24770,7 @@ (define_insn "aesenclast" + vaesenclast\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "aes") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") + (set_attr "btver2_decode" "double,double") +@@ -24607,6 +24787,7 @@ (define_insn "aesdec" + vaesdec\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "aes") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") + (set_attr "btver2_decode" 
"double,double") +@@ -24623,6 +24804,7 @@ (define_insn "aesdeclast" + vaesdeclast\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") ++ (set_attr "c86_attr" "aes") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") + (set_attr "btver2_decode" "double,double") +@@ -24635,6 +24817,7 @@ (define_insn "aesimc" + "TARGET_AES" + "%vaesimc\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "aes") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) +@@ -24647,6 +24830,7 @@ (define_insn "aeskeygenassist" + "TARGET_AES" + "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "aes") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") +@@ -24783,6 +24967,7 @@ (define_insn "_permvar" + return "vperm\t{%1, %2, %0|%0, %2, %1}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -24795,6 +24980,7 @@ (define_insn "_permvar" + "TARGET_AVX512VBMI && " + "vperm\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -24807,6 +24993,7 @@ (define_insn "_permvar" + "TARGET_AVX512BW && " + "vperm\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -25025,6 +25212,7 @@ (define_insn "avx2_perm_1" + return "vperm\t{%2, %1, %0|%0, %1, %2}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -25100,6 +25288,7 @@ (define_insn "avx512f_perm_1" + return "vperm\t{%2, %1, %0|%0, %1, %2}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -25182,6 +25371,7 @@ (define_insn "avx512f_broadcast" + 
vshuf32x4\t{$0x0, %g1, %g1, %0|%0, %g1, %g1, 0x0} + vbroadcast32x4\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "shufx,*") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -25194,6 +25384,7 @@ (define_insn "avx512f_broadcast" + vshuf64x2\t{$0x44, %g1, %g1, %0|%0, %g1, %g1, 0x44} + vbroadcast64x4\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "shufx,*") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -25388,6 +25579,7 @@ (define_insn "avx_vbroadcastf128_" + vinsert32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}" + [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl") + (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1") ++ (set_attr "c86_attr" "*,insertx,*,*,insertx,*,insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "0,1,1,0,1,0,1") + (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex") +@@ -25427,6 +25619,7 @@ (define_insn "avx512vl_broadcast_1" + vshuf32x4\t{$0x0, %t1, %t1, %0|%0, %t1, %t1, 0x0} + vbroadcast32x4\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "shufx,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -25440,6 +25633,7 @@ (define_insn "avx512dq_broadcast_1" + vshuf32x4\t{$0x44, %g1, %g1, %0|%0, %g1, %g1, 0x44} + vbroadcast32x8\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "shufx,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -25457,6 +25651,7 @@ (define_insn "avx512dq_broadcast_1" + vshuf64x2\t{$0x0, %1, %1, %0|%0, %1, %1, 0x0} + vbroadcast64x2\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "shufx,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -25546,6 +25741,7 @@ (define_insn "*_vpermi2var3_mask" + "TARGET_AVX512F" + "vpermi2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm2") 
+ (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -25562,6 +25758,7 @@ (define_insn "*_vpermi2var3_mask" + "TARGET_AVX512F" + "vpermi2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm2") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -25591,6 +25788,7 @@ (define_insn "_vpermt2var3" + vpermt2\t{%3, %1, %0|%0, %1, %3} + vpermi2\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm2") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -25607,6 +25805,7 @@ (define_insn "_vpermt2var3_mask" + "TARGET_AVX512F" + "vpermt2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "perm2") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -25682,6 +25881,7 @@ (define_insn "*avx_vperm2f128_nozero" + return "vperm2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -25786,6 +25986,7 @@ (define_insn "vec_set_lo_" + return "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -25808,6 +26009,7 @@ (define_insn "vec_set_hi_" + return "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -25829,6 +26031,7 @@ (define_insn "vec_set_lo_" + return "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; + } + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -25850,6 +26053,7 @@ (define_insn "vec_set_hi_" + return "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; + 
} + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -25870,6 +26074,7 @@ (define_insn "vec_set_lo_" + vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0} + vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex,evex") +@@ -25890,6 +26095,7 @@ (define_insn "vec_set_hi_" + vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1} + vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex,evex") +@@ -25914,6 +26120,7 @@ (define_insn "vec_set_lo_v32qi" + vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0} + vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex,evex") +@@ -25938,6 +26145,7 @@ (define_insn "vec_set_hi_v32qi" + vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1} + vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" + [(set_attr "type" "sselog") ++ (set_attr "c86_attr" "insertx") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex,evex") +@@ -25967,6 +26175,7 @@ (define_insn "_maskstore" + "TARGET_AVX" + "vmaskmov\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "btver2_decode" "vector") +@@ -26316,6 +26525,7 @@ (define_insn "avx_vec_concat" + } + } + [(set_attr "type" "sselog,sselog,ssemov,ssemov") ++ (set_attr "c86_attr" "insertx,insertx,*,*") + (set_attr "prefix_extra" "1,1,*,*") + (set_attr "length_immediate" "1,1,*,*") + (set_attr "prefix" "maybe_evex") 
+@@ -26894,6 +27104,7 @@ (define_insn "_compress_mask" + "TARGET_AVX512F" + "vcompress\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "compress") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -26907,6 +27118,7 @@ (define_insn "compress_mask" + "TARGET_AVX512VBMI2" + "vpcompress\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "compress") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -26920,6 +27132,7 @@ (define_insn "_compressstore_mask" + "TARGET_AVX512F" + "vcompress\t{%1, %0%{%2%}|%0%{%2%}, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "compress") + (set_attr "prefix" "evex") + (set_attr "memory" "store") + (set_attr "mode" "")]) +@@ -26934,6 +27147,7 @@ (define_insn "compressstore_mask" + "TARGET_AVX512VBMI2" + "vpcompress\t{%1, %0%{%2%}|%0%{%2%}, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "compress") + (set_attr "prefix" "evex") + (set_attr "memory" "store") + (set_attr "mode" "")]) +@@ -26958,6 +27172,7 @@ (define_insn "expand_mask" + "TARGET_AVX512F" + "vexpand\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "expand") + (set_attr "prefix" "evex") + (set_attr "memory" "none,load") + (set_attr "mode" "")]) +@@ -26972,6 +27187,7 @@ (define_insn "expand_mask" + "TARGET_AVX512VBMI2" + "vexpand\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "c86_attr" "expand") + (set_attr "prefix" "evex") + (set_attr "memory" "none,load") + (set_attr "mode" "")]) +@@ -27164,6 +27380,7 @@ (define_insn "avx512bw_dbpsadbw" + "TARGET_AVX512BW" + "vdbpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sselog1") ++ (set_attr "c86_attr" "sadbw") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) +@@ -27175,6 +27392,7 @@ (define_insn "clz2" + "TARGET_AVX512CD" + "vplzcnt\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") ++ (set_attr 
"c86_attr" "abs") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -27351,6 +27569,7 @@ (define_insn "vpamdd52" + "TARGET_AVX512IFMA" + "vpmadd52\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemuladd") ++ (set_attr "c86_attr" "madd") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -27367,6 +27586,7 @@ (define_insn "vpamdd52_mask" + "TARGET_AVX512IFMA" + "vpmadd52\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "type" "ssemuladd") ++ (set_attr "c86_attr" "madd") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -28223,7 +28443,7 @@ (define_insn "vaesdec_" + UNSPEC_VAESDEC))] + "TARGET_VAES" + "vaesdec\t{%2, %1, %0|%0, %1, %2}" +-) ++ [(set_attr "c86_attr" "aes")]) + + (define_insn "vaesdeclast_" + [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") +@@ -28233,7 +28453,7 @@ (define_insn "vaesdeclast_" + UNSPEC_VAESDECLAST))] + "TARGET_VAES" + "vaesdeclast\t{%2, %1, %0|%0, %1, %2}" +-) ++ [(set_attr "c86_attr" "aes")]) + + (define_insn "vaesenc_" + [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") +@@ -28243,7 +28463,7 @@ (define_insn "vaesenc_" + UNSPEC_VAESENC))] + "TARGET_VAES" + "vaesenc\t{%2, %1, %0|%0, %1, %2}" +-) ++ [(set_attr "c86_attr" "aes")]) + + (define_insn "vaesenclast_" + [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") +@@ -28253,7 +28473,7 @@ (define_insn "vaesenclast_" + UNSPEC_VAESENCLAST))] + "TARGET_VAES" + "vaesenclast\t{%2, %1, %0|%0, %1, %2}" +-) ++ [(set_attr "c86_attr" "aes")]) + + (define_insn "vpclmulqdq_" + [(set (match_operand:VI8_FVL 0 "register_operand" "=v") +diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h +index c67e002809d..d2cc3ff2f47 100644 +--- a/gcc/config/i386/x86-tune-costs.h ++++ b/gcc/config/i386/x86-tune-costs.h +@@ -3808,3 +3808,279 @@ struct processor_costs core_cost = { + "16", /* Func alignment. 
*/ + }; + ++/* C86_4G_M4 has optimized REP instruction for medium sized blocks, but for ++ very small blocks it is better to use loop. For large blocks, libcall ++ can do non-temporal accesses and beat inline considerably. */ ++static stringop_algs c86_4g_m4_memcpy[2] = { ++ /* 32-bit tuning. */ ++ {libcall, {{6, loop, false}, ++ {14, unrolled_loop, false}, ++ {-1, libcall, false}}}, ++ /* 64-bit tuning. */ ++ {libcall, {{16, loop, false}, ++ {128, rep_prefix_8_byte, false}, ++ {-1, libcall, false}}}}; ++static stringop_algs c86_4g_m4_memset[2] = { ++ /* 32-bit tuning. */ ++ {libcall, {{8, loop, false}, ++ {24, unrolled_loop, false}, ++ {128, rep_prefix_4_byte, false}, ++ {-1, libcall, false}}}, ++ /* 64-bit tuning. */ ++ {libcall, {{48, unrolled_loop, false}, ++ {128, rep_prefix_8_byte, false}, ++ {-1, libcall, false}}}}; ++static const ++struct processor_costs c86_4g_m4_cost = { ++ { ++ /* Start of register allocator costs. integer->integer move cost is 2. */ ++ ++ /* reg-reg moves are done by renaming and thus they are even cheaper than ++ 1 cycle. Because reg-reg move cost is 2 and the following tables ++ correspond to doubles of latencies, we do not model this correctly. ++ It does not seem to make practical difference to bump prices up even ++ more. */ ++ 6, /* cost for loading QImode using ++ movzbl. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers. */ ++ 2, /* cost of reg,reg fld/fst. */ ++ {6, 6, 16}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode. */ ++ {8, 8, 16}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode. */ ++ 2, /* cost of moving MMX register. */ ++ {6, 6}, /* cost of loading MMX registers ++ in SImode and DImode. */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode. */ ++ 2, 3, 6, /* cost of moving XMM,YMM,ZMM ++ register.
*/ ++ {6, 6, 6, 12, 24}, /* cost of loading SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ {8, 8, 8, 16, 32}, /* cost of storing SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ 6, 6, /* SSE->integer and integer->SSE ++ moves. */ ++ 8, 8, /* mask->integer and integer->mask ++ moves. */ ++ {6, 6, 6}, /* cost of loading mask register ++ in QImode, HImode, SImode. */ ++ {8, 8, 8}, /* cost if storing mask register ++ in QImode, HImode, SImode. */ ++ 2, /* cost of moving mask register. */ ++ /* End of register allocator costs. */ ++ }, ++ ++ COSTS_N_INSNS (1), /* cost of an add instruction. */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction. */ ++ COSTS_N_INSNS (1), /* variable shift costs. */ ++ COSTS_N_INSNS (1), /* constant shift costs. */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ ++ COSTS_N_INSNS (3), /* HI. */ ++ COSTS_N_INSNS (3), /* SI. */ ++ COSTS_N_INSNS (3), /* DI. */ ++ COSTS_N_INSNS (3)}, /* other. */ ++ 0, /* cost of multiply per each bit ++ set. */ ++ /* Depending on parameters, idiv can get faster on HYGON. This is upper ++ bound. */ ++ {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ ++ COSTS_N_INSNS (22), /* HI. */ ++ COSTS_N_INSNS (30), /* SI. */ ++ COSTS_N_INSNS (45), /* DI. */ ++ COSTS_N_INSNS (45)}, /* other. */ ++ COSTS_N_INSNS (1), /* cost of movsx. */ ++ COSTS_N_INSNS (1), /* cost of movzx. */ ++ 8, /* "large" insn. */ ++ 9, /* MOVE_RATIO. */ ++ 6, /* CLEAR_RATIO. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers. */ ++ {6, 6, 6, 12, 24}, /* cost of loading SSE register ++ in 32bit, 64bit, 128bit, 256bit ++ and 512bit. */ ++ {8, 8, 8, 16, 32}, /* cost of storing SSE register ++ in 32bit, 64bit, 128bit, 256bit ++ and 512bit. */ ++ {6, 6, 6, 12, 24}, /* cost of unaligned loads. */ ++ {8, 8, 8, 16, 32}, /* cost of unaligned stores. 
*/ ++ 2, 3, 6, /* cost of moving XMM,YMM,ZMM ++ register. */ ++ 6, /* cost of moving SSE register to ++ integer. */ ++ ++ 18, 8, /* Gather load static, per_elt. */ ++ 18, 10, /* Gather store static, per_elt. */ ++ 32, /* size of l1 cache. */ ++ 512, /* size of l2 cache. */ ++ 64, /* size of prefetch block. */ ++ /* C86_4G_M4 processors never drop prefetches; if they cannot be performed ++ immediately, they are queued. We set number of simultaneous prefetches ++ to a large constant to reflect this (it probably is not a good idea not ++ to limit number of prefetches at all, as their execution also takes some ++ time). */ ++ 100, /* number of parallel prefetches. */ ++ 3, /* Branch cost. */ ++ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (5), /* cost of FMUL instruction. */ ++ ++ COSTS_N_INSNS (15), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ ++ COSTS_N_INSNS (10), /* cost of FSQRT instruction. */ ++ ++ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ ++ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ ++ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ ++ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ ++ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ ++ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ ++ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ ++ ++ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ ++ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ ++ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ ++ ++ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ ++ c86_4g_m4_memcpy, ++ c86_4g_m4_memset, ++ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ ++ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ ++ "16", /* Loop alignment. */ ++ "16", /* Jump alignment. */ ++ "0:0:8", /* Label alignment. */ ++ "16", /* Func alignment. 
*/ ++}; ++ ++struct processor_costs c86_4g_m6_cost = c86_4g_m4_cost; ++ ++struct processor_costs c86_4g_m7_cost = { ++ { ++ /* Start of register allocator costs. integer->integer move cost is 2. */ ++ ++ /* reg-reg moves are done by renaming and thus they are even cheaper than ++ 1 cycle. Because reg-reg move cost is 2 and following tables correspond ++ to doubles of latencies, we do not model this correctly. It does not ++ seem to make practical difference to bump prices up even more. */ ++ 6, /* cost for loading QImode using ++ movzbl. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers. */ ++ 2, /* cost of reg,reg fld/fst. */ ++ {14, 14, 17}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode. */ ++ {12, 12, 16}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode. */ ++ 2, /* cost of moving MMX register. */ ++ {6, 6}, /* cost of loading MMX registers ++ in SImode and DImode. */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode. */ ++ 2, 2, 3, /* cost of moving XMM,YMM,ZMM ++ register. */ ++ {6, 6, 10, 10, 12}, /* cost of loading SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ {8, 8, 8, 12, 12}, /* cost of storing SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ 6, 8, /* SSE->integer and integer->SSE ++ moves. */ ++ 8, 8, /* mask->integer and integer->mask ++ moves. */ ++ {6, 6, 6}, /* cost of loading mask register ++ in QImode, HImode, SImode. */ ++ {8, 8, 8}, /* cost if storing mask register ++ in QImode, HImode, SImode. */ ++ 2, /* cost of moving mask register. */ ++ /* End of register allocator costs. */ ++ }, ++ ++ COSTS_N_INSNS (1), /* cost of an add instruction. */ ++ ++ COSTS_N_INSNS (1), /* cost of a lea instruction. */ ++ COSTS_N_INSNS (1), /* variable shift costs. */ ++ COSTS_N_INSNS (1), /* constant shift costs. */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. 
*/ ++ COSTS_N_INSNS (3), /* HI. */ ++ COSTS_N_INSNS (3), /* SI. */ ++ COSTS_N_INSNS (3), /* DI. */ ++ COSTS_N_INSNS (3)}, /* other. */ ++ 0, /* cost of multiply per each bit ++ set. */ ++ {COSTS_N_INSNS (15), /* cost of a divide/mod for QI. */ ++ COSTS_N_INSNS (17), /* HI. */ ++ COSTS_N_INSNS (25), /* SI. */ ++ COSTS_N_INSNS (41), /* DI. */ ++ COSTS_N_INSNS (41)}, /* other. */ ++ COSTS_N_INSNS (1), /* cost of movsx. */ ++ COSTS_N_INSNS (1), /* cost of movzx. */ ++ 8, /* "large" insn. */ ++ 9, /* MOVE_RATIO. */ ++ 6, /* CLEAR_RATIO. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers. */ ++ {6, 6, 10, 10, 12}, /* cost of loading SSE registers ++ in 32bit, 64bit, 128bit, 256bit ++ and 512bit. */ ++ {8, 8, 8, 12, 12}, /* cost of storing SSE register ++ in 32bit, 64bit, 128bit, 256bit and ++ 512bit. */ ++ {6, 6, 10, 10, 12}, /* cost of unaligned loads. */ ++ {8, 8, 8, 12, 12}, /* cost of unaligned stores. */ ++ 2, 2, 3, /* cost of moving XMM,YMM,ZMM ++ register. */ ++ 6, /* cost of moving SSE register to ++ integer. */ ++ ++ 14, 10, /* Gather load static, per_elt. */ ++ 14, 20, /* Gather store static, per_elt. */ ++ 32, /* size of l1 cache. */ ++ 512, /* size of l2 cache. */ ++ 64, /* size of prefetch block. */ ++ ++ 100, /* number of parallel prefetches. */ ++ 3, /* Branch cost. */ ++ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (5), /* cost of FMUL instruction. */ ++ ++ COSTS_N_INSNS (15), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ ++ COSTS_N_INSNS (22), /* cost of FSQRT instruction. */ ++ ++ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ ++ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ ++ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ ++ COSTS_N_INSNS (3), /* cost of MULSD instruction. 
*/ ++ COSTS_N_INSNS (4), /* cost of FMA SS instruction. */ ++ COSTS_N_INSNS (4), /* cost of FMA SD instruction. */ ++ COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ ++ ++ COSTS_N_INSNS (10), /* cost of DIVSD instruction. */ ++ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ ++ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ ++ ++ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ ++ c86_4g_m4_memcpy, ++ c86_4g_m4_memset, ++ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ ++ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ ++ "16", /* Loop alignment. */ ++ "16", /* Jump alignment. */ ++ "0:0:8", /* Label alignment. */ ++ "16", /* Func alignment. */ ++}; +diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc +index 13b1ba43fe3..a02abdef3f1 100644 +--- a/gcc/config/i386/x86-tune-sched.cc ++++ b/gcc/config/i386/x86-tune-sched.cc +@@ -78,6 +78,9 @@ ix86_issue_rate (void) + case PROCESSOR_YONGFENG: + case PROCESSOR_SHIJIDADAO: + case PROCESSOR_GENERIC: ++ case PROCESSOR_C86_4G_M4: ++ case PROCESSOR_C86_4G_M6: ++ case PROCESSOR_C86_4G_M7: + return 4; + + default: +@@ -404,6 +407,9 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, + case PROCESSOR_ZNVER2: + case PROCESSOR_ZNVER3: + case PROCESSOR_ZNVER4: ++ case PROCESSOR_C86_4G_M4: ++ case PROCESSOR_C86_4G_M6: ++ case PROCESSOR_C86_4G_M7: + /* Stack engine allows to execute push&pop instructions in parall. */ + if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) + && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index c57fc972f67..31cdfdf1f9a 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -42,7 +42,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see + DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", + m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G ++ | m_GENERIC) + + /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming + on modern chips. Prefer stores affecting whole integer register +@@ -52,7 +53,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", + m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 + | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL + | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | m_ALDERLAKE +- | m_ZHAOXIN | m_GENERIC) ++ | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store + destinations to be 128bit to allow register renaming on 128bit SSE units, +@@ -62,7 +63,8 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", + that can be partly masked by careful scheduling of moves. 
*/ + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G ++ | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids + partial write to the destination in scalar SSE conversion from FP +@@ -70,14 +72,14 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY, + "sse_partial_reg_fp_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial + write to the destination in scalar SSE conversion from integer to FP. */ + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, + "sse_partial_reg_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_DEST_FALSE_DEP_FOR_GLC: This knob inserts zero-idiom before + several insns to break false dependency on the dest register for GLC +@@ -109,32 +111,32 @@ DEF_TUNE (X86_TUNE_MOVX, "movx", + m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE + | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL + | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_CORE_AVX2 +- | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by + full sized loads. 
*/ + DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", + m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL + | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE +- | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent + conditional jump instruction for 32 bit TARGET. */ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", +- m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) ++ m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent + conditional jump instruction for TARGET_64BIT. */ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER +- | m_ZNVER | m_ZHAOXIN | m_GENERIC) ++ | m_ZNVER | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a + subsequent conditional jump instruction when the condition jump + check sign flag (SF) or overflow flag (OF). */ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER +- | m_ZNVER | m_ZHAOXIN | m_GENERIC) ++ | m_ZNVER | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional + jump instruction when the alu instruction produces the CCFLAG consumed by +@@ -172,14 +174,14 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move", + /* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */ + DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", + m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_TREMONT +- | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions. 
+ Some chips, like 486 and Pentium works faster with separate load + and push instructions. */ + DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory", + m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE +- | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred + over esp subtraction. */ +@@ -256,7 +258,8 @@ DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", + DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", + ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC)) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G ++ | m_GENERIC)) + + /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag + will impact LEA instruction selection. */ +@@ -304,14 +307,14 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, + DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES, + "misaligned_move_string_pro_epilogues", + m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_ZHAOXIN | m_TREMONT +- | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_USE_SAHF: Controls use of SAHF. */ + DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER + | m_BTVER | m_ZNVER | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT +- | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. 
*/ + DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", +@@ -322,7 +325,7 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", + DEF_TUNE (X86_TUNE_USE_BT, "use_bt", + m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL + | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS +- | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency + for bit-manipulation instructions. */ +@@ -343,7 +346,8 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", + + /* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */ + DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence", +- m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN ++ | m_C86_4G | m_GENERIC) + + /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by + generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) - +@@ -368,10 +372,11 @@ DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", + ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL + | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE + | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE +- | m_ZHAOXIN | m_GENERIC)) ++ | m_ZHAOXIN | m_C86_4G | m_GENERIC)) + + /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */ +-DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN) ++DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN ++ | m_C86_4G) + + /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. 
*/ + DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", +@@ -393,30 +398,32 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", + DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM + | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE +- | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) ++ | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_ZHAOXIN | m_C86_4G ++ | m_GENERIC) + + /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores + instead of a sequence loading registers by parts. */ + DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM + | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS +- | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_ZHAOXIN | m_C86_4G ++ | m_GENERIC) + + /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single + precision 128bit instructions instead of double where possible. */ + DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal", +- m_BDVER | m_ZNVER) ++ m_BDVER | m_ZNVER | m_C86_4G) + + /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */ + DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", + m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN +- | m_GENERIC) ++ | m_C86_4G | m_GENERIC) + + /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to + xorps/xorpd and other variants. 
*/ + DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER +- | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) + + /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer + to SSE registers. If disabled, the moves will be done by storing +@@ -469,44 +476,44 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE +- | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) ++ | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO | m_C86_4G)) + + /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 + elements. */ + DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts", +- ~(m_ZNVER4)) ++ ~(m_ZNVER4 | m_C86_4G_M7)) + + /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4 + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE +- | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) ++ | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO | m_C86_4G)) + + /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 + elements. */ + DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", +- ~(m_ZNVER4)) ++ ~(m_ZNVER4 | m_C86_4G_M7)) + + /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE +- | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) ++ | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO | m_C86_4G)) + + /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more + elements. 
*/ + DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts", +- ~(m_ZNVER4)) ++ ~(m_ZNVER4 | m_C86_4G_M7)) + + /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or + smaller FMA chain. */ +-DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3 +- | m_YONGFENG | m_SHIJIDADAO) ++DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 ++ | m_ZNVER3 | m_YONGFENG | m_SHIJIDADAO | m_C86_4G) + + /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or + smaller FMA chain. */ + DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 +- | m_ALDERLAKE | m_SAPPHIRERAPIDS) ++ | m_ALDERLAKE | m_SAPPHIRERAPIDS | m_C86_4G | m_GENERIC) + + /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or + smaller FMA chain. */ +@@ -545,27 +552,28 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 + DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512) + + /* X86_TUNE_AVX256_SPLIT_REGS: if true, AVX512 ops are split into two AVX256 ops. */ +-DEF_TUNE (X86_TUNE_AVX512_SPLIT_REGS, "avx512_split_regs", m_ZNVER4) ++DEF_TUNE (X86_TUNE_AVX512_SPLIT_REGS, "avx512_split_regs", m_ZNVER4 ++ | m_C86_4G_M7) + + /* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces", +- m_CORE_AVX512) ++ m_CORE_AVX512 | m_C86_4G_M4 | m_C86_4G_M6) + + /* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", +- m_CORE_AVX512) ++ m_CORE_AVX512 | m_C86_4G_M4 | m_C86_4G_M6) + + /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit + AVX instructions. 
*/ + DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", +- m_SAPPHIRERAPIDS | m_ZNVER4) ++ m_SAPPHIRERAPIDS | m_ZNVER4 | m_C86_4G_M7) + + /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", +- m_SAPPHIRERAPIDS | m_ZNVER4) ++ m_SAPPHIRERAPIDS | m_ZNVER4 | m_C86_4G_M7) + + /*****************************************************************************/ + /*****************************************************************************/ +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index eb422dc7b23..528c18ffe62 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -23921,6 +23921,18 @@ AMD Family 19h Zen version 3. + + @item znver4 + AMD Family 19h Zen version 4. ++ ++@item hygonfam18h ++HYGON Family 18h CPU. ++ ++@item c86-4g-m4 ++HYGON Family 18h model 4 dharma CPU. ++ ++@item c86-4g-m6 ++HYGON Family 18h model 6 shanghai CPU. ++ ++@item c86-4g-m7 ++HYGON Family 18h model 7 chengdu CPU. + @end table + + Here is an example: +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index f40df87219e..3a36c4e3456 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -31943,6 +31943,27 @@ instruction set support. + + @item geode + AMD Geode embedded processor with MMX and 3DNow!@: instruction set support. ++ ++@item c86-4g-m4 ++HYGON c86-4g-m4 CPU with x86-64, MMX, SSE, SSE2, SSE3, SSE4A, CX16, ABM, SSSE3, ++SSE4.1, SSE4.2, AES, PCLMUL, AVX, AVX2, BMI, BMI2, F16C, FMA, PRFCHW, FXSR, SHA, ++XSAVE, XSAVEOPT, XSAVEC, FSGSBASE, RDRND, MOVBE, MWAITX, ADX, RDSEED, CLZERO, ++CLFLUSHOPT, XSAVES, LZCNT, POPCNT instruction set support. 
++ ++@item c86-4g-m6 ++HYGON c86-4g-m6 CPU with x86-64, MMX, SSE, SSE2, SSE3, SSE4A, CX16, ABM, SSSE3, ++SSE4.1, SSE4.2, AES, PCLMUL, AVX, AVX2, BMI, BMI2, F16C, FMA, PRFCHW, FXSR, SHA, ++XSAVE, XSAVEOPT, XSAVEC, FSGSBASE, RDRND, MOVBE, MWAITX, ADX, RDSEED, CLZERO, ++CLFLUSHOPT, XSAVES, LZCNT, POPCNT instruction set support. ++ ++@item c86-4g-m7 ++HYGON c86-4g-m7 CPU with x86-64, MMX, SSE, SSE2, SSE3, SSE4A, CX16, ABM, SSSE3, ++SSE4.1, SSE4.2, AES, PCLMUL, AVX, AVX2, BMI, BMI2, F16C, FMA, PRFCHW, FXSR, SHA, ++XSAVE, XSAVEOPT, XSAVEC, FSGSBASE, RDRND, MOVBE, MWAITX, ADX, RDSEED, CLZERO, ++CLFLUSHOPT, XSAVES, LZCNT, POPCNT, AVX512F, AVX512DQ, AVX512IFMA, AVX512CD, ++AVX512BW, AVX512VL, AVX512BF16, AVX512VBMI, AVX512VBMI2, GFNI, AVX512VNNI, VAES, ++AVX512BITALG, AVX512VPOPCNTDQ, AVX512VP2INTERSECT, AVXVNNI, VPCLMULQDQ, ++WBNOINVD instruction set support. + @end table + + @item -mtune=@var{cpu-type} +diff --git a/gcc/testsuite/g++.target/i386/mv33.C b/gcc/testsuite/g++.target/i386/mv33.C +new file mode 100644 +index 00000000000..8591690d2cc +--- /dev/null ++++ b/gcc/testsuite/g++.target/i386/mv33.C +@@ -0,0 +1,42 @@ ++// Test that dispatching can choose the right multiversion ++// for HYGON CPUs with the same internal GCC processor id ++ ++// { dg-do run } ++// { dg-require-ifunc "" } ++// { dg-options "-O2" } ++ ++#include ++ ++int __attribute__ ((target("default"))) ++foo () ++{ ++ return 0; ++} ++ ++int __attribute__ ((target("arch=c86-4g-m4"))) foo () { ++ return 1; ++} ++ ++int __attribute__ ((target("arch=c86-4g-m6"))) foo () { ++ return 2; ++} ++ ++int __attribute__ ((target("arch=c86-4g-m7"))) foo () { ++ return 3; ++} ++ ++int main () ++{ ++ int val = foo (); ++ ++ if (__builtin_cpu_is ("c86-4g-m4")) ++ assert (val == 1); ++ else if (__builtin_cpu_is ("c86-4g-m6")) ++ assert (val == 2); ++ else if (__builtin_cpu_is ("c86-4g-m7")) ++ assert (val == 3); ++ else ++ assert (val == 0); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c 
b/gcc/testsuite/gcc.target/i386/builtin_target.c +index fff643c13b0..fa37f0bdba9 100644 +--- a/gcc/testsuite/gcc.target/i386/builtin_target.c ++++ b/gcc/testsuite/gcc.target/i386/builtin_target.c +@@ -54,6 +54,10 @@ check_detailed () + assert (__builtin_cpu_is ("amd")); + get_amd_cpu (&cpu_model, &cpu_model2, cpu_features2); + break; ++ case VENDOR_HYGON: ++ assert (__builtin_cpu_is ("hygon")); ++ get_hygon_cpu (&cpu_model, &cpu_model2, cpu_features2); ++ break; + default: + break; + } +@@ -131,6 +135,8 @@ quick_check () + + assert (__builtin_cpu_is ("bdver2") >= 0); + ++ assert (__builtin_cpu_is ("c86-4g-m4") >= 0); ++ + return 0; + } + +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index faeff3ae2de..3c49c9990c3 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -203,6 +203,9 @@ extern void test_arch_znver1 (void) __attribute__((__target__("arch= + extern void test_arch_znver2 (void) __attribute__((__target__("arch=znver2"))); + extern void test_arch_znver3 (void) __attribute__((__target__("arch=znver3"))); + extern void test_arch_znver4 (void) __attribute__((__target__("arch=znver4"))); ++extern void test_arch_c86_4g_m4 (void) __attribute__((__target__("arch=c86-4g-m4"))); ++extern void test_arch_c86_4g_m6 (void) __attribute__((__target__("arch=c86-4g-m6"))); ++extern void test_arch_c86_4g_m7 (void) __attribute__((__target__("arch=c86-4g-m7"))); + + extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona"))); + extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); +@@ -229,6 +232,9 @@ extern void test_tune_znver1 (void) __attribute__((__target__("tune= + extern void test_tune_znver2 (void) __attribute__((__target__("tune=znver2"))); + extern void test_tune_znver3 (void) __attribute__((__target__("tune=znver3"))); + extern void test_tune_znver4 (void) __attribute__((__target__("tune=znver4"))); 
++extern void test_tune_c86_4g_m4 (void) __attribute__((__target__("tune=c86-4g-m4"))); ++extern void test_tune_c86_4g_m6 (void) __attribute__((__target__("tune=c86-4g-m6"))); ++extern void test_tune_c86_4g_m7 (void) __attribute__((__target__("tune=c86-4g-m7"))); + + extern void test_fpmath_sse (void) __attribute__((__target__("sse2,fpmath=sse"))); + extern void test_fpmath_387 (void) __attribute__((__target__("sse2,fpmath=387"))); +-- +2.34.1 + diff --git a/HYGON-0002-i386-Adjust-some-c86-4g-.md-modeling-to-reduce-build.patch b/HYGON-0002-i386-Adjust-some-c86-4g-.md-modeling-to-reduce-build.patch new file mode 100644 index 0000000..d6ce0e9 --- /dev/null +++ b/HYGON-0002-i386-Adjust-some-c86-4g-.md-modeling-to-reduce-build.patch @@ -0,0 +1,588 @@ +From d90ab6264a879c979f2d75a6d09c1cad241c510e Mon Sep 17 00:00:00 2001 +From: Kewen Lin +Date: Wed, 27 May 2026 20:51:01 +0000 +Subject: [PATCH 2/3] i386: Adjust some c86-4g*.md modeling to reduce build + time + +Upstream reference: https://gcc.gnu.org/g:c776dcd5f86 + +Commit r17-203 caused a significant increase in GCC build time +on several environments as folks reported, mainly due to +excessively long execution time of genautomata. + +As Alexander pointed out, the current division modeling in +c86-4g*.md can cause a combinatorial explosion in the +automaton, that further leads to significant build time +increase. + +Following Alexander's suggestion, this patch introduces +dedicated automatons and cpu_units for idiv and fdiv, uses +them to update the integer, floating point division and +square root modeling for now. Some evaluated statistics +are listed below.
+ +With r17-202: + + *Tested stage-1 i686 build -j 32: 255 seconds* + + $ nm -CS -t d --defined-only gcc/insn-automata.o \ + | sed 's/^[0-9]* 0*//' \ + | sort -n | tail -20 + 13896 r slm_transitions + 15360 r znver4_fp_store_transitions + 16760 r znver4_ieu_transitions + 17776 r bdver1_ieu_transitions + 20068 r bdver1_fp_check + 20068 r bdver1_fp_transitions + 20983 t internal_state_transition(int, DFA_chip*) + 22270 t internal_min_issue_delay(int, DFA_chip*) + 26208 r slm_min_issue_delay + 27244 r bdver1_fp_min_issue_delay + 28518 r glm_check + 28518 r glm_transitions + 33690 r geode_min_issue_delay + 45436 r znver4_fpu_min_issue_delay + 46980 r bdver3_fp_min_issue_delay + 49428 r glm_min_issue_delay + 53730 r btver2_fp_min_issue_delay + 53760 r znver1_fp_transitions + 93960 r bdver3_fp_transitions + 181744 r znver4_fpu_transitions + +With culprit commit r17-203: + + *Tested stage-1 i686 build -j 32: 949 seconds* + + $ nm -CS -t d --defined-only gcc/insn-automata.o \ + | sed 's/^[0-9]* 0*//' \ + | sort -n | tail -20 + 28518 r glm_check + 28518 r glm_transitions + 33690 r geode_min_issue_delay + 45436 r znver4_fpu_min_issue_delay + 46980 r bdver3_fp_min_issue_delay + 49428 r glm_min_issue_delay + 53730 r btver2_fp_min_issue_delay + 53760 r znver1_fp_transitions + 68160 r c86_4g_ieu_min_issue_delay + 93960 r bdver3_fp_transitions + 110080 r c86_4g_fp_min_issue_delay + 136320 r c86_4g_ieu_transitions + 181744 r znver4_fpu_transitions + 220160 r c86_4g_fp_transitions + 262988 r c86_4g_m7_fpu_base + 475225 r c86_4g_m7_ieu_min_issue_delay + 950450 r c86_4g_m7_ieu_transitions + 4010567 r c86_4g_m7_fpu_min_issue_delay + 5496908 r c86_4g_m7_fpu_check + 5496908 r c86_4g_m7_fpu_transitions + +With this patch: + + *Tested stage-1 i686 build -j 32: 257 seconds* + + $ nm -CS -t d --defined-only gcc/insn-automata.o \ + | sed 's/^[0-9]* 0*//' \ + | sort -n | tail -20 + + 20068 r bdver1_fp_transitions + 22354 r c86_4g_m7_ieu_min_issue_delay + 25705 t internal_state_transition(int, 
DFA_chip*) + 26208 r slm_min_issue_delay + 27164 t internal_min_issue_delay(int, DFA_chip*) + 27244 r bdver1_fp_min_issue_delay + 28518 r glm_check + 28518 r glm_transitions + 33690 r geode_min_issue_delay + 33728 r c86_4g_fp_transitions + 45436 r znver4_fpu_min_issue_delay + 46980 r bdver3_fp_min_issue_delay + 49428 r glm_min_issue_delay + 53730 r btver2_fp_min_issue_delay + 53760 r znver1_fp_transitions + 89414 r c86_4g_m7_ieu_transitions + 93960 r bdver3_fp_transitions + 181744 r znver4_fpu_transitions + 326322 r c86_4g_m7_fpu_min_issue_delay + 1305288 r c86_4g_m7_fpu_transitions + +I noticed the size of c86_4g_m7_fpu_transitions is still +large, but this patch can address the build time issue. +To avoid impacting folks' daily builds and regular testing, +I'd like to land this patch first if possible. We can then further +refine the c86-4g modeling and investigate the large transition +table size as part of the follow-up work, even potentially part +of PR 87832. + +gcc/ChangeLog: + + * config/i386/c86-4g-m7.md (c86_4g_m7_idiv): New automaton. + (c86_4g_m7_fdiv): Ditto. + (c86-4g-m7-idiv): New unit. + (c86-4g-m7-fdiv): Ditto. + (c86_4g_m7_idiv_DI): Adjust unit in the reservation. + (c86_4g_m7_idiv_SI): Ditto. + (c86_4g_m7_idiv_HI): Ditto. + (c86_4g_m7_idiv_QI): Ditto. + (c86_4g_m7_idiv_DI_load): Ditto. + (c86_4g_m7_idiv_SI_load): Ditto. + (c86_4g_m7_idiv_HI_load): Ditto. + (c86_4g_m7_idiv_QI_load): Ditto. + (c86_4g_m7_fp_div): Ditto. + (c86_4g_m7_fp_div_load): Ditto. + (c86_4g_m7_fp_idiv_load): Ditto. + (c86_4g_m7_avx512_ssediv): Ditto. + (c86_4g_m7_avx512_ssediv_mem): Ditto. + (c86_4g_m7_avx512_ssediv_z): Ditto. + (c86_4g_m7_avx512_ssediv_zmem): Ditto. + (c86_4g_m7_avx512_sse_sqrt): Ditto. + (c86_4g_m7_avx512_sse_sqrt_load): Ditto. + (c86_4g_m7_fp_sqrt): Ditto. Rename from ... + (c86_4g_m7fp_sqrt): ... here. + * config/i386/c86-4g.md (c86_4g_idiv): New automaton. + (c86_4g_fdiv): Ditto. + (c86-4g-idiv): New unit. + (c86-4g-fdiv): Ditto. 
+ (c86_4g_idiv_DI): Adjust unit in the reservation. 
+ (c86_4g_idiv_SI): Ditto. + (c86_4g_idiv_HI): Ditto. + (c86_4g_idiv_QI): Ditto. + (c86_4g_idiv_mem_DI): Ditto. + (c86_4g_idiv_mem_SI): Ditto. + (c86_4g_idiv_mem_HI): Ditto. + (c86_4g_idiv_mem_QI): Ditto. + (c86_4g_fp_sqrt): Ditto. + (c86_4g_sse_sqrt_sf): Ditto. + (c86_4g_sse_sqrt_sf_mem): Ditto. + (c86_4g_sse_sqrt_df): Ditto. + (c86_4g_sse_sqrt_df_mem): Ditto. + (c86_4g_fp_op_div): Ditto. + (c86_4g_fp_op_div_load): Ditto. + (c86_4g_fp_op_idiv_load): Ditto. + (c86_4g_ssediv_ss_ps): Ditto. + (c86_4g_ssediv_ss_ps_load): Ditto. + (c86_4g_ssediv_sd_pd): Ditto. + (c86_4g_ssediv_sd_pd_load): Ditto. + (c86_4g_ssediv_avx256_ps): Ditto. + (c86_4g_ssediv_avx256_ps_load): Ditto. + (c86_4g_ssediv_avx256_pd): Ditto. + (c86_4g_ssediv_avx256_pd_load): Ditto. + +Signed-off-by: Kewen Lin +--- + gcc/config/i386/c86-4g-m7.md | 47 ++++++++++++++++------------- + gcc/config/i386/c86-4g.md | 57 ++++++++++++++++++++---------------- + 2 files changed, 57 insertions(+), 47 deletions(-) + +diff --git a/gcc/config/i386/c86-4g-m7.md b/gcc/config/i386/c86-4g-m7.md +index 214c45d1468..3a1f18337b6 100644 +--- a/gcc/config/i386/c86-4g-m7.md ++++ b/gcc/config/i386/c86-4g-m7.md +@@ -19,8 +19,9 @@ + + ;; HYGON c86-4g-m7 Scheduling + ;; Modeling automatons for decoders, integer execution pipes, +-;; AGU pipes, branch, floating point execution and fp store units. +-(define_automaton "c86_4g_m7, c86_4g_m7_ieu, c86_4g_m7_agu, c86_4g_m7_fpu") ++;; AGU pipes, branch, floating point execution, fp store units, ++;; integer and floating point dividers. ++(define_automaton "c86_4g_m7, c86_4g_m7_ieu, c86_4g_m7_agu, c86_4g_m7_fpu, c86_4g_m7_idiv, c86_4g_m7_fdiv") + + ;; Decoders unit has 4 decoders and all of them can decode fast path + ;; and vector type instructions. +@@ -29,6 +30,10 @@ (define_cpu_unit "c86-4g-m7-decode1" "c86_4g_m7") + (define_cpu_unit "c86-4g-m7-decode2" "c86_4g_m7") + (define_cpu_unit "c86-4g-m7-decode3" "c86_4g_m7") + ++;; Two separated dividers for int and fp. 
++(define_cpu_unit "c86-4g-m7-idiv" "c86_4g_m7_idiv") ++(define_cpu_unit "c86-4g-m7-fdiv" "c86_4g_m7_fdiv") ++ + ;; Currently blocking all decoders for vector path instructions as + ;; they are dispatched separetely as microcode sequence. + (define_reservation "c86-4g-m7-vector" "c86-4g-m7-decode0+c86-4g-m7-decode1+c86-4g-m7-decode2+c86-4g-m7-decode3") +@@ -168,56 +173,56 @@ (define_insn_reservation "c86_4g_m7_idiv_DI" 41 + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-ieu3*41") ++ "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*41") + + (define_insn_reservation "c86_4g_m7_idiv_SI" 25 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-ieu3*25") ++ "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*25") + + (define_insn_reservation "c86_4g_m7_idiv_HI" 17 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-ieu3*17") ++ "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*17") + + (define_insn_reservation "c86_4g_m7_idiv_QI" 15 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "none")))) +- "c86-4g-m7-direct,c86-4g-m7-ieu3*15") ++ "c86-4g-m7-direct,c86-4g-m7-ieu3,c86-4g-m7-idiv*15") + + (define_insn_reservation "c86_4g_m7_idiv_DI_load" 45 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3*41") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*41") + + (define_insn_reservation "c86_4g_m7_idiv_SI_load" 29 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3*25") ++ 
"c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*25") + + (define_insn_reservation "c86_4g_m7_idiv_HI_load" 21 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3*17") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*17") + + (define_insn_reservation "c86_4g_m7_idiv_QI_load" 19 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "load")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu3*15") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*15") + + ;; Integer/genaral Instructions + (define_insn_reservation "c86_4g_m7_insn" 1 +@@ -435,11 +440,11 @@ (define_insn_reservation "c86_4g_m7_fp_mov_direct" 1 + "c86-4g-m7-direct,c86-4g-m7-fpu1") + + ;; FSQRT +-(define_insn_reservation "c86_4g_m7fp_sqrt" 22 ++(define_insn_reservation "c86_4g_m7_fp_sqrt" 22 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fpspc") + (eq_attr "c86_attr" "sqrt"))) +- "c86-4g-m7-direct,c86-4g-m7-fpu1*22") ++ "c86-4g-m7-direct,c86-4g-m7-fpu1,c86-4g-m7-fdiv*22") + + ;; FPSPC + (define_insn_reservation "c86_4g_m7_fp_spc_direct" 5 +@@ -482,21 +487,21 @@ (define_insn_reservation "c86_4g_m7_fp_div" 15 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "none"))) +- "c86-4g-m7-direct,c86-4g-m7-fpu1*7") ++ "c86-4g-m7-direct,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15") + + (define_insn_reservation "c86_4g_m7_fp_div_load" 22 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "false") + (eq_attr "memory" "!none")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1*7") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15") + + (define_insn_reservation "c86_4g_m7_fp_idiv_load" 26 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "true") 
+ (eq_attr "memory" "!none")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1*7") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15") + + (define_insn_reservation "c86_4g_m7_fp_fsgn" 1 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1518,28 +1523,28 @@ (define_insn_reservation "c86_4g_m7_avx512_ssediv" 13 + (and (eq_attr "type" "ssediv") + (and (not (eq_attr "mode" "V16SF,V8DF")) + (eq_attr "memory" "none")))) +- "c86-4g-m7-direct,c86-4g-m7-fpu3*7") ++ "c86-4g-m7-direct,c86-4g-m7-fpu3,c86-4g-m7-fdiv*13") + + (define_insn_reservation "c86_4g_m7_avx512_ssediv_mem" 20 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (not (eq_attr "mode" "V16SF,V8DF")) + (eq_attr "memory" "load")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3*7") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fdiv*13") + + (define_insn_reservation "c86_4g_m7_avx512_ssediv_z" 24 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V16SF,V8DF") + (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-fpu3*7") ++ "c86-4g-m7-double,c86-4g-m7-fpu3,c86-4g-m7-fdiv*24") + + (define_insn_reservation "c86_4g_m7_avx512_ssediv_zmem" 31 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V16SF,V8DF") + (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu3*7") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fdiv*24") + + ;; SSECMP + (define_insn_reservation "c86_4g_m7_avx512_ssecmp" 5 +@@ -1919,14 +1924,14 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_sqrt" 16 + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "none")))) +- "c86-4g-m7-direct,c86-4g-m7-fpu1*7|c86-4g-m7-fpu3*7") ++ "c86-4g-m7-direct,c86-4g-m7-fpu1|c86-4g-m7-fpu3,c86-4g-m7-fdiv*16") + + (define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_load" 23 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr 
"c86_attr" "sqrt") + (eq_attr "memory" "load")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1*7|c86-4g-m7-fpu3*7") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu3,c86-4g-m7-fdiv*16") + + ;; MSKLOG/MSKMOV + (define_insn_reservation "c86_4g_m7_avx512_msklog" 1 +diff --git a/gcc/config/i386/c86-4g.md b/gcc/config/i386/c86-4g.md +index 66c4e2cf744..49a46a8aa19 100644 +--- a/gcc/config/i386/c86-4g.md ++++ b/gcc/config/i386/c86-4g.md +@@ -29,8 +29,9 @@ (define_attr "c86_attr" "other,abs,sqrt,maxmin,blend,blendv,rcp,movnt,avg, + + ;; HYGON Scheduling + ;; Modeling automatons for decoders, integer execution pipes, +-;; AGU pipes and floating point execution units. +-(define_automaton "c86_4g, c86_4g_ieu, c86_4g_fp, c86_4g_agu") ++;; AGU pipes, floating point execution units, integer and ++;; floating point dividers. ++(define_automaton "c86_4g, c86_4g_ieu, c86_4g_fp, c86_4g_agu, c86_4g_idiv, c86_4g_fdiv") + + ;; Decoders unit has 4 decoders and all of them can decode fast path + ;; and vector type instructions. +@@ -39,6 +40,10 @@ (define_cpu_unit "c86-4g-decode1" "c86_4g") + (define_cpu_unit "c86-4g-decode2" "c86_4g") + (define_cpu_unit "c86-4g-decode3" "c86_4g") + ++;; Two separated dividers for int and fp. ++(define_cpu_unit "c86-4g-idiv" "c86_4g_idiv") ++(define_cpu_unit "c86-4g-fdiv" "c86_4g_fdiv") ++ + ;; Currently blocking all decoders for vector path instructions as + ;; they are dispatched separetely as microcode sequence. + ;; Fix me: Need to revisit this. 
+@@ -146,28 +151,28 @@ (define_insn_reservation "c86_4g_idiv_DI" 41 + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "none")))) +- "c86-4g-double,c86-4g-ieu2*41") ++ "c86-4g-double,c86-4g-ieu2,c86-4g-idiv*41") + + (define_insn_reservation "c86_4g_idiv_SI" 25 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "none")))) +- "c86-4g-double,c86-4g-ieu2*25") ++ "c86-4g-double,c86-4g-ieu2,c86-4g-idiv*25") + + (define_insn_reservation "c86_4g_idiv_HI" 17 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "none")))) +- "c86-4g-double,c86-4g-ieu2*17") ++ "c86-4g-double,c86-4g-ieu2,c86-4g-idiv*17") + + (define_insn_reservation "c86_4g_idiv_QI" 15 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "none")))) +- "c86-4g-direct,c86-4g-ieu2*15") ++ "c86-4g-direct,c86-4g-ieu2,c86-4g-idiv*15") + + ;; Mem operands + (define_insn_reservation "c86_4g_idiv_mem_DI" 45 +@@ -175,28 +180,28 @@ (define_insn_reservation "c86_4g_idiv_mem_DI" 45 + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-ieu2*41") ++ "c86-4g-double,c86-4g-load,c86-4g-ieu2,c86-4g-idiv*41") + + (define_insn_reservation "c86_4g_idiv_mem_SI" 29 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-ieu2*25") ++ "c86-4g-double,c86-4g-load,c86-4g-ieu2,c86-4g-idiv*25") + + (define_insn_reservation "c86_4g_idiv_mem_HI" 21 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-ieu2*17") ++ "c86-4g-double,c86-4g-load,c86-4g-ieu2,c86-4g-idiv*17") + + 
(define_insn_reservation "c86_4g_idiv_mem_QI" 19 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "load")))) +- "c86-4g-direct,c86-4g-load,c86-4g-ieu2*15") ++ "c86-4g-direct,c86-4g-load,c86-4g-ieu2,c86-4g-idiv*15") + + ;; STR ISHIFT which are micro coded. + ;; Fix me: Latency need to be rechecked. +@@ -382,7 +387,7 @@ (define_insn_reservation "c86_4g_fp_sqrt" 22 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fpspc") + (eq_attr "c86_attr" "sqrt"))) +- "c86-4g-direct,c86-4g-fp1*22") ++ "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*22") + + (define_insn_reservation "c86_4g_sse_sqrt_sf" 14 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -390,7 +395,7 @@ (define_insn_reservation "c86_4g_sse_sqrt_sf" 14 + (and (eq_attr "memory" "none,unknown") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) +- "c86-4g-direct,c86-4g-fp1*14") ++ "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*14") + + (define_insn_reservation "c86_4g_sse_sqrt_sf_mem" 21 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -398,7 +403,7 @@ (define_insn_reservation "c86_4g_sse_sqrt_sf_mem" 21 + (and (eq_attr "memory" "load") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1*14") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*14") + + (define_insn_reservation "c86_4g_sse_sqrt_df" 20 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -406,7 +411,7 @@ (define_insn_reservation "c86_4g_sse_sqrt_df" 20 + (and (eq_attr "memory" "none,unknown") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) +- "c86-4g-direct,c86-4g-fp1*20") ++ "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*20") + + (define_insn_reservation "c86_4g_sse_sqrt_df_mem" 27 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -414,7 +419,7 @@ (define_insn_reservation "c86_4g_sse_sqrt_df_mem" 27 + (and (eq_attr "memory" "load") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) +- 
"c86-4g-direct,c86-4g-load,c86-4g-fp1*20") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*20") + + ;; RCP + (define_insn_reservation "c86_4g_sse_rcp" 5 +@@ -487,20 +492,20 @@ (define_insn_reservation "c86_4g_fp_op_div" 15 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "none"))) +- "c86-4g-direct,c86-4g-fp1*15") ++ "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*15") + + (define_insn_reservation "c86_4g_fp_op_div_load" 22 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "load"))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1*15") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*15") + + (define_insn_reservation "c86_4g_fp_op_idiv_load" 27 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "true") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-fp1*19") ++ "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*19") + + ;; MMX, SSE, SSEn.n, AVX, AVX2 instructions + (define_insn_reservation "c86_4g_fp_insn" 1 +@@ -1019,28 +1024,28 @@ (define_insn_reservation "c86_4g_ssediv_ss_ps" 10 + (eq_attr "mode" "V4SF,SF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none"))) +- "c86-4g-direct,c86-4g-fp1*10") ++ "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*10") + + (define_insn_reservation "c86_4g_ssediv_ss_ps_load" 17 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V4SF,SF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1*10") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*10") + + (define_insn_reservation "c86_4g_ssediv_sd_pd" 13 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V2DF,DF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none"))) +- "c86-4g-direct,c86-4g-fp1*13") ++ "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*13") + + (define_insn_reservation "c86_4g_ssediv_sd_pd_load" 20 + (and (and (eq_attr "cpu" 
"c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V2DF,DF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1*13") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*13") + + + (define_insn_reservation "c86_4g_ssediv_avx256_ps" 10 +@@ -1048,28 +1053,28 @@ (define_insn_reservation "c86_4g_ssediv_avx256_ps" 10 + (and (eq_attr "mode" "V8SF") + (and (eq_attr "memory" "none") + (eq_attr "type" "ssediv")))) +- "c86-4g-double,c86-4g-fp1*10") ++ "c86-4g-double,c86-4g-fp1,c86-4g-fdiv*10") + + (define_insn_reservation "c86_4g_ssediv_avx256_ps_load" 17 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-fp1*10") ++ "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*10") + + (define_insn_reservation "c86_4g_ssediv_avx256_pd" 13 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V4DF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none")))) +- "c86-4g-double,c86-4g-fp1*13") ++ "c86-4g-double,c86-4g-fp1,c86-4g-fdiv*13") + + (define_insn_reservation "c86_4g_ssediv_avx256_pd_load" 20 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V4DF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-fp1*13") ++ "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*13") + ;; SSE MUL + (define_insn_reservation "c86_4g_ssemul_ss_ps" 3 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +-- +2.34.1 + diff --git a/HYGON-0003-i386-Refine-c86-4g-fdiv-scheduling-model.patch b/HYGON-0003-i386-Refine-c86-4g-fdiv-scheduling-model.patch new file mode 100644 index 0000000..9249b39 --- /dev/null +++ b/HYGON-0003-i386-Refine-c86-4g-fdiv-scheduling-model.patch @@ -0,0 +1,1410 @@ +From deb6777079ec44f6c8de49465ce4e36eb635cb56 Mon Sep 17 00:00:00 2001 +From: Kewen Lin +Date: Fri, 29 May 2026 15:03:36 +0000 +Subject: [PATCH 3/3] i386: Refine c86-4g 
fdiv scheduling model + +Commit r17-258 introduced separated c86-4g fdiv units to avoid the +automaton explosion caused by modeling the whole divider latency on +normal FPU pipes. But the real hardware may keep the associated FPU +pipe occupied for some cycles at both the beginning and the end of +an fdiv or sqrt operation. Following Alexander's suggestion in [1], +this patch still keeps the long-latency part on the dedicated fdiv +unit but models only a bounded part of the FPU pipe occupancy. It +makes the first four cycles reserve both the selected FPU pipe and +the fdiv unit, then keep only the fdiv unit for the remaining cycles. + +Taking r17-258 as baseline, I tried K = 1,2,3,4 for + + fpu,divider*N -> (fpu+divider)*K, divider*(N-K) + +and measured the time for build/genautomata and the top 100 symbol +sizes of insn-automata.o (baseline normalized as 100) as below: + +1) without any other changes: + time size + baseline 100 100 + r17-203 340.0 629.3 + K1 100.3 100 + K2 105.5 112.5 + K3 112.8 129 + K4 119.4 141 + +2) Splitting fpu0/fpu2 and fpu1/fpu3 to paired automatons: + time size + baseline 100 100 + r17-203 340.0 629.3 + KS1 79.6 43.3 + KS2 79.8 43.3 + KS3 79.6 43.3 + KS4 79.4 43.3 + +It turns out that if we want to model the FPU occupancy for some +beginning cycles, separating the involved fpu1/fpu3 from the +original fpu looks better. So this patch splits fpu0/fpu2 and +fpu1/fpu3 into two paired automata and this extra coupling does +not grow the main FPU automata significantly. + +This patch also corrects some other modeling omissions like: + + - Fix c86_4g_fp_op_idiv_load latency typo by one cycle. + - Merge the old c86_4g_m7 idiv DI/SI/HI reservations after + aligning their latency and divider unit occupancy (with + updated values), while keeping QI separate. + - Adjust reservation units in templates like + c86_4g_m7_avx_vpinsr_reg_load and c86_4g_m7_avx512_sseadd_xy + etc. 
+ - Add missing reservation units and unit occupancy in templates + like c86_4g_m7_avx512_permi2_ymm and + c86_4g_m7_sse_sseiadd_hplus_load etc. + - Adjust reservation units and unit occupancy in templates like + c86_4g_m7_avx512_perm_zmm_imm, c86_4g_m7_avx512_expand and + c86_4g_m7_avx512_ssemul etc. + +This patch also introduces some reusable reservation aliases to +simplify the modeling. + +I tested build time for i686 bootstrapping in a docker container: + - r17-202: 2437s (before c86-4g support) + - r17-203: 7291s (c86-4g support) + - r17-258: 2646s (tweaking for build time) + - this: 2358s +It looks like this patch improves build time (even better than r17-202, +though the trivial gap can be due to some jitter). + +The symbol sizes are improved as below: + +nm -CS -t d --defined-only gcc/insn-automata.o \ + | sed 's/^[0-9]* 0*//' \ + | sort -n | tail -20 + +with r17-258: + + 20068 r bdver1_fp_transitions + 22354 r c86_4g_m7_ieu_min_issue_delay + 26208 r slm_min_issue_delay + 26580 t internal_min_issue_delay(int, DFA_chip*) + 26869 t internal_state_transition(int, DFA_chip*) + 27244 r bdver1_fp_min_issue_delay + 28518 r glm_check + 28518 r glm_transitions + 33690 r geode_min_issue_delay + 33728 r c86_4g_fp_transitions + 45436 r znver4_fpu_min_issue_delay + 46980 r bdver3_fp_min_issue_delay + 49428 r glm_min_issue_delay + 53730 r btver2_fp_min_issue_delay + 53760 r znver1_fp_transitions + 89414 r c86_4g_m7_ieu_transitions + 93960 r bdver3_fp_transitions + 181744 r znver4_fpu_transitions + 326322 r c86_4g_m7_fpu_min_issue_delay + 1305288 r c86_4g_m7_fpu_transitions + +with this: + + 17872 r print_reservation(_IO_FILE*, rtx_insn*)::... 
+ 20068 r bdver1_fp_check + 20068 r bdver1_fp_transitions + 22016 r c86_4g_m7_fpu02_transitions + 22354 r c86_4g_m7_ieu_min_issue_delay + 26208 r slm_min_issue_delay + 27244 r bdver1_fp_min_issue_delay + 28199 t internal_min_issue_delay(int, DFA_chip*) + 28362 t internal_state_transition(int, DFA_chip*) + 28518 r glm_check + 28518 r glm_transitions + 33690 r geode_min_issue_delay + 45436 r znver4_fpu_min_issue_delay + 46980 r bdver3_fp_min_issue_delay + 49428 r glm_min_issue_delay + 53730 r btver2_fp_min_issue_delay + 53760 r znver1_fp_transitions + 89414 r c86_4g_m7_ieu_transitions + 93960 r bdver3_fp_transitions + 181744 r znver4_fpu_transitions + +Based on random sampling of SPEC2017 benchmarks 525.x264_r and +521.wrf_r, I verified that the new modeling introduces no +significant compilation overhead. Testing with a single job on a +c86-4g-m7 machine revealed no impact on x264 and a tiny increase +for wrf (~0.3%). + +[1] https://gcc.gnu.org/pipermail/gcc-patches/2026-May/716681.html + +gcc/ChangeLog: + + * config/i386/c86-4g-m7.md (c86_4g_m7_fpu): Remove automaton. + (c86_4g_m7_fpu02): New automaton. + (c86_4g_m7_fpu13): Ditto. + (c86-4g-m7-fpu0): Move to c86_4g_m7_fpu02 automaton. + (c86-4g-m7-fpu1): Move to c86_4g_m7_fpu13 automaton. + (c86-4g-m7-fpu2): Move to c86_4g_m7_fpu02 automaton. + (c86-4g-m7-fpu3): Move to c86_4g_m7_fpu13 automaton. + (c86-4g-m7-fdiv): Remove cpu unit. + (c86-4g-m7-fdiv1): New cpu unit. + (c86-4g-m7-fdiv3): Ditto. + (c86-4g-m7-fpu_0_3): New reservation. + (c86-4g-m7-fpu_1_3x2): Ditto. + (c86-4g-m7-fpu_1_3x3): Ditto. + (c86-4g-m7-fpu_1_3x6): Ditto. + (c86-4g-m7-fpux2): Ditto. + (c86-4g-m7-fpux4): Ditto. + (c86-4g-m7-fpux6): Ditto. + (c86-4g-m7-fpux8): Ditto. + (c86-4g-m7-fpux16): Ditto. + (c86-4g-m7-fp1fdiv1x4): Ditto. + (c86-4g-m7-fp3fdiv3x4): Ditto. + (c86-4g-m7-fdiv13): Ditto. + (c86-4g-m7-fp13div13): Ditto. + (c86-4g-m7-fp13div13x4): Ditto. + (c86-4g-m7-fp1div1_fp3div3_x4x8): Ditto. + (c86-4g-m7-fp1div1_fp3div3_x4x9): Ditto. 
+ (c86-4g-m7-fp1div1_fp3div3_x4x11): Ditto. + (c86-4g-m7-fp1div1_fp3div3_x4x15): Ditto. + (c86-4g-m7-fp1div1_fp3div3_x4x18): Ditto. + (c86_4g_m7_idiv): New reservation. + (c86_4g_m7_idiv_QI): Adjust reservation latency and unit occupancy. + (c86_4g_m7_idiv_load): New reservation. + (c86_4g_m7_idiv_QI_load): Adjust reservation latency and unit + occupancy. + (c86_4g_m7_idiv_DI): Remove reservation. + (c86_4g_m7_idiv_SI): Ditto. + (c86_4g_m7_idiv_HI): Ditto. + (c86_4g_m7_idiv_DI_load): Ditto. + (c86_4g_m7_idiv_SI_load): Ditto. + (c86_4g_m7_idiv_HI_load): Ditto. + (c86_4g_m7_sse_insertimm): Adjust reservation units and unit + occupancy. + (c86_4g_m7_sse_insert): Ditto. + (c86_4g_m7_fp_sqrt): Adjust reservation. + (c86_4g_m7_fp_div): Ditto. + (c86_4g_m7_fp_div_load): Ditto. + (c86_4g_m7_fp_idiv_load): Ditto. + (c86_4g_m7_sse_pinsr_reg): Adjust reservation units and unit + occupancy. + (c86_4g_m7_sse_pinsr_reg_load): Ditto. + (c86_4g_m7_avx_vpinsr_reg): Ditto. + (c86_4g_m7_avx_vpinsr_reg_load): Ditto. + (c86_4g_m7_avx512_perm_xmm): Delete the prefix condition. + (c86_4g_m7_avx512_perm_xmm_opload): Ditto. + (c86_4g_m7_avx512_permi2_ymm): Adjust reservation units and unit + occupancy. + (c86_4g_m7_avx512_permi2_zmm): Ditto. + (c86_4g_m7_avx512_permi2_ymm_load): Ditto. + (c86_4g_m7_avx512_permi2_zmm_load): Ditto. + (c86_4g_m7_avx512_perm_zmm_imm): Ditto. + (c86_4g_m7_avx512_perm_zmm_imm_load): Ditto. + (c86_4g_m7_avx512_perm_zmm_noimm): Ditto. + (c86_4g_m7_sse_perm_zmm_noimm_load): Ditto. + (c86_4g_m7_avx_perm_ymm): Remove. + (c86_4g_m7_avx_perm_ymem): Ditto. + (c86_4g_m7_avx512_shuf_zmm): Adjust reservation units and unit + occupancy. + (c86_4g_m7_avx512_shuf_zmem): Ditto. + (c86_4g_m7_avx512_cmpestr): Ditto. + (c86_4g_m7_avx512_cmpestr_load): Ditto. + (c86_4g_m7_avx512_vdbpsadbw_zmm): Ditto. + (c86_4g_m7_avx512_vdbpsadbw_zmem): Ditto. + (c86_4g_m7_avx_ssecomi_comi): Ditto. + (c86_4g_m7_avx_ssecomi_comi_load): Ditto. + (c86_4g_m7_avx512_expand): Ditto. 
+ (c86_4g_m7_avx512_expand_load): Ditto. + (c86_4g_m7_avx512_expand_z): Ditto. + (c86_4g_m7_avx512_expand_z_load): Ditto. + (c86_4g_m7_sse_movnt_xy): Rename to c86_4g_m7_sse_movnt. + (c86_4g_m7_avx512_sseadd_xy): Adjust reservation units. + (c86_4g_m7_avx512_sseadd_xy_load): Ditto. + (c86_4g_m7_sse_sseiadd_hplus): Adjust reservation units and unit + occupancy. + (c86_4g_m7_sse_sseiadd_hplus_load): Ditto. + (c86_4g_m7_avx512_ssemul): Adjust reservation units. + (c86_4g_m7_avx512_ssemul_load): Ditto. + (c86_4g_m7_avx512_ssediv): Remove. + (c86_4g_m7_avx512_ssediv_mem): Remove. + (c86_4g_m7_avx512_ssediv_x): New. + (c86_4g_m7_avx512_ssediv_xmem): New. + (c86_4g_m7_avx512_ssediv_y): New. + (c86_4g_m7_avx512_ssediv_ymem): New. + (c86_4g_m7_avx512_ssediv_z): Adjust reservation units. + (c86_4g_m7_avx512_ssediv_zmem): Ditto. + (c86_4g_m7_avx512_ssecmp_z): Add reservation units and unit + occupancy. + (c86_4g_m7_avx512_ssecmp_z_load): Ditto. + (c86_4g_m7_avx512_ssecmp_vp_z): New reservation. + (c86_4g_m7_avx512_ssecmp_vp_z_load): Ditto. + (c86_4g_m7_avx512_ssecmp_test_z): Remove reservation. + (c86_4g_m7_avx512_ssecmp_test_z_load): Ditto. + (c86_4g_m7_avx512_muladd): Broaden matching condition. + (c86_4g_m7_avx512_muladd_load): Ditto. + (c86_4g_m7_fma_muladd): Remove reservation. + (c86_4g_m7_fma_muladd_load): Ditto. + (c86_4g_m7_avx512_sse_conflict_x): Add reservation units and unit + occupancy. + (c86_4g_m7_avx512_sse_conflict_x_load): Ditto. + (c86_4g_m7_avx512_sse_conflict_y): Ditto. + (c86_4g_m7_avx512_sse_conflict_y_load): Ditto. + (c86_4g_m7_avx512_sse_conflict_z): Ditto. + (c86_4g_m7_avx512_sse_conflict_z_load): Ditto. + (c86_4g_m7_avx512_sse_class_z): Add reservation units and unit + occupancy. + (c86_4g_m7_avx512_sse_class_z_load): Ditto. + (c86_4g_m7_avx512_sse_sqrt): Remove. + (c86_4g_m7_avx512_sse_sqrt_load): Remove. + (c86_4g_m7_avx512_sse_sqrt_sf_x): New. + (c86_4g_m7_avx512_sse_sqrt_sf_xload): New. + (c86_4g_m7_avx512_sse_sqrt_sf_y): New. 
+ (c86_4g_m7_avx512_sse_sqrt_sf_yload): New. + (c86_4g_m7_avx512_sse_sqrt_sf_z): New. + (c86_4g_m7_avx512_sse_sqrt_sf_zload): New. + (c86_4g_m7_avx512_sse_sqrt_df_x): New. + (c86_4g_m7_avx512_sse_sqrt_df_xload): New. + (c86_4g_m7_avx512_sse_sqrt_df_y): New. + (c86_4g_m7_avx512_sse_sqrt_df_yload): New. + (c86_4g_m7_avx512_sse_sqrt_df_z): New. + (c86_4g_m7_avx512_sse_sqrt_df_zload): New. + (c86_4g_m7_avx512_msklog_vector): Add reservation units and unit + occupancy. + (c86_4g_m7_avx512_mskmov_z_k): Ditto. + (c86_4g_m7_avx512_mskmov_k_reg): Ditto. + * config/i386/c86-4g.md (c86_4g_fp): Remove automaton. + (c86_4g_fp024): New automaton. + (c86_4g_fp1): Ditto. + (c86-4g-fp0): Move to c86_4g_fp024 automaton. + (c86-4g-fp1): Move to c86_4g_fp1 automaton. + (c86-4g-fp2): Move to c86_4g_fp024 automaton. + (c86-4g-fp3): Ditto. + (c86-4g-fp1fdivx4): New reservation. + (c86_4g_fp_sqrt): Adjust reservation. + (c86_4g_sse_sqrt_sf): Ditto. + (c86_4g_sse_sqrt_sf_mem): Ditto. + (c86_4g_sse_sqrt_df): Ditto. + (c86_4g_sse_sqrt_df_mem): Ditto. + (c86_4g_fp_op_div): Ditto. + (c86_4g_fp_op_div_load): Ditto. + (c86_4g_fp_op_idiv_load): Adjust reservation latency. + (c86_4g_ssediv_ss_ps): Adjust reservation. + (c86_4g_ssediv_ss_ps_load): Ditto. + (c86_4g_ssediv_sd_pd): Ditto. + (c86_4g_ssediv_sd_pd_load): Ditto. + (c86_4g_ssediv_avx256_ps): Ditto. + (c86_4g_ssediv_avx256_ps_load): Ditto. + (c86_4g_ssediv_avx256_pd): Ditto. + (c86_4g_ssediv_avx256_pd_load): Ditto. 
+ +Co-authored-by: Xin Liu +Signed-off-by: Xin Liu +Signed-off-by: Kewen Lin +--- + gcc/config/i386/c86-4g-m7.md | 412 ++++++++++++++++++++--------------- + gcc/config/i386/c86-4g.md | 61 +++--- + 2 files changed, 270 insertions(+), 203 deletions(-) + +diff --git a/gcc/config/i386/c86-4g-m7.md b/gcc/config/i386/c86-4g-m7.md +index 3a1f18337b6..0fd8422422b 100644 +--- a/gcc/config/i386/c86-4g-m7.md ++++ b/gcc/config/i386/c86-4g-m7.md +@@ -20,8 +20,10 @@ + ;; HYGON c86-4g-m7 Scheduling + ;; Modeling automatons for decoders, integer execution pipes, + ;; AGU pipes, branch, floating point execution, fp store units, +-;; integer and floating point dividers. +-(define_automaton "c86_4g_m7, c86_4g_m7_ieu, c86_4g_m7_agu, c86_4g_m7_fpu, c86_4g_m7_idiv, c86_4g_m7_fdiv") ++;; integer and floating point dividers. Split fpu1 and fpu3 ++;; into their own automata to keep these units independent ++;; without increasing the main c86_4g_m7_fpu state space. ++(define_automaton "c86_4g_m7, c86_4g_m7_ieu, c86_4g_m7_agu, c86_4g_m7_fpu02, c86_4g_m7_fpu13, c86_4g_m7_idiv, c86_4g_m7_fdiv") + + ;; Decoders unit has 4 decoders and all of them can decode fast path + ;; and vector type instructions. +@@ -30,10 +32,6 @@ (define_cpu_unit "c86-4g-m7-decode1" "c86_4g_m7") + (define_cpu_unit "c86-4g-m7-decode2" "c86_4g_m7") + (define_cpu_unit "c86-4g-m7-decode3" "c86_4g_m7") + +-;; Two separated dividers for int and fp. +-(define_cpu_unit "c86-4g-m7-idiv" "c86_4g_m7_idiv") +-(define_cpu_unit "c86-4g-m7-fdiv" "c86_4g_m7_fdiv") +- + ;; Currently blocking all decoders for vector path instructions as + ;; they are dispatched separetely as microcode sequence. + (define_reservation "c86-4g-m7-vector" "c86-4g-m7-decode0+c86-4g-m7-decode1+c86-4g-m7-decode2+c86-4g-m7-decode3") +@@ -50,6 +48,9 @@ (define_cpu_unit "c86-4g-m7-ieu1" "c86_4g_m7_ieu") + (define_cpu_unit "c86-4g-m7-ieu2" "c86_4g_m7_ieu") + (define_cpu_unit "c86-4g-m7-ieu3" "c86_4g_m7_ieu") + ++;; One separated integer divider. 
++(define_cpu_unit "c86-4g-m7-idiv" "c86_4g_m7_idiv") ++ + ;; c86-4g-m7 has an additional branch unit. + (define_cpu_unit "c86-4g-m7-bru0" "c86_4g_m7_ieu") + (define_reservation "c86-4g-m7-ieu" "c86-4g-m7-ieu0|c86-4g-m7-ieu1|c86-4g-m7-ieu2|c86-4g-m7-ieu3") +@@ -67,23 +68,48 @@ (define_reservation "c86-4g-m7-store" "c86-4g-m7-agu-reserve") + ;; vectorpath (microcoded) instructions are single issue instructions. + ;; So, they occupy all the integer units. + (define_reservation "c86-4g-m7-ivector" "c86-4g-m7-ieu0+c86-4g-m7-ieu1 +- +c86-4g-m7-ieu2+c86-4g-m7-ieu3+c86-4g-m7-bru0 +- +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2") ++ +c86-4g-m7-ieu2+c86-4g-m7-ieu3+c86-4g-m7-bru0 ++ +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2") + + ;; Floating point unit 4 FP pipes. +-(define_cpu_unit "c86-4g-m7-fpu0" "c86_4g_m7_fpu") +-(define_cpu_unit "c86-4g-m7-fpu1" "c86_4g_m7_fpu") +-(define_cpu_unit "c86-4g-m7-fpu2" "c86_4g_m7_fpu") +-(define_cpu_unit "c86-4g-m7-fpu3" "c86_4g_m7_fpu") ++(define_cpu_unit "c86-4g-m7-fpu0" "c86_4g_m7_fpu02") ++(define_cpu_unit "c86-4g-m7-fpu1" "c86_4g_m7_fpu13") ++(define_cpu_unit "c86-4g-m7-fpu2" "c86_4g_m7_fpu02") ++(define_cpu_unit "c86-4g-m7-fpu3" "c86_4g_m7_fpu13") ++ + (define_reservation "c86-4g-m7-fpu" "c86-4g-m7-fpu0|c86-4g-m7-fpu1|c86-4g-m7-fpu2|c86-4g-m7-fpu3") +-(define_reservation "c86-4g-m7-fpu_0_2" "c86-4g-m7-fpu0|c86-4g-m7-fpu2") +-(define_reservation "c86-4g-m7-fpu_1_3" "c86-4g-m7-fpu1|c86-4g-m7-fpu3") + (define_reservation "c86-4g-m7-fpu_0_1" "c86-4g-m7-fpu0|c86-4g-m7-fpu1") ++(define_reservation "c86-4g-m7-fpu_0_2" "c86-4g-m7-fpu0|c86-4g-m7-fpu2") + (define_reservation "c86-4g-m7-fpu_0_2x2" "c86-4g-m7-fpu0*2|c86-4g-m7-fpu2*2") + (define_reservation "c86-4g-m7-fpu_0_2x4" "c86-4g-m7-fpu0*4|c86-4g-m7-fpu2*4") ++(define_reservation "c86-4g-m7-fpu_0_3" "c86-4g-m7-fpu0|c86-4g-m7-fpu3") ++(define_reservation "c86-4g-m7-fpu_1_3" "c86-4g-m7-fpu1|c86-4g-m7-fpu3") ++(define_reservation "c86-4g-m7-fpu_1_3x2" "c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") 
++(define_reservation "c86-4g-m7-fpu_1_3x3" "c86-4g-m7-fpu1*3|c86-4g-m7-fpu3*3") ++(define_reservation "c86-4g-m7-fpu_1_3x6" "c86-4g-m7-fpu1*6|c86-4g-m7-fpu3*6") ++(define_reservation "c86-4g-m7-fpux2" "c86-4g-m7-fpu0*2|c86-4g-m7-fpu1*2|c86-4g-m7-fpu2*2|c86-4g-m7-fpu3*2") ++(define_reservation "c86-4g-m7-fpux4" "c86-4g-m7-fpu0*4|c86-4g-m7-fpu1*4|c86-4g-m7-fpu2*4|c86-4g-m7-fpu3*4") ++(define_reservation "c86-4g-m7-fpux8" "c86-4g-m7-fpu0*8|c86-4g-m7-fpu1*8|c86-4g-m7-fpu2*8|c86-4g-m7-fpu3*8") ++(define_reservation "c86-4g-m7-fpux6" "c86-4g-m7-fpu0*6|c86-4g-m7-fpu1*6|c86-4g-m7-fpu2*6|c86-4g-m7-fpu3*6") ++(define_reservation "c86-4g-m7-fpux16" "c86-4g-m7-fpu0*16|c86-4g-m7-fpu1*16|c86-4g-m7-fpu2*16|c86-4g-m7-fpu3*16") + (define_reservation "c86-4g-m7-fvector" "c86-4g-m7-fpu0+c86-4g-m7-fpu1 +- +c86-4g-m7-fpu2+c86-4g-m7-fpu3 +- +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2") ++ +c86-4g-m7-fpu2+c86-4g-m7-fpu3 ++ +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2") ++ ++;; Two FP dividers. ++(define_cpu_unit "c86-4g-m7-fdiv1" "c86_4g_m7_fdiv") ++(define_cpu_unit "c86-4g-m7-fdiv3" "c86_4g_m7_fdiv") ++ ++(define_reservation "c86-4g-m7-fp1fdiv1x4" "(c86-4g-m7-fpu1+c86-4g-m7-fdiv1)*4") ++(define_reservation "c86-4g-m7-fp3fdiv3x4" "(c86-4g-m7-fpu3+c86-4g-m7-fdiv3)*4") ++(define_reservation "c86-4g-m7-fdiv13" "(c86-4g-m7-fdiv1+c86-4g-m7-fdiv3)") ++(define_reservation "c86-4g-m7-fp13div13" "(c86-4g-m7-fpu1+c86-4g-m7-fpu3+c86-4g-m7-fdiv1+c86-4g-m7-fdiv3)") ++(define_reservation "c86-4g-m7-fp13div13x4" "c86-4g-m7-fp13div13*4") ++(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x8" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*8)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*8)") ++(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x9" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*9)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*9)") ++(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x11" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*11)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*11)") ++(define_reservation 
"c86-4g-m7-fp1div1_fp3div3_x4x15" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*15)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*15)") ++(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x18" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*18)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*18)") + + ;; IMOV/IMOVX + (define_insn_reservation "c86_4g_m7_imov_xchg" 1 +@@ -168,61 +194,33 @@ (define_insn_reservation "c86_4g_m7_imul_load" 7 + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu1") + + ;; IDIV +-(define_insn_reservation "c86_4g_m7_idiv_DI" 41 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "idiv") +- (and (eq_attr "mode" "DI") +- (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*41") +- +-(define_insn_reservation "c86_4g_m7_idiv_SI" 25 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "idiv") +- (and (eq_attr "mode" "SI") +- (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*25") +- +-(define_insn_reservation "c86_4g_m7_idiv_HI" 17 ++(define_insn_reservation "c86_4g_m7_idiv" 7 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") +- (and (eq_attr "mode" "HI") ++ (and (eq_attr "mode" "!QI") + (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*17") ++ "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*7") + +-(define_insn_reservation "c86_4g_m7_idiv_QI" 15 ++(define_insn_reservation "c86_4g_m7_idiv_QI" 6 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "none")))) +- "c86-4g-m7-direct,c86-4g-m7-ieu3,c86-4g-m7-idiv*15") +- +-(define_insn_reservation "c86_4g_m7_idiv_DI_load" 45 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "idiv") +- (and (eq_attr "mode" "DI") +- (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*41") +- +-(define_insn_reservation "c86_4g_m7_idiv_SI_load" 29 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "idiv") +- (and (eq_attr 
"mode" "SI") +- (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*25") ++ "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*6") + +-(define_insn_reservation "c86_4g_m7_idiv_HI_load" 21 ++(define_insn_reservation "c86_4g_m7_idiv_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") +- (and (eq_attr "mode" "HI") ++ (and (eq_attr "mode" "!QI") + (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*17") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*7") + +-(define_insn_reservation "c86_4g_m7_idiv_QI_load" 19 ++(define_insn_reservation "c86_4g_m7_idiv_QI_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "load")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*15") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*6") + + ;; Integer/genaral Instructions + (define_insn_reservation "c86_4g_m7_insn" 1 +@@ -385,14 +383,14 @@ (define_insn_reservation "c86_4g_m7_sse_insertimm" 3 + (and (eq_attr "type" "sseins") + (and (eq_attr "memory" "none") + (eq_attr "length_immediate" "2")))) +- "c86-4g-m7-double,c86-4g-m7-fpu0|c86-4g-m7-fpu3,c86-4g-m7-fpu1") ++ "c86-4g-m7-double,c86-4g-m7-fpu_0_3,c86-4g-m7-fpu1") + + (define_insn_reservation "c86_4g_m7_sse_insert" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseins") + (and (eq_attr "memory" "none") + (eq_attr "length_immediate" "!2")))) +- "c86-4g-m7-direct,c86-4g-m7-fpu1") ++ "c86-4g-m7-direct,c86-4g-m7-fpu1*2") + + ;; FCMOV + (define_insn_reservation "c86_4g_m7_fp_cmov" 4 +@@ -444,7 +442,7 @@ (define_insn_reservation "c86_4g_m7_fp_sqrt" 22 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fpspc") + (eq_attr "c86_attr" "sqrt"))) +- "c86-4g-m7-direct,c86-4g-m7-fpu1,c86-4g-m7-fdiv*22") ++ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x18") + + ;; FPSPC + (define_insn_reservation 
"c86_4g_m7_fp_spc_direct" 5 +@@ -487,21 +485,21 @@ (define_insn_reservation "c86_4g_m7_fp_div" 15 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "none"))) +- "c86-4g-m7-direct,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15") ++ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x11") + + (define_insn_reservation "c86_4g_m7_fp_div_load" 22 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "false") + (eq_attr "memory" "!none")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x11") + + (define_insn_reservation "c86_4g_m7_fp_idiv_load" 26 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "true") + (eq_attr "memory" "!none")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fdiv*15") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1*4,c86-4g-m7-fp1div1_fp3div3_x4x11") + + (define_insn_reservation "c86_4g_m7_fp_fsgn" 1 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -634,7 +632,7 @@ (define_insn_reservation "c86_4g_m7_sse_pinsr_reg" 1 + (and (eq_attr "c86_attr" "insr") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-double,c86-4g-m7-ieu2,c86-4g-m7-fpu_0_1") ++ "c86-4g-m7-double,c86-4g-m7-ieu2,c86-4g-m7-fpu") + + (define_insn_reservation "c86_4g_m7_sse_pinsr_reg_load" 3 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -642,7 +640,7 @@ (define_insn_reservation "c86_4g_m7_sse_pinsr_reg_load" 3 + (and (eq_attr "c86_attr" "insr") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + + (define_insn_reservation "c86_4g_m7_avx_vpinsr_reg" 2 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -650,7 +648,7 @@ (define_insn_reservation "c86_4g_m7_avx_vpinsr_reg" 2 + (and (eq_attr "c86_attr" "insr") + (and (eq_attr "prefix" "!orig") + (eq_attr "memory" 
"none"))))) +- "c86-4g-m7-double,c86-4g-m7-fpu2*2") ++ "c86-4g-m7-double,c86-4g-m7-fpu_1_3x2") + + (define_insn_reservation "c86_4g_m7_avx_vpinsr_reg_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -658,7 +656,7 @@ (define_insn_reservation "c86_4g_m7_avx_vpinsr_reg_load" 8 + (and (eq_attr "c86_attr" "insr") + (and (eq_attr "prefix" "!orig") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + + ;; PERM + (define_insn_reservation "c86_4g_m7_avx512_perm_xmm" 3 +@@ -668,8 +666,7 @@ (define_insn_reservation "c86_4g_m7_avx512_perm_xmm" 3 + (eq_attr "mode" "V4SF,V2DF,TI")) + (and (eq_attr "c86_attr" "perm") + (eq_attr "mode" "V8SF,V4DF,TI,OI"))) +- (and (eq_attr "prefix" "evex") +- (eq_attr "memory" "none"))))) ++ (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2") + + (define_insn_reservation "c86_4g_m7_avx512_perm_xmm_opload" 10 +@@ -679,8 +676,7 @@ (define_insn_reservation "c86_4g_m7_avx512_perm_xmm_opload" 10 + (eq_attr "mode" "V4SF,V2DF,TI")) + (and (eq_attr "c86_attr" "perm") + (eq_attr "mode" "V8SF,V4DF,TI,OI"))) +- (and (eq_attr "prefix" "evex") +- (eq_attr "memory" "load"))))) ++ (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2") + + (define_insn_reservation "c86_4g_m7_avx512_permi2_ymm" 4 +@@ -689,7 +685,7 @@ (define_insn_reservation "c86_4g_m7_avx512_permi2_ymm" 4 + (and (eq_attr "c86_attr" "perm2") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpux4") + + (define_insn_reservation "c86_4g_m7_avx512_permi2_zmm" 16 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -697,7 +693,7 @@ (define_insn_reservation "c86_4g_m7_avx512_permi2_zmm" 16 + (and (eq_attr "c86_attr" "perm2") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpux16") + + 
(define_insn_reservation "c86_4g_m7_avx512_permi2_ymm_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -705,7 +701,7 @@ (define_insn_reservation "c86_4g_m7_avx512_permi2_ymm_load" 11 + (and (eq_attr "c86_attr" "perm2") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux4") + + (define_insn_reservation "c86_4g_m7_avx512_permi2_zmm_load" 23 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -713,7 +709,7 @@ (define_insn_reservation "c86_4g_m7_avx512_permi2_zmm_load" 23 + (and (eq_attr "c86_attr" "perm2") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux16") + + (define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm" 4 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -722,7 +718,7 @@ (define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm" 4 + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_operand 2 "immediate_operand") + (eq_attr "memory" "none")))))) +- "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x4") ++ "c86-4g-m7-direct,c86-4g-m7-fpux4") + + (define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -731,7 +727,7 @@ (define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm_load" 11 + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_operand 2 "immediate_operand") + (eq_attr "memory" "load")))))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpux4") + + (define_insn_reservation "c86_4g_m7_avx512_perm_zmm_noimm" 8 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -740,7 +736,7 @@ (define_insn_reservation "c86_4g_m7_avx512_perm_zmm_noimm" 8 + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_operand 2 "nonimmediate_operand") + (eq_attr "memory" "none")))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpux8") + + (define_insn_reservation "c86_4g_m7_sse_perm_zmm_noimm_load" 15 
+ (and (eq_attr "cpu" "c86_4g_m7") +@@ -749,23 +745,7 @@ (define_insn_reservation "c86_4g_m7_sse_perm_zmm_noimm_load" 15 + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_operand 2 "nonimmediate_operand") + (eq_attr "memory" "load")))))) +- "c86-4g-m7-vector,c86-4g-m7-load") +- +-(define_insn_reservation "c86_4g_m7_avx_perm_ymm" 3 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "sselog") +- (and (eq_attr "c86_attr" "perm") +- (and (eq_attr "prefix" "!evex") +- (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") +- +-(define_insn_reservation "c86_4g_m7_avx_perm_ymem" 10 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "sselog") +- (and (eq_attr "c86_attr" "perm") +- (and (eq_attr "prefix" "!evex") +- (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux8") + + ;; VINSERT + (define_insn_reservation "c86_4g_m7_avx512_insertx_ymm" 3 +@@ -853,7 +833,7 @@ (define_insn_reservation "c86_4g_m7_avx512_shuf_zmm" 4 + (and (eq_attr "c86_attr" "shufx") + (and (eq_attr "mode" "V8DF,V16SF,XI") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu_0_2x4") + + (define_insn_reservation "c86_4g_m7_avx512_shuf_xymem" 10 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -869,7 +849,7 @@ (define_insn_reservation "c86_4g_m7_avx512_shuf_zmem" 11 + (and (eq_attr "c86_attr" "shufx") + (and (eq_attr "mode" "V8DF,V16SF,XI") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4") + + ;; SSELOGIC + (define_insn_reservation "c86_4g_m7_sselogic_xymm" 1 +@@ -892,14 +872,14 @@ (define_insn_reservation "c86_4g_m7_avx512_cmpestr" 6 + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "cmpestr") + (eq_attr "memory" "none")))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpux6") + + (define_insn_reservation "c86_4g_m7_avx512_cmpestr_load" 13 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" 
"sselog") + (and (eq_attr "c86_attr" "cmpestr") + (eq_attr "memory" "load")))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux6") + + ;; SSELOG + (define_insn_reservation "c86_4g_m7_avx512_log" 1 +@@ -940,7 +920,7 @@ (define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_zmm" 4 + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2") + + (define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_zmem" 11 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -948,7 +928,7 @@ (define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_zmem" 11 + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2") + + ;; ABS + (define_insn_reservation "c86_4g_m7_avx512_abs" 1 +@@ -1052,14 +1032,14 @@ (define_insn_reservation "c86_4g_m7_avx_ssecomi_comi" 1 + (and (eq_attr "type" "ssecomi") + (and (eq_attr "prefix_extra" "0") + (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ "c86-4g-m7-double,c86-4g-m7-fpu") + + (define_insn_reservation "c86_4g_m7_avx_ssecomi_comi_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecomi") + (and (eq_attr "prefix_extra" "0") + (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu2|c86-4g-m7-fpu3") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu") + + (define_insn_reservation "c86_4g_m7_avx_ssecomi_test" 1 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1201,7 +1181,7 @@ (define_insn_reservation "c86_4g_m7_avx512_expand" 3 + (and (eq_attr "c86_attr" "expand,compress") + (and (not (eq_attr "mode" "XI,V16SF,V8DF")) + (eq_attr "memory" "none"))))) +- "c86-4g-m7-direct,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") ++ "c86-4g-m7-direct,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3") + + 
(define_insn_reservation "c86_4g_m7_avx512_expand_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1209,7 +1189,7 @@ (define_insn_reservation "c86_4g_m7_avx512_expand_load" 10 + (and (eq_attr "c86_attr" "expand,compress") + (and (not (eq_attr "mode" "XI,V16SF,V8DF")) + (eq_attr "memory" "load"))))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3") + + (define_insn_reservation "c86_4g_m7_avx512_expand_z" 10 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1217,7 +1197,7 @@ (define_insn_reservation "c86_4g_m7_avx512_expand_z" 10 + (and (eq_attr "c86_attr" "expand,compress") + (and (eq_attr "mode" "XI,V16SF,V8DF") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3") + + (define_insn_reservation "c86_4g_m7_avx512_expand_z_load" 17 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1225,7 +1205,7 @@ (define_insn_reservation "c86_4g_m7_avx512_expand_z_load" 17 + (and (eq_attr "c86_attr" "expand,compress") + (and (eq_attr "mode" "XI,V16SF,V8DF") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3") + + ;; MOVNT + (define_insn_reservation "c86_4g_m7_avx512_movnt_load" 8 +@@ -1252,7 +1232,7 @@ (define_insn_reservation "c86_4g_m7_sse_movnt_store" 4 + (eq_attr "memory" "!none"))))) + "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1") + +-(define_insn_reservation "c86_4g_m7_sse_movnt_xy" 4 ++(define_insn_reservation "c86_4g_m7_sse_movnt" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "movnt") +@@ -1364,14 +1344,14 @@ (define_insn_reservation "c86_4g_m7_avx512_sseadd_xy" 3 + (and (eq_attr "type" "sseadd") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) +- "c86-4g-m7-direct,c86-4g-m7-fpu3") ++ "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + + (define_insn_reservation 
"c86_4g_m7_avx512_sseadd_xy_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + + ;; HADD/HSUB + (define_insn_reservation "c86_4g_m7_avx_sseadd_hplus" 7 +@@ -1494,7 +1474,7 @@ (define_insn_reservation "c86_4g_m7_sse_sseiadd_hplus" 3 + (and (eq_attr "c86_attr" "hplus") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector,c86-4g-m7-fpu0*2") ++ "c86-4g-m7-vector,c86-4g-m7-fpux2") + + (define_insn_reservation "c86_4g_m7_sse_sseiadd_hplus_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1502,49 +1482,63 @@ (define_insn_reservation "c86_4g_m7_sse_sseiadd_hplus_load" 10 + (and (eq_attr "c86_attr" "hplus") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu0*2") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux2") + + ;; SSEMUL + (define_insn_reservation "c86_4g_m7_avx512_ssemul" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "none"))) +- "c86-4g-m7-direct,c86-4g-m7-fpu0") ++ "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + + (define_insn_reservation "c86_4g_m7_avx512_ssemul_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + + ;; SSEDIV +-(define_insn_reservation "c86_4g_m7_avx512_ssediv" 13 ++(define_insn_reservation "c86_4g_m7_avx512_ssediv_x" 13 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF") ++ (eq_attr "memory" "none")))) ++ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x8") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssediv_xmem" 20 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssediv") ++ 
(and (eq_attr "mode" "SF,DF,V4SF,V2DF") ++ (eq_attr "memory" "load")))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x8") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssediv_y" 13 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") +- (and (not (eq_attr "mode" "V16SF,V8DF")) ++ (and (eq_attr "mode" "V8SF,V4DF") + (eq_attr "memory" "none")))) +- "c86-4g-m7-direct,c86-4g-m7-fpu3,c86-4g-m7-fdiv*13") ++ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*8") + +-(define_insn_reservation "c86_4g_m7_avx512_ssediv_mem" 20 ++(define_insn_reservation "c86_4g_m7_avx512_ssediv_ymem" 20 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") +- (and (not (eq_attr "mode" "V16SF,V8DF")) ++ (and (eq_attr "mode" "V8SF,V4DF") + (eq_attr "memory" "load")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fdiv*13") ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*8") + + (define_insn_reservation "c86_4g_m7_avx512_ssediv_z" 24 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V16SF,V8DF") + (eq_attr "memory" "none")))) +- "c86-4g-m7-double,c86-4g-m7-fpu3,c86-4g-m7-fdiv*24") ++ "c86-4g-m7-double,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*20") + + (define_insn_reservation "c86_4g_m7_avx512_ssediv_zmem" 31 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V16SF,V8DF") + (eq_attr "memory" "load")))) +- "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fdiv*24") ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*20") + + ;; SSECMP + (define_insn_reservation "c86_4g_m7_avx512_ssecmp" 5 +@@ -1569,7 +1563,7 @@ (define_insn_reservation "c86_4g_m7_avx512_ssecmp_z" 5 + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") + + (define_insn_reservation 
"c86_4g_m7_avx512_ssecmp_z_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1577,7 +1571,7 @@ (define_insn_reservation "c86_4g_m7_avx512_ssecmp_z_load" 12 + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2") + + (define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp" 5 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1597,6 +1591,24 @@ (define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_load" 12 + (eq_attr "memory" "load")))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") + ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_z" 5 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "XI") ++ (and (eq_attr "c86_attr" "other,ptest") ++ (eq_attr "memory" "none")))))) ++ "c86-4g-m7-double,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") ++ ++(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_z_load" 12 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "evex") ++ (and (eq_attr "mode" "XI") ++ (and (eq_attr "c86_attr" "other,ptest") ++ (eq_attr "memory" "load")))))) ++ "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3x2") ++ + (define_insn_reservation "c86_4g_m7_avx_ssecmp_vp" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") +@@ -1628,22 +1640,6 @@ (define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_load" 13 + (eq_attr "memory" "load"))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fpu_1_3") + +-(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_z" 4 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "ssecmp") +- (and (eq_attr "mode" "XI") +- (and (eq_attr "c86_attr" "ptest") +- (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") +- +-(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_z_load" 11 +- (and 
(eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "ssecmp") +- (and (eq_attr "mode" "XI") +- (and (eq_attr "c86_attr" "ptest") +- (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") +- + ;; SSECVT + (define_insn_reservation "c86_4g_m7_avx512_ssecvt_xy" 4 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1755,17 +1751,14 @@ (define_insn_reservation "c86_4g_m7_avx512_muladd" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemuladd") + (and (eq_attr "c86_attr" "other") +- (and (not (eq_attr "isa" "fma,fma4")) +- (eq_attr "mode" "V32HF,V16SF,V8DF,XI") +- (eq_attr "memory" "none"))))) ++ (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + + (define_insn_reservation "c86_4g_m7_avx512_muladd_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemuladd") + (and (eq_attr "c86_attr" "other") +- (and (not (eq_attr "isa" "fma,fma4")) +- (eq_attr "memory" "load"))))) ++ (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + + (define_insn_reservation "c86_4g_m7_avx512_muladd_madd" 4 +@@ -1784,20 +1777,6 @@ (define_insn_reservation "c86_4g_m7_avx512_muladd_madd_load" 11 + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +-(define_insn_reservation "c86_4g_m7_fma_muladd" 4 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "ssemuladd") +- (and (eq_attr "isa" "fma,fma4") +- (eq_attr "memory" "none")))) +- "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") +- +-(define_insn_reservation "c86_4g_m7_fma_muladd_load" 11 +- (and (eq_attr "cpu" "c86_4g_m7") +- (and (eq_attr "type" "ssemuladd") +- (and (eq_attr "isa" "fma,fma4") +- (eq_attr "memory" "load")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") +- + ;; SSE + (define_insn_reservation "c86_4g_m7_avx512_sse_range" 1 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1825,7 +1804,7 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_x" 2 + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "TI") + 
(eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x2") + + (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_x_load" 9 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1833,7 +1812,7 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_x_load" 9 + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "TI") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x2") + + (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y" 5 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1841,7 +1820,7 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y" 5 + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "OI") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x3") + + (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1849,7 +1828,7 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y_load" 12 + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "OI") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x3") + + (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z" 8 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1857,7 +1836,7 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z" 8 + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "none"))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x6") + + (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z_load" 15 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1865,7 +1844,7 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z_load" 15 + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "load"))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x6") + + (define_insn_reservation 
"c86_4g_m7_avx512_sse_class" 4 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1892,7 +1871,7 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_class_z" 4 + (and (eq_attr "length_immediate" "1") + (and (eq_attr "mode" "V32HF,V16SF,V8DF") + (eq_attr "memory" "none")))))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") + + (define_insn_reservation "c86_4g_m7_avx512_sse_class_z_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1901,7 +1880,7 @@ (define_insn_reservation "c86_4g_m7_avx512_sse_class_z_load" 11 + (and (eq_attr "length_immediate" "1") + (and (eq_attr "mode" "V32HF,V16SF,V8DF") + (eq_attr "memory" "load")))))) +- "c86-4g-m7-vector,c86-4g-m7-load") ++ "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") + + (define_insn_reservation "c86_4g_m7_avx_sse" 5 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1919,19 +1898,102 @@ (define_insn_reservation "c86_4g_m7_avx_sse_load" 12 + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") + +-(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt" 16 ++;; SSE SQRT ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_x" 14 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") +- (and (eq_attr "c86_attr" "sqrt") +- (eq_attr "memory" "none")))) +- "c86-4g-m7-direct,c86-4g-m7-fpu1|c86-4g-m7-fpu3,c86-4g-m7-fdiv*16") ++ (and (eq_attr "mode" "SF,V4SF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x9") + +-(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_load" 23 ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_xload" 21 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") +- (and (eq_attr "c86_attr" "sqrt") +- (eq_attr "memory" "load")))) +- "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu3,c86-4g-m7-fdiv*16") ++ (and (eq_attr "mode" "SF,V4SF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "load"))))) ++ 
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x9") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_y" 14 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*9") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_yload" 21 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*9") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_z" 26 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "V16SF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*22") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_zload" 33 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "V16SF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*22") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_x" 20 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "DF,V2DF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x15") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_xload" 27 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "DF,V2DF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x15") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_y" 20 ++ (and (eq_attr "cpu" 
"c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*15") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_yload" 27 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*15") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_z" 38 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "V8DF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "none"))))) ++ "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*34") ++ ++(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_zload" 45 ++ (and (eq_attr "cpu" "c86_4g_m7") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "mode" "V8DF") ++ (and (eq_attr "c86_attr" "sqrt") ++ (eq_attr "memory" "load"))))) ++ "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*34") + + ;; MSKLOG/MSKMOV + (define_insn_reservation "c86_4g_m7_avx512_msklog" 1 +@@ -1944,7 +2006,7 @@ (define_insn_reservation "c86_4g_m7_avx512_msklog_vector" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "msklog") + (eq_attr "c86_decode" "vector"))) +- "c86-4g-m7-vector") ++ "c86-4g-m7-vector,c86-4g-m7-fpu_1_3") + + (define_insn_reservation "c86_4g_m7_avx512_mskmov_reg_k" 1 + (and (eq_attr "cpu" "c86_4g_m7") +@@ -1964,7 +2026,7 @@ (define_insn_reservation "c86_4g_m7_avx512_mskmov_z_k" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mskmov") + (match_operand:V8DI 0 "register_operand" "v"))) +- "c86-4g-m7-vector,c86-4g-m7-fpu3*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") ++ "c86-4g-m7-vector,c86-4g-m7-fpu3,c86-4g-m7-fpu_1_3") + + (define_insn_reservation "c86_4g_m7_avx512_mskmov_k_k" 1 + (and (eq_attr "cpu" "c86_4g_m7") +@@ 
-1978,7 +2040,7 @@ (define_insn_reservation "c86_4g_m7_avx512_mskmov_k_reg" 3 + (and (eq_attr "type" "mskmov") + (and (match_operand 0 "register_operand" "k") + (match_operand 1 "register_operand" "r")))) +- "c86-4g-m7-double,c86-4g-m7-fpu1*2,c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") ++ "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-fpu_1_3") + + (define_insn_reservation "c86_4g_m7_avx512_mskmov_k_m" 8 + (and (eq_attr "cpu" "c86_4g_m7") +diff --git a/gcc/config/i386/c86-4g.md b/gcc/config/i386/c86-4g.md +index 49a46a8aa19..8b81fcaabb2 100644 +--- a/gcc/config/i386/c86-4g.md ++++ b/gcc/config/i386/c86-4g.md +@@ -30,8 +30,10 @@ (define_attr "c86_attr" "other,abs,sqrt,maxmin,blend,blendv,rcp,movnt,avg, + ;; HYGON Scheduling + ;; Modeling automatons for decoders, integer execution pipes, + ;; AGU pipes, floating point execution units, integer and +-;; floating point dividers. +-(define_automaton "c86_4g, c86_4g_ieu, c86_4g_fp, c86_4g_agu, c86_4g_idiv, c86_4g_fdiv") ++;; floating point dividers. Split fp1 into its own automaton ++;; to keep this unit independent without increasing the main ++;; c86_4g_fp state space. ++(define_automaton "c86_4g, c86_4g_ieu, c86_4g_fp024, c86_4g_fp1, c86_4g_agu, c86_4g_idiv, c86_4g_fdiv") + + ;; Decoders unit has 4 decoders and all of them can decode fast path + ;; and vector type instructions. +@@ -40,10 +42,6 @@ (define_cpu_unit "c86-4g-decode1" "c86_4g") + (define_cpu_unit "c86-4g-decode2" "c86_4g") + (define_cpu_unit "c86-4g-decode3" "c86_4g") + +-;; Two separated dividers for int and fp. +-(define_cpu_unit "c86-4g-idiv" "c86_4g_idiv") +-(define_cpu_unit "c86-4g-fdiv" "c86_4g_fdiv") +- + ;; Currently blocking all decoders for vector path instructions as + ;; they are dispatched separetely as microcode sequence. + ;; Fix me: Need to revisit this. +@@ -55,7 +53,6 @@ (define_reservation "c86-4g-direct" "c86-4g-decode0|c86-4g-decode1|c86-4g-decode + ;; Fix me: Need to revisit this later to simulate fast path double behavior. 
+ (define_reservation "c86-4g-double" "c86-4g-direct") + +- + ;; Integer unit 4 ALU pipes. + (define_cpu_unit "c86-4g-ieu0" "c86_4g_ieu") + (define_cpu_unit "c86-4g-ieu1" "c86_4g_ieu") +@@ -63,6 +60,9 @@ (define_cpu_unit "c86-4g-ieu2" "c86_4g_ieu") + (define_cpu_unit "c86-4g-ieu3" "c86_4g_ieu") + (define_reservation "c86-4g-ieu" "c86-4g-ieu0|c86-4g-ieu1|c86-4g-ieu2|c86-4g-ieu3") + ++;; One separated integer divider. ++(define_cpu_unit "c86-4g-idiv" "c86_4g_idiv") ++ + ;; 2 AGU pipes in c86_4g + ;; According to CPU diagram last AGU unit is used only for stores. + (define_cpu_unit "c86-4g-agu0" "c86_4g_agu") +@@ -81,10 +81,10 @@ (define_reservation "c86-4g-ivector" "c86-4g-ieu0+c86-4g-ieu1 + +c86-4g-agu0+c86-4g-agu1") + + ;; Floating point unit 4 FP pipes. +-(define_cpu_unit "c86-4g-fp0" "c86_4g_fp") +-(define_cpu_unit "c86-4g-fp1" "c86_4g_fp") +-(define_cpu_unit "c86-4g-fp2" "c86_4g_fp") +-(define_cpu_unit "c86-4g-fp3" "c86_4g_fp") ++(define_cpu_unit "c86-4g-fp0" "c86_4g_fp024") ++(define_cpu_unit "c86-4g-fp1" "c86_4g_fp1") ++(define_cpu_unit "c86-4g-fp2" "c86_4g_fp024") ++(define_cpu_unit "c86-4g-fp3" "c86_4g_fp024") + + (define_reservation "c86-4g-fpu" "c86-4g-fp0|c86-4g-fp1|c86-4g-fp2|c86-4g-fp3") + +@@ -92,6 +92,11 @@ (define_reservation "c86-4g-fvector" "c86-4g-fp0+c86-4g-fp1 + +c86-4g-fp2+c86-4g-fp3 + +c86-4g-agu0+c86-4g-agu1") + ++;; One separated FP divider. 
++(define_cpu_unit "c86-4g-fdiv" "c86_4g_fdiv") ++ ++(define_reservation "c86-4g-fp1fdivx4" "(c86-4g-fp1+c86-4g-fdiv)*4") ++ + ;; Call instruction + (define_insn_reservation "c86_4g_call" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -387,7 +392,7 @@ (define_insn_reservation "c86_4g_fp_sqrt" 22 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fpspc") + (eq_attr "c86_attr" "sqrt"))) +- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*22") ++ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*18") + + (define_insn_reservation "c86_4g_sse_sqrt_sf" 14 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -395,7 +400,7 @@ (define_insn_reservation "c86_4g_sse_sqrt_sf" 14 + (and (eq_attr "memory" "none,unknown") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) +- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*14") ++ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*10") + + (define_insn_reservation "c86_4g_sse_sqrt_sf_mem" 21 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -403,7 +408,7 @@ (define_insn_reservation "c86_4g_sse_sqrt_sf_mem" 21 + (and (eq_attr "memory" "load") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*14") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*10") + + (define_insn_reservation "c86_4g_sse_sqrt_df" 20 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -411,7 +416,7 @@ (define_insn_reservation "c86_4g_sse_sqrt_df" 20 + (and (eq_attr "memory" "none,unknown") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) +- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*20") ++ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*16") + + (define_insn_reservation "c86_4g_sse_sqrt_df_mem" 27 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +@@ -419,7 +424,7 @@ (define_insn_reservation "c86_4g_sse_sqrt_df_mem" 27 + (and (eq_attr "memory" "load") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*20") ++ 
"c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*16") + + ;; RCP + (define_insn_reservation "c86_4g_sse_rcp" 5 +@@ -492,20 +497,20 @@ (define_insn_reservation "c86_4g_fp_op_div" 15 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "none"))) +- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*15") ++ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*11") + + (define_insn_reservation "c86_4g_fp_op_div_load" 22 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "load"))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*15") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*11") + +-(define_insn_reservation "c86_4g_fp_op_idiv_load" 27 ++(define_insn_reservation "c86_4g_fp_op_idiv_load" 26 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "true") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*19") ++ "c86-4g-double,c86-4g-load,c86-4g-fp1*4,c86-4g-fp1fdivx4,c86-4g-fdiv*11") + + ;; MMX, SSE, SSEn.n, AVX, AVX2 instructions + (define_insn_reservation "c86_4g_fp_insn" 1 +@@ -1024,28 +1029,28 @@ (define_insn_reservation "c86_4g_ssediv_ss_ps" 10 + (eq_attr "mode" "V4SF,SF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none"))) +- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*10") ++ "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*6") + + (define_insn_reservation "c86_4g_ssediv_ss_ps_load" 17 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V4SF,SF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*10") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*6") + + (define_insn_reservation "c86_4g_ssediv_sd_pd" 13 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V2DF,DF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none"))) +- "c86-4g-direct,c86-4g-fp1,c86-4g-fdiv*13") ++ 
"c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*9") + + (define_insn_reservation "c86_4g_ssediv_sd_pd_load" 20 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V2DF,DF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) +- "c86-4g-direct,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*13") ++ "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*9") + + + (define_insn_reservation "c86_4g_ssediv_avx256_ps" 10 +@@ -1053,28 +1058,28 @@ (define_insn_reservation "c86_4g_ssediv_avx256_ps" 10 + (and (eq_attr "mode" "V8SF") + (and (eq_attr "memory" "none") + (eq_attr "type" "ssediv")))) +- "c86-4g-double,c86-4g-fp1,c86-4g-fdiv*10") ++ "c86-4g-double,c86-4g-fp1fdivx4,c86-4g-fdiv*6") + + (define_insn_reservation "c86_4g_ssediv_avx256_ps_load" 17 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*10") ++ "c86-4g-double,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*6") + + (define_insn_reservation "c86_4g_ssediv_avx256_pd" 13 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V4DF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none")))) +- "c86-4g-double,c86-4g-fp1,c86-4g-fdiv*13") ++ "c86-4g-double,c86-4g-fp1fdivx4,c86-4g-fdiv*9") + + (define_insn_reservation "c86_4g_ssediv_avx256_pd_load" 20 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V4DF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load")))) +- "c86-4g-double,c86-4g-load,c86-4g-fp1,c86-4g-fdiv*13") ++ "c86-4g-double,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*9") + ;; SSE MUL + (define_insn_reservation "c86_4g_ssemul_ss_ps" 3 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") +-- +2.34.1 + diff --git a/gcc.spec b/gcc.spec index 7dca0a6..2190779 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,6 +1,6 @@ %global gcc_version 12.3.1.8 %global gcc_major 12 -%global gcc_release 2 +%global gcc_release 3 %global isl_version 
0.18 %global tgcc_summary Tencent Compiler %global _unpackaged_files_terminate_build 0 @@ -151,6 +151,9 @@ Patch3008: gcc12-d-shared-libphobos.patch # tgcc 12.3.1.3 backports a common modify from high version upstream gcc, revert it temply in x64 and arm64 Patch3010: Revert-tree-optimization-110702-avoid-zero-based-memory-ref.patch Patch3011: ZHAOXIN-0001-Backport-zhaoxin-lujiazui-yongfeng-shijidadao-enable.patch +Patch3012: HYGON-0001-i386-Support-HYGON-c86-4g-series-processors.patch +Patch3013: HYGON-0002-i386-Adjust-some-c86-4g-.md-modeling-to-reduce-build.patch +Patch3014: HYGON-0003-i386-Refine-c86-4g-fdiv-scheduling-model.patch BuildRequires: binutils >= 2.31, elfutils-devel >= 0.147, elfutils-libelf-devel >= 0.147, sharutils, gcc, gcc-c++, make BuildRequires: glibc-static, glibc-devel >= 2.4.90-13, gdb @@ -880,6 +883,9 @@ for cross toolchains %endif %patch 3011 -p1 +%patch 3012 -p1 +%patch 3013 -p1 +%patch 3014 -p1 rm -f libphobos/testsuite/libphobos.gc/forkgc2.d @@ -2435,6 +2441,11 @@ end %changelog +* Wed Jun 03 2026 Kewen Lin - 12.3.1.8-3 +- [Type] sync +- [DESC] [X86] Support HYGON C86-4G series processors +- backport HYGON C86-4G support from fsf gcc upstream + * Fri May 22 2026 Tim Hu - 12.3.1.8-2 - [Type] sync - [DESC] [X86] zhaoxin patch -- Gitee