diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index f2b4b9593b742d2b93b55ef5d76360c0b632b962..9ec5d9f475091452873066d846d1ff27e1e9bb7e 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -315,6 +315,48 @@ get_amd_cpu (struct __processor_model *cpu_model, return cpu; } +/* Record the HYGON CPU type and subtype in CPU_MODEL, using the family and + model from CPU_MODEL2. Return the CPU name, or NULL if unknown. */ + +static inline const char * +get_hygon_cpu (struct __processor_model *cpu_model, + struct __processor_model2 *cpu_model2, + unsigned int *cpu_features2 __attribute__((unused))) +{ + const char *cpu = NULL; + unsigned int family = cpu_model2->__cpu_family; + unsigned int model = cpu_model2->__cpu_model; + + switch (family) + { + case 0x18: + cpu_model->__cpu_type = HYGONFAM18H; + if (model == 0x4) + { + cpu = "c86-4g-m4"; + CHECK___builtin_cpu_is ("c86-4g-m4"); + cpu_model->__cpu_subtype = HYGONFAM18H_C86_4G_M4; + } + else if (model == 0x6) + { + cpu = "c86-4g-m6"; + CHECK___builtin_cpu_is ("c86-4g-m6"); + cpu_model->__cpu_subtype = HYGONFAM18H_C86_4G_M6; + } + else if (model == 0x7) + { + cpu = "c86-4g-m7"; + CHECK___builtin_cpu_is ("c86-4g-m7"); + cpu_model->__cpu_subtype = HYGONFAM18H_C86_4G_M7; + } + break; + default: + break; + } + + return cpu; +} + /* Get the specific type of Intel CPU and return Intel CPU name. Return NULL for unknown Intel CPU. */ @@ -1042,6 +1084,21 @@ cpu_indicator_init (struct __processor_model *cpu_model, cpu_model->__cpu_vendor = VENDOR_CYRIX; else if (vendor == signature_NSC_ebx) cpu_model->__cpu_vendor = VENDOR_NSC; + else if (vendor == signature_HYGON_ebx) + { + /* For base family 0x0f, add the extended family and model fields. */ + if (family == 0x0f) + { + family += extended_family; + model += extended_model; + } + cpu_model2->__cpu_family = family; + cpu_model2->__cpu_model = model; + + /* Get CPU type. 
*/ + get_hygon_cpu (cpu_model, cpu_model2, cpu_features2); + cpu_model->__cpu_vendor = VENDOR_HYGON; + } else cpu_model->__cpu_vendor = VENDOR_OTHER; diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index dff43c9c7860d4bd58a334ffa4c08af39d7f6ba0..e2877e0d5479cd1d3692f4b454dca083c73128ca 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -1834,7 +1834,10 @@ const char *const processor_names[] = "znver1", "znver2", "znver3", - "znver4" + "znver4", + "c86-4g-m4", + "c86-4g-m6", + "c86-4g-m7" }; /* Guarantee that the array is aligned with enum processor_type. */ @@ -2091,6 +2094,15 @@ const pta processor_alias_table[] = | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT, M_CPU_TYPE (AMD_BTVER2), P_PROC_BMI}, + {"c86-4g-m4", PROCESSOR_C86_4G_M4, CPU_C86_4G_M4, + PTA_C86_4G_M4, + M_CPU_SUBTYPE (HYGONFAM18H_C86_4G_M4), P_PROC_AVX2}, + {"c86-4g-m6", PROCESSOR_C86_4G_M6, CPU_C86_4G_M6, + PTA_C86_4G_M6, + M_CPU_SUBTYPE (HYGONFAM18H_C86_4G_M6), P_PROC_AVX2}, + {"c86-4g-m7", PROCESSOR_C86_4G_M7, CPU_C86_4G_M7, + PTA_C86_4G_M7, + M_CPU_SUBTYPE (HYGONFAM18H_C86_4G_M7), P_PROC_AVX512F}, {"generic", PROCESSOR_GENERIC, CPU_GENERIC, PTA_64BIT @@ -2111,10 +2123,14 @@ const pta processor_alias_table[] = M_CPU_TYPE (AMDFAM10H_SHANGHAI), P_NONE}, {"istanbul", PROCESSOR_GENERIC, CPU_GENERIC, 0, M_CPU_TYPE (AMDFAM10H_ISTANBUL), P_NONE}, + {"hygon", PROCESSOR_GENERIC, CPU_GENERIC, 0, + M_VENDOR (VENDOR_HYGON), P_NONE}, + {"hygonfam18h", PROCESSOR_GENERIC, CPU_GENERIC, 0, + M_CPU_TYPE (HYGONFAM18H), P_NONE}, }; /* NB: processor_alias_table stops at the "generic" entry. */ -unsigned int const pta_size = ARRAY_SIZE (processor_alias_table) - 7; +unsigned int const pta_size = ARRAY_SIZE (processor_alias_table) - 9; unsigned int const num_arch_names = ARRAY_SIZE (processor_alias_table); /* Provide valid option values for -march and -mtune options. 
*/ diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index cd1fa29b3a20957aee6e23299912cd8cf8a51f33..99254b72e1b4e1553788ecd90bf078dbebde6209 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -30,6 +30,7 @@ enum processor_vendor VENDOR_INTEL = 1, VENDOR_AMD, VENDOR_ZHAOXIN, + VENDOR_HYGON, VENDOR_OTHER, VENDOR_CENTAUR, VENDOR_CYRIX, @@ -60,6 +61,7 @@ enum processor_types INTEL_TREMONT, AMDFAM19H, ZHAOXIN_FAM7H, + HYGONFAM18H, CPU_TYPE_MAX, BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX }; @@ -97,6 +99,9 @@ enum processor_subtypes AMDFAM19H_ZNVER4, ZHAOXIN_FAM7H_YONGFENG, ZHAOXIN_FAM7H_SHIJIDADAO, + HYGONFAM18H_C86_4G_M4, + HYGONFAM18H_C86_4G_M6, + HYGONFAM18H_C86_4G_M7, CPU_SUBTYPE_MAX }; diff --git a/gcc/config.gcc b/gcc/config.gcc index e0e6e6f9a5a5da90c1cdd040082dbf0c6f10c1c0..35641a74d6807b27b6387f1c054ad50a59a45cd1 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -672,7 +672,7 @@ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 lujiazui yongfeng \ -shijidadao native" +shijidadao c86-4g-m4 c86-4g-m6 c86-4g-m7 native" # Additional x86 processors supported by --with-cpu=. Each processor # MUST be separated by exactly one space. 
@@ -3877,6 +3877,18 @@ case ${target} in cpu=pentiumpro arch_without_sse2=yes ;; + c86_4g_m4-*) + arch=c86-4g-m4 + cpu=c86-4g-m4 + ;; + c86_4g_m6-*) + arch=c86-4g-m6 + cpu=c86-4g-m6 + ;; + c86_4g_m7-*) + arch=c86-4g-m7 + cpu=c86-4g-m7 + ;; *) arch=pentiumpro cpu=generic @@ -3971,6 +3983,18 @@ case ${target} in arch=corei7 cpu=corei7 ;; + c86_4g_m4-*) + arch=c86-4g-m4 + cpu=c86-4g-m4 + ;; + c86_4g_m6-*) + arch=c86-4g-m6 + cpu=c86-4g-m6 + ;; + c86_4g_m7-*) + arch=c86-4g-m7 + cpu=c86-4g-m7 + ;; *) arch=x86-64 cpu=generic diff --git a/gcc/config/i386/c86-4g-m7.md b/gcc/config/i386/c86-4g-m7.md new file mode 100644 index 0000000000000000000000000000000000000000..0fd8422422b73d9b7cbd792491301672150a32a6 --- /dev/null +++ b/gcc/config/i386/c86-4g-m7.md @@ -0,0 +1,2050 @@ +;; Copyright (C) 2026 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; + +;; HYGON c86-4g-m7 Scheduling +;; Modeling automatons for decoders, integer execution pipes, +;; AGU pipes, branch, floating point execution, fp store units, +;; integer and floating point dividers. The FP pipes are split into +;; c86_4g_m7_fpu02 (fpu0, fpu2) and c86_4g_m7_fpu13 (fpu1, fpu3) to keep +;; the two pairs independent without growing a single FP state space. 
+(define_automaton "c86_4g_m7, c86_4g_m7_ieu, c86_4g_m7_agu, c86_4g_m7_fpu02, c86_4g_m7_fpu13, c86_4g_m7_idiv, c86_4g_m7_fdiv") + +;; The decode unit has 4 decoders; all of them can decode fast path +;; and vector type instructions. +(define_cpu_unit "c86-4g-m7-decode0" "c86_4g_m7") +(define_cpu_unit "c86-4g-m7-decode1" "c86_4g_m7") +(define_cpu_unit "c86-4g-m7-decode2" "c86_4g_m7") +(define_cpu_unit "c86-4g-m7-decode3" "c86_4g_m7") + +;; Currently blocking all decoders for vector path instructions as +;; they are dispatched separately as a microcode sequence. +(define_reservation "c86-4g-m7-vector" "c86-4g-m7-decode0+c86-4g-m7-decode1+c86-4g-m7-decode2+c86-4g-m7-decode3") + +;; Direct instructions can be issued to any of the four decoders. +(define_reservation "c86-4g-m7-direct" "c86-4g-m7-decode0|c86-4g-m7-decode1|c86-4g-m7-decode2|c86-4g-m7-decode3") + +;; FIXME: Revisit this later to simulate fast path double behavior. +(define_reservation "c86-4g-m7-double" "c86-4g-m7-direct") + +;; Integer unit: 4 ALU pipes. +(define_cpu_unit "c86-4g-m7-ieu0" "c86_4g_m7_ieu") +(define_cpu_unit "c86-4g-m7-ieu1" "c86_4g_m7_ieu") +(define_cpu_unit "c86-4g-m7-ieu2" "c86_4g_m7_ieu") +(define_cpu_unit "c86-4g-m7-ieu3" "c86_4g_m7_ieu") + +;; One separate integer divider. +(define_cpu_unit "c86-4g-m7-idiv" "c86_4g_m7_idiv") + +;; An additional branch unit, not part of the c86-4g-m7-ieu reservation. +(define_cpu_unit "c86-4g-m7-bru0" "c86_4g_m7_ieu") +(define_reservation "c86-4g-m7-ieu" "c86-4g-m7-ieu0|c86-4g-m7-ieu1|c86-4g-m7-ieu2|c86-4g-m7-ieu3") + +;; 3 AGU pipes in c86-4g-m7. +(define_cpu_unit "c86-4g-m7-agu0" "c86_4g_m7_agu") +(define_cpu_unit "c86-4g-m7-agu1" "c86_4g_m7_agu") +(define_cpu_unit "c86-4g-m7-agu2" "c86_4g_m7_agu") +(define_reservation "c86-4g-m7-agu-reserve" "c86-4g-m7-agu0|c86-4g-m7-agu1|c86-4g-m7-agu2") + +;; Load is 4 cycles. We do not model reservation of load unit. 
+(define_reservation "c86-4g-m7-load" "c86-4g-m7-agu-reserve") +(define_reservation "c86-4g-m7-store" "c86-4g-m7-agu-reserve") + +;; vectorpath (microcoded) instructions are single issue instructions. +;; So, they occupy all the integer units. +(define_reservation "c86-4g-m7-ivector" "c86-4g-m7-ieu0+c86-4g-m7-ieu1 + +c86-4g-m7-ieu2+c86-4g-m7-ieu3+c86-4g-m7-bru0 + +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2") + +;; Floating point unit 4 FP pipes. +(define_cpu_unit "c86-4g-m7-fpu0" "c86_4g_m7_fpu02") +(define_cpu_unit "c86-4g-m7-fpu1" "c86_4g_m7_fpu13") +(define_cpu_unit "c86-4g-m7-fpu2" "c86_4g_m7_fpu02") +(define_cpu_unit "c86-4g-m7-fpu3" "c86_4g_m7_fpu13") + +(define_reservation "c86-4g-m7-fpu" "c86-4g-m7-fpu0|c86-4g-m7-fpu1|c86-4g-m7-fpu2|c86-4g-m7-fpu3") +(define_reservation "c86-4g-m7-fpu_0_1" "c86-4g-m7-fpu0|c86-4g-m7-fpu1") +(define_reservation "c86-4g-m7-fpu_0_2" "c86-4g-m7-fpu0|c86-4g-m7-fpu2") +(define_reservation "c86-4g-m7-fpu_0_2x2" "c86-4g-m7-fpu0*2|c86-4g-m7-fpu2*2") +(define_reservation "c86-4g-m7-fpu_0_2x4" "c86-4g-m7-fpu0*4|c86-4g-m7-fpu2*4") +(define_reservation "c86-4g-m7-fpu_0_3" "c86-4g-m7-fpu0|c86-4g-m7-fpu3") +(define_reservation "c86-4g-m7-fpu_1_3" "c86-4g-m7-fpu1|c86-4g-m7-fpu3") +(define_reservation "c86-4g-m7-fpu_1_3x2" "c86-4g-m7-fpu1*2|c86-4g-m7-fpu3*2") +(define_reservation "c86-4g-m7-fpu_1_3x3" "c86-4g-m7-fpu1*3|c86-4g-m7-fpu3*3") +(define_reservation "c86-4g-m7-fpu_1_3x6" "c86-4g-m7-fpu1*6|c86-4g-m7-fpu3*6") +(define_reservation "c86-4g-m7-fpux2" "c86-4g-m7-fpu0*2|c86-4g-m7-fpu1*2|c86-4g-m7-fpu2*2|c86-4g-m7-fpu3*2") +(define_reservation "c86-4g-m7-fpux4" "c86-4g-m7-fpu0*4|c86-4g-m7-fpu1*4|c86-4g-m7-fpu2*4|c86-4g-m7-fpu3*4") +(define_reservation "c86-4g-m7-fpux8" "c86-4g-m7-fpu0*8|c86-4g-m7-fpu1*8|c86-4g-m7-fpu2*8|c86-4g-m7-fpu3*8") +(define_reservation "c86-4g-m7-fpux6" "c86-4g-m7-fpu0*6|c86-4g-m7-fpu1*6|c86-4g-m7-fpu2*6|c86-4g-m7-fpu3*6") +(define_reservation "c86-4g-m7-fpux16" 
"c86-4g-m7-fpu0*16|c86-4g-m7-fpu1*16|c86-4g-m7-fpu2*16|c86-4g-m7-fpu3*16") +(define_reservation "c86-4g-m7-fvector" "c86-4g-m7-fpu0+c86-4g-m7-fpu1 + +c86-4g-m7-fpu2+c86-4g-m7-fpu3 + +c86-4g-m7-agu0+c86-4g-m7-agu1+c86-4g-m7-agu2") + +;; Two FP dividers. +(define_cpu_unit "c86-4g-m7-fdiv1" "c86_4g_m7_fdiv") +(define_cpu_unit "c86-4g-m7-fdiv3" "c86_4g_m7_fdiv") + +(define_reservation "c86-4g-m7-fp1fdiv1x4" "(c86-4g-m7-fpu1+c86-4g-m7-fdiv1)*4") +(define_reservation "c86-4g-m7-fp3fdiv3x4" "(c86-4g-m7-fpu3+c86-4g-m7-fdiv3)*4") +(define_reservation "c86-4g-m7-fdiv13" "(c86-4g-m7-fdiv1+c86-4g-m7-fdiv3)") +(define_reservation "c86-4g-m7-fp13div13" "(c86-4g-m7-fpu1+c86-4g-m7-fpu3+c86-4g-m7-fdiv1+c86-4g-m7-fdiv3)") +(define_reservation "c86-4g-m7-fp13div13x4" "c86-4g-m7-fp13div13*4") +(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x8" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*8)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*8)") +(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x9" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*9)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*9)") +(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x11" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*11)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*11)") +(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x15" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*15)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*15)") +(define_reservation "c86-4g-m7-fp1div1_fp3div3_x4x18" "(c86-4g-m7-fp1fdiv1x4,c86-4g-m7-fdiv1*18)|(c86-4g-m7-fp3fdiv3x4,c86-4g-m7-fdiv3*18)") + +;; IMOV/IMOVX +(define_insn_reservation "c86_4g_m7_imov_xchg" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "imov") + (and (eq_attr "c86_decode" "vector") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct") + +(define_insn_reservation "c86_4g_m7_imov_xchg_load" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "imov") + (and (eq_attr "c86_decode" "vector") + (eq_attr "memory" "!none")))) + "c86-4g-m7-direct,c86-4g-m7-load") + +(define_insn_reservation "c86_4g_m7_imovx_cwde" 2 + 
(and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "imovx") + (and (eq_attr "c86_decode" "double") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-ieu") + +(define_insn_reservation "c86_4g_m7_imov" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "imov,imovx") + (and (eq_attr "c86_decode" "direct") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-ieu") + +(define_insn_reservation "c86_4g_m7_imov_load" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "imov,imovx") + (and (eq_attr "c86_decode" "!vector") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu") + +(define_insn_reservation "c86_4g_m7_imov_store" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "imov,imovx") + (and (eq_attr "c86_decode" "!vector") + (eq_attr "memory" "store")))) + "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-ieu") + +;; PUSH +(define_insn_reservation "c86_4g_m7_push" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "push,sse") + (eq_attr "memory" "store"))) + "c86-4g-m7-direct,c86-4g-m7-store") + +(define_insn_reservation "c86_4g_m7_push_mem" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "push") + (eq_attr "memory" "both"))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-store") + +;; POP +(define_insn_reservation "c86_4g_m7_pop" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "pop") + (eq_attr "memory" "load"))) + "c86-4g-m7-direct,c86-4g-m7-load") + +(define_insn_reservation "c86_4g_m7_pop_mem" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "pop") + (eq_attr "memory" "both"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-store") + +;; IMUL/IMULX +(define_insn_reservation "c86_4g_m7_imul" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "imul,imulx") + (eq_attr "memory" "none"))) + "c86-4g-m7-direct,c86-4g-m7-ieu1") + +(define_insn_reservation "c86_4g_m7_imul_load" 7 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "imul") + (eq_attr 
"memory" "!none"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu1") + +;; IDIV +(define_insn_reservation "c86_4g_m7_idiv" 7 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "!QI") + (eq_attr "memory" "none")))) + "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*7") + +(define_insn_reservation "c86_4g_m7_idiv_QI" 6 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "none")))) + "c86-4g-m7-double,c86-4g-m7-ieu3,c86-4g-m7-idiv*6") + +(define_insn_reservation "c86_4g_m7_idiv_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "!QI") + (eq_attr "memory" "load")))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*7") + +(define_insn_reservation "c86_4g_m7_idiv_QI_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "load")))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-ieu3,c86-4g-m7-idiv*6") + +;; Integer/general instructions +(define_insn_reservation "c86_4g_m7_insn" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu,negnot,rotate1,ishift1,test,incdec,icmp, + rotate,rotatex,ishift,ishiftx,icmov") + (eq_attr "memory" "none,unknown"))) + "c86-4g-m7-direct,c86-4g-m7-ieu") + +(define_insn_reservation "c86_4g_m7_insn_load" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu,incdec,icmp,test,ishift, + ishiftx,icmov,rotate,rotatex") + (eq_attr "memory" "load"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu") + +(define_insn_reservation "c86_4g_m7_insn_store" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ishift1,rotate1,rotate,incdec, + alu,icmov,ishift,negnot,alu1") + (eq_attr "memory" "store"))) + "c86-4g-m7-direct,c86-4g-m7-ieu,c86-4g-m7-store") + +(define_insn_reservation "c86_4g_m7_insn2_store" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "icmp") + (eq_attr "memory" 
"store"))) + "c86-4g-m7-direct,c86-4g-m7-ieu,c86-4g-m7-store") + +(define_insn_reservation "c86_4g_m7_insn_both" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu,negnot,rotate1,ishift1,incdec,rotate, + rotatex,ishift,ishiftx,icmov") + (eq_attr "memory" "both"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu,c86-4g-m7-store") + +(define_insn_reservation "c86_4g_m7_setcc" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "none,unknown"))) + "c86-4g-m7-direct,c86-4g-m7-ieu0|c86-4g-m7-ieu3") + +(define_insn_reservation "c86_4g_m7_setcc_load" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "load"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu0|c86-4g-m7-ieu3") + +(define_insn_reservation "c86_4g_m7_setcc_store" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "store"))) + "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-ieu0|c86-4g-m7-ieu3") + +;; ALU1 +(define_insn_reservation "c86_4g_m7_alu1_double" 2 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu1") + (and (eq_attr "c86_decode" "double") + (eq_attr "memory" "none,unknown")))) + "c86-4g-m7-double,c86-4g-m7-ieu") + +(define_insn_reservation "c86_4g_m7_alu1_double_load" 6 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu1") + (and (eq_attr "c86_decode" "double") + (eq_attr "memory" "both")))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-store,c86-4g-m7-ieu") + +(define_insn_reservation "c86_4g_m7_alu1_vector" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu1") + (and (eq_attr "c86_decode" "vector") + (eq_attr "memory" "none,unknown")))) + "c86-4g-m7-vector,c86-4g-m7-ivector*3") + +(define_insn_reservation "c86_4g_m7_alu1_vector_load" 7 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu1") + (and (eq_attr "c86_decode" "vector") + (eq_attr "memory" "both")))) + 
"c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-store,c86-4g-m7-ivector*3") + +(define_insn_reservation "c86_4g_m7_alu1_direct" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu1") + (and (eq_attr "c86_decode" "direct") + (eq_attr "memory" "none,unknown")))) + "c86-4g-m7-direct,c86-4g-m7-ieu") + +(define_insn_reservation "c86_4g_m7_alu1_direct_load" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "alu1") + (and (eq_attr "c86_decode" "direct") + (eq_attr "memory" "both")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-store,c86-4g-m7-ieu") + +;; CALL/CALLV +(define_insn_reservation "c86_4g_m7_call" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (eq_attr "type" "call,callv")) + "c86-4g-m7-double,c86-4g-m7-ieu0|c86-4g-m7-bru0,c86-4g-m7-store") + +;; IBR +(define_insn_reservation "c86_4g_m7_branch" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "none"))) + "c86-4g-m7-direct,c86-4g-m7-ieu0|c86-4g-m7-bru0") + +(define_insn_reservation "c86_4g_m7_branch_load" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "load"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-ieu0|c86-4g-m7-bru0") + +;; LEA +(define_insn_reservation "c86_4g_m7_lea" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (eq_attr "type" "lea")) + "c86-4g-m7-direct,c86-4g-m7-ieu") + +;; LEAVE +(define_insn_reservation "c86_4g_m7_leave" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (eq_attr "type" "leave")) + "c86-4g-m7-double,c86-4g-m7-ieu,c86-4g-m7-store") + +;; STR +(define_insn_reservation "c86_4g_m7_str" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "str") + (eq_attr "memory" "none"))) + "c86-4g-m7-vector,c86-4g-m7-ivector*3") + +(define_insn_reservation "c86_4g_m7_str_load" 7 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "str") + (eq_attr "memory" "load"))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-ivector*3") + + +(define_insn_reservation "c86_4g_m7_ieu_vector" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and 
(eq_attr "type" "other,multi") + (and (eq_attr "unit" "!i387") + (eq_attr "memory" "none,unknown")))) + "c86-4g-m7-vector,c86-4g-m7-ivector*5") + +(define_insn_reservation "c86_4g_m7_ieu_vector_load" 9 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "other,multi") + (and (eq_attr "unit" "!i387") + (eq_attr "memory" "load")))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-ivector*5") + +;; SSEINS +(define_insn_reservation "c86_4g_m7_sse_insertimm" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseins") + (and (eq_attr "memory" "none") + (eq_attr "length_immediate" "2")))) + "c86-4g-m7-double,c86-4g-m7-fpu_0_3,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_sse_insert" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseins") + (and (eq_attr "memory" "none") + (eq_attr "length_immediate" "!2")))) + "c86-4g-m7-direct,c86-4g-m7-fpu1*2") + +;; FCMOV +(define_insn_reservation "c86_4g_m7_fp_cmov" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (eq_attr "type" "fcmov")) + "c86-4g-m7-vector,c86-4g-m7-fvector*3") + +;; FLD +(define_insn_reservation "c86_4g_m7_fp_mov_direct_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") + +;; FST +(define_insn_reservation "c86_4g_m7_fp_mov_direct_store" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "store")))) + "c86-4g-m7-direct,c86-4g-m7-fpu1,c86-4g-m7-store") + +;; FILD +(define_insn_reservation "c86_4g_m7_fp_mov_double_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "c86_decode" "double") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") + +;; FIST +(define_insn_reservation "c86_4g_m7_fp_mov_double_store" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "c86_decode" "double") + (and (eq_attr "type" 
"fmov") + (eq_attr "memory" "store")))) + "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-store") + +(define_insn_reservation "c86_4g_m7_fp_mov_direct" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu1") + +;; FSQRT +(define_insn_reservation "c86_4g_m7_fp_sqrt" 22 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fpspc") + (eq_attr "c86_attr" "sqrt"))) + "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x18") + +;; FPSPC +(define_insn_reservation "c86_4g_m7_fp_spc_direct" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fpspc") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "store"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu3") + +(define_insn_reservation "c86_4g_m7_fp_spc" 6 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fpspc") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-m7-vector,c86-4g-m7-fvector*6") + +(define_insn_reservation "c86_4g_m7_fp_op_mul" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fop,fmul") + (eq_attr "memory" "none"))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_fp_op_mul_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fop,fmul") + (and (eq_attr "fp_int_src" "false") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_fp_op_imul_load" 16 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fmul") + (and (eq_attr "fp_int_src" "true") + (eq_attr "memory" "!none")))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu0,c86-4g-m7-fpu_0_2") + +;; FDIV +(define_insn_reservation "c86_4g_m7_fp_div" 15 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "none"))) + "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x11") + +(define_insn_reservation 
"c86_4g_m7_fp_div_load" 22 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "false") + (eq_attr "memory" "!none")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x11") + +(define_insn_reservation "c86_4g_m7_fp_idiv_load" 26 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "true") + (eq_attr "memory" "!none")))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1*4,c86-4g-m7-fp1div1_fp3div3_x4x11") + +(define_insn_reservation "c86_4g_m7_fp_fsgn" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (eq_attr "type" "fsgn")) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +;; FCMP +(define_insn_reservation "c86_4g_m7_fp_fcmp" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "none"))) + "c86-4g-m7-double,c86-4g-m7-fpu0,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_fp_fcmp_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "load"))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu0,c86-4g-m7-fpu1") + +;; MMX +(define_insn_reservation "c86_4g_m7_fp_mmx" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (eq_attr "type" "mmx")) + "c86-4g-m7-direct") + +(define_insn_reservation "c86_4g_m7_mmx_add_cmp" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxadd,mmxcmp") + (eq_attr "memory" "none"))) + "c86-4g-m7-direct,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_mmx_add_cmp_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxadd,mmxcmp") + (eq_attr "memory" "load"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_mmx_cvt" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxcvt") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_mmx_cvt_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxcvt") + (and 
(eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_mmx_shift" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxshft") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_mmx_shift_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxshft") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_mmx_shift_avg" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxshft") + (and (eq_attr "c86_attr" "avg") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_mmx_shift_avg_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxshft") + (and (eq_attr "c86_attr" "avg") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + +;; SADBW +(define_insn_reservation "c86_4g_m7_mmx_shift_sadbw" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxshft") + (and (eq_attr "c86_attr" "sadbw") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu0") + +(define_insn_reservation "c86_4g_m7_mmx_shift_sadbw_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxshft") + (and (eq_attr "c86_attr" "sadbw") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0") + +(define_insn_reservation "c86_4g_m7_mmx_mov" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "none"))) + "c86-4g-m7-direct,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_mmx_mov_store" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxmov") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "store")))) + "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1") + 
+(define_insn_reservation "c86_4g_m7_mmx_mov_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "load"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_mmx_mul" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxmul") + (eq_attr "memory" "none"))) + "c86-4g-m7-direct,c86-4g-m7-fpu0") + +(define_insn_reservation "c86_4g_m7_mmx_mul_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mmxmul") + (eq_attr "memory" "load"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0") + +;; PINSR +(define_insn_reservation "c86_4g_m7_sse_pinsr_reg" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,mmxcvt") + (and (eq_attr "c86_attr" "insr") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "none"))))) + "c86-4g-m7-double,c86-4g-m7-ieu2,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_sse_pinsr_reg_load" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,mmxcvt") + (and (eq_attr "c86_attr" "insr") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_avx_vpinsr_reg" 2 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "insr") + (and (eq_attr "prefix" "!orig") + (eq_attr "memory" "none"))))) + "c86-4g-m7-double,c86-4g-m7-fpu_1_3x2") + +(define_insn_reservation "c86_4g_m7_avx_vpinsr_reg_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "insr") + (and (eq_attr "prefix" "!orig") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +;; PERM +(define_insn_reservation "c86_4g_m7_avx512_perm_xmm" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (ior (and (eq_attr "c86_attr" "perm2") + (eq_attr "mode" "V4SF,V2DF,TI")) + (and (eq_attr "c86_attr" "perm") + (eq_attr "mode" 
"V8SF,V4DF,TI,OI"))) + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2") + +(define_insn_reservation "c86_4g_m7_avx512_perm_xmm_opload" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (ior (and (eq_attr "c86_attr" "perm2") + (eq_attr "mode" "V4SF,V2DF,TI")) + (and (eq_attr "c86_attr" "perm") + (eq_attr "mode" "V8SF,V4DF,TI,OI"))) + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2") + +(define_insn_reservation "c86_4g_m7_avx512_permi2_ymm" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "perm2") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpux4") + +(define_insn_reservation "c86_4g_m7_avx512_permi2_zmm" 16 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "perm2") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpux16") + +(define_insn_reservation "c86_4g_m7_avx512_permi2_ymm_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "perm2") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux4") + +(define_insn_reservation "c86_4g_m7_avx512_permi2_zmm_load" 23 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "perm2") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux16") + +(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "perm") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_operand 2 "immediate_operand") + (eq_attr "memory" "none")))))) + "c86-4g-m7-direct,c86-4g-m7-fpux4") + +(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_imm_load" 11 
+ (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "perm") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_operand 2 "immediate_operand") + (eq_attr "memory" "load")))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpux4") + +(define_insn_reservation "c86_4g_m7_avx512_perm_zmm_noimm" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "perm") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_operand 2 "nonimmediate_operand") + (eq_attr "memory" "none")))))) + "c86-4g-m7-vector,c86-4g-m7-fpux8") + +(define_insn_reservation "c86_4g_m7_sse_perm_zmm_noimm_load" 15 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "perm") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_operand 2 "nonimmediate_operand") + (eq_attr "memory" "load")))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux8") + +;; VINSERT +(define_insn_reservation "c86_4g_m7_avx512_insertx_ymm" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,sselog1") + (and (eq_attr "c86_attr" "insertx") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "none")))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2,c86-4g-m7-fpu_0_2x2") + +(define_insn_reservation "c86_4g_m7_avx512_insertx_ymem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,sselog1") + (and (eq_attr "c86_attr" "insertx") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "load,both")))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2,c86-4g-m7-fpu_0_2x2") + +(define_insn_reservation "c86_4g_m7_avx512_insertx_zxmm" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "insertx") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_test "GET_MODE_SIZE (GET_MODE (operands[2]))==16") + (match_operand 2 "register_operand")))))) + 
"c86-4g-m7-double,c86-4g-m7-fpu_0_2x4,c86-4g-m7-fpu_0_2x4") + +(define_insn_reservation "c86_4g_m7_avx512_insertx_zxmem" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "insertx") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_test "GET_MODE_SIZE (GET_MODE (operands[2]))==16") + (match_operand 2 "memory_operand")))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4,c86-4g-m7-fpu_0_2x4") + +(define_insn_reservation "c86_4g_m7_avx512_insertx_zymm" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "insertx") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_test "GET_MODE_SIZE (GET_MODE (operands[2]))==32") + (match_operand 2 "register_operand")))))) + "c86-4g-m7-double,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") + +;; 512-bit VINSERT whose operand 2 is a 32-byte memory operand. The FPU +;; part of the reservation matches the register form +;; c86_4g_m7_avx512_insertx_zymm, as the other insertx reg/mem pairs do. +(define_insn_reservation "c86_4g_m7_avx512_insertx_zymem" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "insertx") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (match_test "GET_MODE_SIZE (GET_MODE (operands[2]))==32") + (match_operand 2 "memory_operand")))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx_insertx_ymm" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,sselog1") + (and (eq_attr "c86_attr" "insertx") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu0*2") + +(define_insn_reservation "c86_4g_m7_avx_insertx_ymem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,sselog1") + (and (eq_attr "c86_attr" "insertx") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0*2") + +;; SHUF/MULTISHIFTQB +(define_insn_reservation "c86_4g_m7_avx512_shuf_xymm" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "shufx") + (and (not (eq_attr "mode" 
"V8DF,V16SF,XI")) + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2") + +(define_insn_reservation "c86_4g_m7_avx512_shuf_zmm" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "shufx") + (and (eq_attr "mode" "V8DF,V16SF,XI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpu_0_2x4") + +(define_insn_reservation "c86_4g_m7_avx512_shuf_xymem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "shufx") + (and (not (eq_attr "mode" "V8DF,V16SF,XI")) + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2") + +(define_insn_reservation "c86_4g_m7_avx512_shuf_zmem" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "shufx") + (and (eq_attr "mode" "V8DF,V16SF,XI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4") + +;; SSELOGIC +(define_insn_reservation "c86_4g_m7_sselogic_xymm" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,sselog1") + (and (eq_attr "c86_attr" "sselogic") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_sselogic_xymm_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,sselog1") + (and (eq_attr "c86_attr" "sselogic") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + +;; CMPESTR +(define_insn_reservation "c86_4g_m7_avx512_cmpestr" 6 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "cmpestr") + (eq_attr "memory" "none")))) + "c86-4g-m7-vector,c86-4g-m7-fpux6") + +(define_insn_reservation "c86_4g_m7_avx512_cmpestr_load" 13 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "cmpestr") + (eq_attr "memory" "load")))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux6") + +;; SSELOG 
+(define_insn_reservation "c86_4g_m7_avx512_log" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_log_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +;; SSELOG1 +;; VDBPSADBW +(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_xymm" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "mode" "OI,TI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-double,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_xymem" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "mode" "OI,TI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_zmm" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2") + +(define_insn_reservation "c86_4g_m7_avx512_vdbpsadbw_zmem" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2") + +;; ABS +(define_insn_reservation "c86_4g_m7_avx512_abs" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1,sse") + (and (eq_attr "c86_attr" "abs") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "none"))))) + 
"c86-4g-m7-direct,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_avx512_abs_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1,sse") + (and (eq_attr "c86_attr" "abs") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "load,both"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + +;; SIGN +(define_insn_reservation "c86_4g_m7_avx_sign" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "sign") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu0|c86-4g-m7-fpu3") + +(define_insn_reservation "c86_4g_m7_avx_sign_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "sign") + (eq_attr "memory" "!none")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0|c86-4g-m7-fpu3") + +;; BLEND/ABS/AES +;; The two blend reservations cover only non-EVEX abs and blend. AES is +;; left out on purpose: an insn takes the first reservation whose +;; condition matches, and non-EVEX AES has its own c86_4g_m7_avx_aes and +;; c86_4g_m7_avx_aes_load entries further down in this section. +(define_insn_reservation "c86_4g_m7_avx_blend" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "abs,blend") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_avx_blend_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "abs,blend") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_avx512_aes" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1,ssecvt,sse") + (and (eq_attr "c86_attr" "aes") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_aes_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1,ssecvt,sse") + (and (eq_attr "c86_attr" "aes") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +(define_insn_reservation 
"c86_4g_m7_avx_aes" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "aes") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu0|c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_avx_aes_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "aes") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0|c86-4g-m7-fpu1") + +;; EXTR +(define_insn_reservation "c86_4g_m7_extr" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1,sselog,mmxcvt") + (and (eq_attr "c86_attr" "extr") + (eq_attr "memory" "none")))) + "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_extr_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sselog1,sselog,mmxcvt") + (and (eq_attr "c86_attr" "extr") + (eq_attr "memory" "!none")))) + "c86-4g-m7-double,c86-4g-m7-store,c86-4g-m7-fpu1,c86-4g-m7-fpu_0_1") + +;; SSECOMI +(define_insn_reservation "c86_4g_m7_avx_ssecomi_comi" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecomi") + (and (eq_attr "prefix_extra" "0") + (eq_attr "memory" "none")))) + "c86-4g-m7-double,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_avx_ssecomi_comi_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecomi") + (and (eq_attr "prefix_extra" "0") + (eq_attr "memory" "load")))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_avx_ssecomi_test" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecomi") + (and (eq_attr "prefix_extra" "1") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu1|c86-4g-m7-fpu2") + +(define_insn_reservation "c86_4g_m7_avx_ssecomi_test_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecomi") + (and (eq_attr "prefix_extra" "1") + (eq_attr "memory" 
"load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu2") + +;; SSEIMUL +(define_insn_reservation "c86_4g_m7_avx512_imul" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseimul") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_imul_mem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseimul") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx_imul" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseimul") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu0|c86-4g-m7-fpu3") + +(define_insn_reservation "c86_4g_m7_avx_imul_mem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseimul") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0|c86-4g-m7-fpu3") + +;; SSEMOV +(define_insn_reservation "c86_4g_m7_avx512_mov_vmov" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov,sseiadd") + (and (eq_attr "c86_attr" "other,blend,maxmin") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_avx512_mov_vmov_store" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "store")))) + "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_avx512_mov_vmov_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov,sseiadd") + (and (eq_attr "c86_attr" "other,blend,maxmin") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_avx512_vpmovx_y" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" 
"vpmovx") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "OI,V8SF,V4DF") + (eq_attr "memory" "none")))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x2") + +(define_insn_reservation "c86_4g_m7_avx512_vpmovx_y_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov,sselog1") + (and (eq_attr "c86_attr" "vpmovx") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "OI,V8SF,V4DF") + (eq_attr "memory" "load,both")))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x2") + +(define_insn_reservation "c86_4g_m7_avx512_vpmovx_z" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "vpmovx") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2x4") + +(define_insn_reservation "c86_4g_m7_avx512_vpmovx_z_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "vpmovx") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2x4") + +(define_insn_reservation "c86_4g_m7_avx512_vpmovx_x" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "vpmovx") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "TI,SI") + (eq_attr "memory" "none")))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_vpmovx_x_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "vpmovx") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "TI,SI") + (eq_attr "memory" "load")))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx_vpmovx_xx" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "vpmovx") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu1|c86-4g-m7-fpu2") + +(define_insn_reservation 
"c86_4g_m7_avx_vpmovx_xx_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "vpmovx") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "load,both"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1|c86-4g-m7-fpu2") + +;; EXPAND +(define_insn_reservation "c86_4g_m7_avx512_expand" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "expand,compress") + (and (not (eq_attr "mode" "XI,V16SF,V8DF")) + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3") + +(define_insn_reservation "c86_4g_m7_avx512_expand_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "expand,compress") + (and (not (eq_attr "mode" "XI,V16SF,V8DF")) + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3") + +(define_insn_reservation "c86_4g_m7_avx512_expand_z" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "expand,compress") + (and (eq_attr "mode" "XI,V16SF,V8DF") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3") + +(define_insn_reservation "c86_4g_m7_avx512_expand_z_load" 17 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "expand,compress") + (and (eq_attr "mode" "XI,V16SF,V8DF") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu3,c86-4g-m7-fpu_0_3") + +;; MOVNT +(define_insn_reservation "c86_4g_m7_avx512_movnt_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "movnt") + (and (eq_attr "mode" "XI,V16SF,V8DF") + (eq_attr "memory" "load"))))) + "c86-4g-m7-double,c86-4g-m7-load") + +(define_insn_reservation "c86_4g_m7_avx512_movnt_store" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "movnt") + (and (eq_attr 
"mode" "XI,V16SF,V8DF") + (eq_attr "memory" "store"))))) + "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1*2") + +(define_insn_reservation "c86_4g_m7_sse_movnt_store" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov,mmxmov") + (and (eq_attr "c86_attr" "movnt") + (and (not (eq_attr "mode" "XI,V16SF,V8DF")) + (eq_attr "memory" "!none"))))) + "c86-4g-m7-direct,c86-4g-m7-store,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_sse_movnt" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "movnt") + (and (not (eq_attr "mode" "XI,V16SF,V8DF")) + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +;; BLENDV +(define_insn_reservation "c86_4g_m7_avx512_blendv" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "blendv") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_avx512_blendv_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemov") + (and (eq_attr "c86_attr" "blendv") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") + +;; SSEISHFT +(define_insn_reservation "c86_4g_m7_avx512_sseishft_aligr" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseishft") + (and (eq_attr "prefix_extra" "1") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_sseishft_aligr_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseishft") + (and (eq_attr "prefix_extra" "1") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_sseishft_vshift" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseishft") + (and (eq_attr "prefix_extra" "!1") + (eq_attr "memory" "none")))) + 
"c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_sseishft_vshift_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseishft") + (and (eq_attr "prefix_extra" "!1") + (eq_attr "memory" "!none")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + + +;; SSEADD +;; EVEX-prefixed max/min: the register-only form, then the load form. +;; Each tests the memory attribute exactly once. +(define_insn_reservation "c86_4g_m7_avx512_sseadd_maxmin_xy" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd,sse") + (and (eq_attr "c86_attr" "maxmin") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_sseadd_maxmin_xy_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd,sse") + (and (eq_attr "c86_attr" "maxmin") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx_sseadd_maxmin" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd,sse") + (and (eq_attr "c86_attr" "maxmin") + (and (eq_attr "prefix" "vex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_avx_sseadd_maxmin_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd,sse") + (and (eq_attr "c86_attr" "maxmin") + (and (eq_attr "prefix" "vex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_sse_sseadd_maxmin" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd,sse") + (and (eq_attr "c86_attr" "maxmin") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu2|c86-4g-m7-fpu3") + +(define_insn_reservation "c86_4g_m7_sse_sseadd_maxmin_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd,sse") + (and (eq_attr "c86_attr" "maxmin") + (and 
(eq_attr "prefix" "orig") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu2|c86-4g-m7-fpu3") + +;; SUB/ADD +(define_insn_reservation "c86_4g_m7_avx512_sseadd_xy" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_sseadd_xy_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +;; HADD/HSUB +(define_insn_reservation "c86_4g_m7_avx_sseadd_hplus" 7 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd,sseadd1") + (and (eq_attr "c86_attr" "hplus") + (eq_attr "memory" "none")))) + "c86-4g-m7-vector") + +(define_insn_reservation "c86_4g_m7_avx_sseadd_hplus_load" 14 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseadd,sseadd1") + (and (eq_attr "c86_attr" "hplus") + (eq_attr "memory" "load")))) + "c86-4g-m7-vector,c86-4g-m7-load") + +;; SSEIADD +(define_insn_reservation "c86_4g_m7_avx512_sseiadd_madd" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "sadbw,madd") + (and (ior (eq_attr "prefix" "evex") + (eq_attr "mode" "XI")) + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_sseiadd_madd_mem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "sadbw,madd") + (and (ior (eq_attr "prefix" "evex") + (eq_attr "mode" "XI")) + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx_sseiadd_sadbw" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "prefix" "vex,maybe_evex") + (and (eq_attr "mode" "TI,OI") + 
(eq_attr "memory" "none")))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_avx_sseiadd_sadbw_mem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "prefix" "vex,maybe_evex") + (and (eq_attr "mode" "TI,OI") + (eq_attr "memory" "load")))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_sse_sseiadd_sadbw" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu0|c86-4g-m7-fpu3") + +(define_insn_reservation "c86_4g_m7_sse_sseiadd_sadbw_mem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "sadbw") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0|c86-4g-m7-fpu3") + +(define_insn_reservation "c86_4g_m7_sse_sseiadd_madd" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "madd") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu0") + +(define_insn_reservation "c86_4g_m7_sse_sseiadd_madd_mem" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "madd") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu0") + +;; AVG +(define_insn_reservation "c86_4g_m7_avx512_sseiadd_avg" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "avg") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_avx512_sseiadd_avg_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "avg") + (eq_attr "memory" "load")))) + 
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + +(define_insn_reservation "c86_4g_m7_avx_sseiadd_hplus" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd,sseiadd1") + (and (eq_attr "c86_attr" "hplus") + (and (eq_attr "prefix" "vex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector") + +(define_insn_reservation "c86_4g_m7_avx_sseiadd_hplus_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd,sseiadd1") + (and (eq_attr "c86_attr" "hplus") + (and (eq_attr "prefix" "vex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load") + +(define_insn_reservation "c86_4g_m7_sse_sseiadd_hplus" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd,sseiadd1") + (and (eq_attr "c86_attr" "hplus") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpux2") + +(define_insn_reservation "c86_4g_m7_sse_sseiadd_hplus_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sseiadd,sseiadd1") + (and (eq_attr "c86_attr" "hplus") + (and (eq_attr "prefix" "orig") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpux2") + +;; SSEMUL +(define_insn_reservation "c86_4g_m7_avx512_ssemul" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "none"))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_ssemul_load" 10 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +;; SSEDIV +(define_insn_reservation "c86_4g_m7_avx512_ssediv_x" 13 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "SF,DF,V4SF,V2DF") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x8") + +(define_insn_reservation "c86_4g_m7_avx512_ssediv_xmem" 20 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr 
"mode" "SF,DF,V4SF,V2DF") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x8") + +(define_insn_reservation "c86_4g_m7_avx512_ssediv_y" 13 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V8SF,V4DF") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*8") + +(define_insn_reservation "c86_4g_m7_avx512_ssediv_ymem" 20 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V8SF,V4DF") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*8") + +(define_insn_reservation "c86_4g_m7_avx512_ssediv_z" 24 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V16SF,V8DF") + (eq_attr "memory" "none")))) + "c86-4g-m7-double,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*20") + +(define_insn_reservation "c86_4g_m7_avx512_ssediv_zmem" 31 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V16SF,V8DF") + (eq_attr "memory" "load")))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*20") + +;; SSECMP +(define_insn_reservation "c86_4g_m7_avx512_ssecmp" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "V2DF,V4DF,V8SF,V4SF,SF,DF") + (eq_attr "memory" "none"))))) + "c86-4g-m7-double,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "V2DF,V4DF,V8SF,V4SF,SF,DF") + (eq_attr "memory" "load"))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_z" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and 
(eq_attr "c86_attr" "other") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2") + +;; Load form of the 512-bit compare. Its FPU part is the same as the +;; register form directly above. +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_z_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_0_2,c86-4g-m7-fpu_1_3x2") + +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "TI,OI") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))))) + "c86-4g-m7-double,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "TI,OI") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") + +;; EVEX integer compare and test in XI mode. The register and load forms +;; reserve the same FPU units; only the load unit and latency differ. +;; NOTE(review): fpu_1_3x2 was taken from the load form - confirm against +;; the hardware documentation. +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_z" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "XI") + (and (eq_attr "c86_attr" "other,ptest") + (eq_attr "memory" "none")))))) + "c86-4g-m7-double,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3x2") + +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_vp_z_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "XI") + (and (eq_attr "c86_attr" "other,ptest") + (eq_attr "memory" "load")))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3x2") + +(define_insn_reservation "c86_4g_m7_avx_ssecmp_vp" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu") + 
+(define_insn_reservation "c86_4g_m7_avx_ssecmp_vp_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu") + +;; VPTEST +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test" 6 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "mode" "TI,OI") + (and (eq_attr "c86_attr" "ptest") + (eq_attr "memory" "none"))))) + "c86-4g-m7-double,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") + +;; Load form of the ptest reservation above. It reserves the same FPU +;; units as the register form, with the load unit added. +(define_insn_reservation "c86_4g_m7_avx512_ssecmp_test_load" 13 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "mode" "TI,OI") + (and (eq_attr "c86_attr" "ptest") + (eq_attr "memory" "load"))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu,c86-4g-m7-fpu_1_3") + +;; SSECVT +(define_insn_reservation "c86_4g_m7_avx512_ssecvt_xy" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "prefix" "evex") + (and (eq_attr "mode" "TI,V4SF,V2DF,OI,V8SF,V4DF") + (and (not (ior (match_operand:V8DI 1 "register_operand") + (match_operand:V8DF 1 "register_operand"))) + (eq_attr "memory" "none"))))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +;; EVEX conversions with a memory access. A V8DI or V8DF operand 1, +;; whether register or memory, is excluded here: this reservation comes +;; first, so without the exclusion a 512-bit memory source would match it +;; and never reach c86_4g_m7_avx512_ssecvt_y_z_load. +(define_insn_reservation "c86_4g_m7_avx512_ssecvt_xy_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "prefix" "evex") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "mode" "TI,V4SF,V2DF,OI,V8SF,V4DF") + (and (not (ior (match_operand:V8DI 1 "nonimmediate_operand") + (match_operand:V8DF 1 "nonimmediate_operand"))) + (eq_attr "memory" "!none"))))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_ssecvt_y_z" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "mode" "OI,V8SF,V4DF") + (and (eq_attr "c86_attr" "other") + (and (ior (match_operand:V8DI 1 "register_operand") + 
(match_operand:V8DF 1 "register_operand")) + (eq_attr "memory" "none")))))) + "c86-4g-m7-double,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_ssecvt_y_z_load" 15 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "mode" "OI,V8SF,V4DF") + (and (eq_attr "c86_attr" "other") + (and (ior (match_operand:V8DI 1 "memory_operand") + (match_operand:V8DF 1 "memory_operand")) + (eq_attr "memory" "!none")))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_ssecvt_z" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "mode" "XI,V16SF,V8DF") + (eq_attr "memory" "none"))))) + "c86-4g-m7-double,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_ssecvt_z_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "mode" "XI,V16SF,V8DF") + (eq_attr "memory" "!none"))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx_ssecvt" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "prefix" "!evex") + (and (eq_attr "mmx_isa" "base") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu2|c86-4g-m7-fpu3") + +(define_insn_reservation "c86_4g_m7_avx_ssecvt_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "prefix" "!evex") + (and (eq_attr "mmx_isa" "base") + (eq_attr "memory" "!none"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu2|c86-4g-m7-fpu3") + +;; CVTPI +(define_insn_reservation "c86_4g_m7_sse_ssecvt_pspi" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "mode" "SF,DI") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_sse_ssecvt_pspi_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr 
"type" "ssecvt") + (and (eq_attr "mode" "SF,DI") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu1") + +(define_insn_reservation "c86_4g_m7_sse_ssecvt_pi" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (not (eq_attr "mode" "SF,DI")) + (and (eq_attr "mmx_isa" "native") + (eq_attr "memory" "none"))))) + "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_sse_ssecvt_pi_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssecvt") + (and (not (eq_attr "mode" "SF,DI")) + (and (eq_attr "mmx_isa" "native") + (eq_attr "memory" "load"))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu1,c86-4g-m7-fpu_0_1") + +;; SSEMULADD +(define_insn_reservation "c86_4g_m7_avx512_muladd" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemuladd") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_muladd_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemuladd") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_muladd_madd" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemuladd,sse") + (and (eq_attr "c86_attr" "madd,rcp") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_muladd_madd_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "ssemuladd,sse") + (and (eq_attr "c86_attr" "madd,rcp") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +;; SSE +(define_insn_reservation "c86_4g_m7_avx512_sse_range" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "other") + (and (eq_attr 
"length_immediate" "!1") + (and (eq_attr "prefix" "evex") + (and (eq_attr "c86_decode" "direct") + (eq_attr "memory" "none"))))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_sse_range_load" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "length_immediate" "!1") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "prefix" "evex") + (eq_attr "memory" "load"))))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_2") + +(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_x" 2 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "TI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x2") + +(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_x_load" 9 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "TI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x2") + +(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "OI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x3") + +(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_y_load" 12 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "OI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x3") + +(define_insn_reservation "c86_4g_m7_avx512_sse_conflict_z" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "none"))))) + "c86-4g-m7-vector,c86-4g-m7-fpu_1_3x6") + +(define_insn_reservation 
"c86_4g_m7_avx512_sse_conflict_z_load" 15 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "mode" "XI") + (eq_attr "memory" "load"))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3x6") + +(define_insn_reservation "c86_4g_m7_avx512_sse_class" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "length_immediate" "1") + (and (not (eq_attr "mode" "V32HF,V16SF,V8DF")) + (eq_attr "memory" "none")))))) + "c86-4g-m7-double,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_sse_class_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "length_immediate" "1") + (and (not (eq_attr "mode" "V32HF,V16SF,V8DF")) + (eq_attr "memory" "load")))))) + "c86-4g-m7-double,c86-4g-m7-load,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_sse_class_z" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "length_immediate" "1") + (and (eq_attr "mode" "V32HF,V16SF,V8DF") + (eq_attr "memory" "none")))))) + "c86-4g-m7-vector,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_sse_class_z_load" 11 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "other") + (and (eq_attr "length_immediate" "1") + (and (eq_attr "mode" "V32HF,V16SF,V8DF") + (eq_attr "memory" "load")))))) + "c86-4g-m7-vector,c86-4g-m7-load,c86-4g-m7-fpu_1_3,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx_sse" 5 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "rcp,other") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fpu_0_1") + +(define_insn_reservation "c86_4g_m7_avx_sse_load" 12 + (and (eq_attr "cpu" 
"c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "c86_attr" "rcp,other") + (and (eq_attr "prefix" "!evex") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fpu_0_1") + +;; SSE SQRT +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_x" 14 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "SF,V4SF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x9") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_xload" 21 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "SF,V4SF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x9") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_y" 14 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "V8SF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*9") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_yload" 21 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "V8SF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*9") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_z" 26 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "V16SF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*22") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_sf_zload" 33 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "V16SF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "load"))))) + 
"c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*22") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_x" 20 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "DF,V2DF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fp1div1_fp3div3_x4x15") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_xload" 27 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "DF,V2DF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp1div1_fp3div3_x4x15") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_y" 20 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "V4DF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*15") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_yload" 27 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "V4DF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*15") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_z" 38 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "V8DF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "none"))))) + "c86-4g-m7-direct,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*34") + +(define_insn_reservation "c86_4g_m7_avx512_sse_sqrt_df_zload" 45 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "sse") + (and (eq_attr "mode" "V8DF") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "memory" "load"))))) + "c86-4g-m7-direct,c86-4g-m7-load,c86-4g-m7-fp13div13x4,c86-4g-m7-fdiv13*34") + +;; MSKLOG/MSKMOV +(define_insn_reservation "c86_4g_m7_avx512_msklog" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" 
"msklog") + (eq_attr "c86_decode" "direct"))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_msklog_vector" 4 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "msklog") + (eq_attr "c86_decode" "vector"))) + "c86-4g-m7-vector,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_mskmov_reg_k" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mskmov") + (and (match_operand 0 "register_operand" "r") + (eq_attr "memory" "none")))) + "c86-4g-m7-double,c86-4g-m7-fpu3,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_mskmov_xy_k" 2 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mskmov") + (ior (match_operand:V2DI 0 "register_operand" "v") + (match_operand:V4DI 0 "register_operand" "v")))) + "c86-4g-m7-double,c86-4g-m7-fpu3,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_mskmov_z_k" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mskmov") + (match_operand:V8DI 0 "register_operand" "v"))) + "c86-4g-m7-vector,c86-4g-m7-fpu3,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_mskmov_k_k" 1 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mskmov") + (and (match_operand 0 "register_operand" "k") + (match_operand 1 "register_operand" "k")))) + "c86-4g-m7-direct,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_mskmov_k_reg" 3 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mskmov") + (and (match_operand 0 "register_operand" "k") + (match_operand 1 "register_operand" "r")))) + "c86-4g-m7-double,c86-4g-m7-fpu1,c86-4g-m7-fpu_1_3") + +(define_insn_reservation "c86_4g_m7_avx512_mskmov_k_m" 8 + (and (eq_attr "cpu" "c86_4g_m7") + (and (eq_attr "type" "mskmov") + (and (match_operand 0 "register_operand" "k") + (match_operand 1 "memory_operand")))) + "c86-4g-m7-direct,c86-4g-m7-load") diff --git a/gcc/config/i386/c86-4g.md b/gcc/config/i386/c86-4g.md new file mode 100644 index 
0000000000000000000000000000000000000000..8b81fcaabb28571106ab6972f7d3b1a0e7675d6c --- /dev/null +++ b/gcc/config/i386/c86-4g.md @@ -0,0 +1,1214 @@ +;; Copyright (C) 2026 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; + + +(define_attr "c86_decode" "direct,vector,double" + (const_string "direct")) + +(define_attr "c86_attr" "other,abs,sqrt,maxmin,blend,blendv,rcp,movnt,avg, + sign,sadbw,insr,perm2,perm,insertx,shufx,madd, + compress,sselogic,cmpestr,extr,vpmovx,expand,aes, + hplus,ptest" + (const_string "other")) + +;; HYGON Scheduling +;; Modeling automatons for decoders, integer execution pipes, +;; AGU pipes, floating point execution units, integer and +;; floating point dividers. Split fp1 into its own automaton +;; to keep this unit independent without increasing the main +;; c86_4g_fp024 state space. +(define_automaton "c86_4g, c86_4g_ieu, c86_4g_fp024, c86_4g_fp1, c86_4g_agu, c86_4g_idiv, c86_4g_fdiv") + +;; Decoders unit has 4 decoders and all of them can decode fast path +;; and vector type instructions. +(define_cpu_unit "c86-4g-decode0" "c86_4g") +(define_cpu_unit "c86-4g-decode1" "c86_4g") +(define_cpu_unit "c86-4g-decode2" "c86_4g") +(define_cpu_unit "c86-4g-decode3" "c86_4g") + +;; Currently blocking all decoders for vector path instructions as +;; they are dispatched separately as microcode sequence. 
+;; Fix me: Need to revisit this. +(define_reservation "c86-4g-vector" "c86-4g-decode0+c86-4g-decode1+c86-4g-decode2+c86-4g-decode3") + +;; Direct instructions can be issued to any of the four decoders. +(define_reservation "c86-4g-direct" "c86-4g-decode0|c86-4g-decode1|c86-4g-decode2|c86-4g-decode3") + +;; Fix me: Need to revisit this later to simulate fast path double behavior. +(define_reservation "c86-4g-double" "c86-4g-direct") + +;; Integer unit 4 ALU pipes. +(define_cpu_unit "c86-4g-ieu0" "c86_4g_ieu") +(define_cpu_unit "c86-4g-ieu1" "c86_4g_ieu") +(define_cpu_unit "c86-4g-ieu2" "c86_4g_ieu") +(define_cpu_unit "c86-4g-ieu3" "c86_4g_ieu") +(define_reservation "c86-4g-ieu" "c86-4g-ieu0|c86-4g-ieu1|c86-4g-ieu2|c86-4g-ieu3") + +;; One separated integer divider. +(define_cpu_unit "c86-4g-idiv" "c86_4g_idiv") + +;; 2 AGU pipes in c86_4g +;; According to CPU diagram last AGU unit is used only for stores. +(define_cpu_unit "c86-4g-agu0" "c86_4g_agu") +(define_cpu_unit "c86-4g-agu1" "c86_4g_agu") +(define_reservation "c86-4g-agu-reserve" "c86-4g-agu0|c86-4g-agu1") + +;; Load is 4 cycles. We do not model reservation of load unit. +;;(define_reservation "c86-4g-load" "c86-4g-agu-reserve, nothing, nothing, nothing") +(define_reservation "c86-4g-load" "c86-4g-agu-reserve") +(define_reservation "c86-4g-store" "c86-4g-agu-reserve") + +;; vectorpath (microcoded) instructions are single issue instructions. +;; So, they occupy all the integer units. +(define_reservation "c86-4g-ivector" "c86-4g-ieu0+c86-4g-ieu1 + +c86-4g-ieu2+c86-4g-ieu3 + +c86-4g-agu0+c86-4g-agu1") + +;; Floating point unit 4 FP pipes. 
+(define_cpu_unit "c86-4g-fp0" "c86_4g_fp024") +(define_cpu_unit "c86-4g-fp1" "c86_4g_fp1") +(define_cpu_unit "c86-4g-fp2" "c86_4g_fp024") +(define_cpu_unit "c86-4g-fp3" "c86_4g_fp024") + +(define_reservation "c86-4g-fpu" "c86-4g-fp0|c86-4g-fp1|c86-4g-fp2|c86-4g-fp3") + +(define_reservation "c86-4g-fvector" "c86-4g-fp0+c86-4g-fp1 + +c86-4g-fp2+c86-4g-fp3 + +c86-4g-agu0+c86-4g-agu1") + +;; One separated FP divider. +(define_cpu_unit "c86-4g-fdiv" "c86_4g_fdiv") + +(define_reservation "c86-4g-fp1fdivx4" "(c86-4g-fp1+c86-4g-fdiv)*4") + +;; Call instruction +(define_insn_reservation "c86_4g_call" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "type" "call,callv")) + "c86-4g-double,c86-4g-store,c86-4g-ieu0+c86-4g-ieu3") + +;; General instructions +(define_insn_reservation "c86_4g_push" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "push") + (eq_attr "memory" "store"))) + "c86-4g-direct,c86-4g-store") + +(define_insn_reservation "c86_4g_push_load" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "push") + (eq_attr "memory" "both"))) + "c86-4g-direct,c86-4g-load+c86-4g-store") + +(define_insn_reservation "c86_4g_pop" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "pop") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load") + +(define_insn_reservation "c86_4g_pop_mem" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "pop") + (eq_attr "memory" "both"))) + "c86-4g-direct,c86-4g-load,c86-4g-store") + +;; Leave +(define_insn_reservation "c86_4g_leave" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "type" "leave")) + "c86-4g-double,c86-4g-ieu+c86-4g-store") + +;; Integer Instructions or General instructions +;; Multiplications +;; Reg operands +(define_insn_reservation "c86_4g_imul" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "imul") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-ieu1") + +(define_insn_reservation 
"c86_4g_imul_mem" 7 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "imul") + (eq_attr "memory" "!none"))) + "c86-4g-direct,c86-4g-load, c86-4g-ieu1") + +;; Divisions +;; Reg operands +(define_insn_reservation "c86_4g_idiv_DI" 41 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-ieu2,c86-4g-idiv*41") + +(define_insn_reservation "c86_4g_idiv_SI" 25 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-ieu2,c86-4g-idiv*25") + +(define_insn_reservation "c86_4g_idiv_HI" 17 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-ieu2,c86-4g-idiv*17") + +(define_insn_reservation "c86_4g_idiv_QI" 15 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-ieu2,c86-4g-idiv*15") + +;; Mem operands +(define_insn_reservation "c86_4g_idiv_mem_DI" 45 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-ieu2,c86-4g-idiv*41") + +(define_insn_reservation "c86_4g_idiv_mem_SI" 29 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-ieu2,c86-4g-idiv*25") + +(define_insn_reservation "c86_4g_idiv_mem_HI" 21 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-ieu2,c86-4g-idiv*17") + +(define_insn_reservation "c86_4g_idiv_mem_QI" 19 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" 
"idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-ieu2,c86-4g-idiv*15") + +;; STR ISHIFT which are micro coded. +;; Fix me: Latency need to be rechecked. +(define_insn_reservation "c86_4g_str_ishift" 6 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "str,ishift") + (eq_attr "memory" "both,store"))) + "c86-4g-vector,c86-4g-ivector") + +;; MOV - integer moves +(define_insn_reservation "c86_4g_load_imov_double" 2 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "double") + (and (eq_attr "type" "imovx") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-ieu") + +(define_insn_reservation "c86_4g_load_imov_direct" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "!double") + (and (eq_attr "type" "imov,imovx") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-ieu") + +(define_insn_reservation "c86_4g_load_imov_double_store" 2 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "double") + (and (eq_attr "type" "imovx") + (eq_attr "memory" "store")))) + "c86-4g-double,c86-4g-ieu,c86-4g-store") + +(define_insn_reservation "c86_4g_load_imov_direct_store" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "!double") + (and (eq_attr "type" "imov,imovx") + (eq_attr "memory" "store")))) + "c86-4g-direct,c86-4g-ieu,c86-4g-store") + +(define_insn_reservation "c86_4g_load_imov_double_load" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "double") + (and (eq_attr "type" "imovx") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-ieu") + +(define_insn_reservation "c86_4g_load_imov_direct_load" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "!double") + (and (eq_attr "type" "imov,imovx") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load") + +;; INTEGER/GENERAL instructions +;; register/imm operands only: ALU, ICMP, NEG, NOT, 
ROTATE, ISHIFT, TEST +(define_insn_reservation "c86_4g_insn" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") + (eq_attr "memory" "none,unknown"))) + "c86-4g-direct,c86-4g-ieu") + +(define_insn_reservation "c86_4g_insn_load" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-ieu") + +;; FIXME: The instructions matched here have only two operands, which means memory type can only be none, load or both. +;; Store memory type handling should never take effect here? +(define_insn_reservation "c86_4g_insn_store" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") + (eq_attr "memory" "store"))) + "c86-4g-direct,c86-4g-ieu,c86-4g-store") + +(define_insn_reservation "c86_4g_insn_both" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") + (eq_attr "memory" "both"))) + "c86-4g-direct,c86-4g-load,c86-4g-ieu,c86-4g-store") + +;; Special latency for multi type. +(define_insn_reservation "c86_4g_fp_fcomp" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "memory" "none") + (and (eq_attr "unit" "i387") + (eq_attr "type" "multi")))) + "c86-4g-double,c86-4g-fp0|c86-4g-fp2") + +;; Fix me: Other vector type insns keeping latency 6 as of now. +(define_insn_reservation "c86_4g_ieu_vector" 6 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "memory" "none") + (and (eq_attr "unit" "!i387") + (eq_attr "type" "other,str,multi")))) + "c86-4g-vector,c86-4g-ivector") + +;; ALU1 register operands. 
+(define_insn_reservation "c86_4g_alu1_vector" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "vector") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none,unknown")))) + "c86-4g-vector,c86-4g-ivector") + +(define_insn_reservation "c86_4g_alu1_double" 2 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "double") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none,unknown")))) + "c86-4g-double,c86-4g-ieu") + +(define_insn_reservation "c86_4g_alu1_direct" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none,unknown")))) + "c86-4g-direct,c86-4g-ieu") + +;; Branches : Fix me need to model conditional branches. +(define_insn_reservation "c86_4g_branch" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "none"))) + "c86-4g-direct") + +;; Indirect branches check latencies. +(define_insn_reservation "c86_4g_indirect_branch_mem" 6 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "load"))) + "c86-4g-vector,c86-4g-ivector") + +;; LEA executes in ALU units with 1 cycle latency. 
+(define_insn_reservation "c86_4g_lea" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "type" "lea")) + "c86-4g-direct,c86-4g-ieu") + +;; Floating point +(define_insn_reservation "c86_4g_fp_cmov" 6 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "type" "fcmov")) + "c86-4g-vector,c86-4g-fvector") + + +(define_insn_reservation "c86_4g_fp_mov_direct_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1") + +(define_insn_reservation "c86_4g_fp_mov_direct_store" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "store")))) + "c86-4g-direct,c86-4g-fp2|c86-4g-fp3,c86-4g-store") + +(define_insn_reservation "c86_4g_fp_mov_double" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "double") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-fp1") + +(define_insn_reservation "c86_4g_fp_mov_double_load" 12 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "double") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-fp1") + +(define_insn_reservation "c86_4g_fp_mov_direct" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fp1") + +;; SQRT +(define_insn_reservation "c86_4g_fp_sqrt" 22 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fpspc") + (eq_attr "c86_attr" "sqrt"))) + "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*18") + +(define_insn_reservation "c86_4g_sse_sqrt_sf" 14 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "SF,V4SF,V8SF") + (and (eq_attr "memory" "none,unknown") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) + 
"c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*10") + +(define_insn_reservation "c86_4g_sse_sqrt_sf_mem" 21 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "SF,V4SF,V8SF") + (and (eq_attr "memory" "load") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*10") + +(define_insn_reservation "c86_4g_sse_sqrt_df" 20 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "DF,V2DF,V4DF") + (and (eq_attr "memory" "none,unknown") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) + "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*16") + +(define_insn_reservation "c86_4g_sse_sqrt_df_mem" 27 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "DF,V2DF,V4DF") + (and (eq_attr "memory" "load") + (and (eq_attr "c86_attr" "sqrt") + (eq_attr "type" "sse"))))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*16") + +;; RCP +(define_insn_reservation "c86_4g_sse_rcp" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V4SF,V8SF,SF") + (and (eq_attr "memory" "none") + (and (eq_attr "c86_attr" "rcp") + (eq_attr "type" "sse"))))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_sse_rcp_mem" 12 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V4SF,V8SF,SF") + (and (eq_attr "memory" "load") + (and (eq_attr "c86_attr" "rcp") + (eq_attr "type" "sse"))))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") + +;; TODO: AGU? 
+(define_insn_reservation "c86_4g_fp_spc_direct" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_decode" "direct") + (and (eq_attr "type" "fpspc") + (eq_attr "memory" "store")))) + "c86-4g-direct,c86-4g-fp3") + +;; FABS +(define_insn_reservation "c86_4g_fp_absneg" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "type" "fsgn")) + "c86-4g-direct,c86-4g-fp1|c86-4g-fp3") + +;; FCMP +(define_insn_reservation "c86_4g_fp_fcmp" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "memory" "none") + (and (eq_attr "c86_decode" "double") + (eq_attr "type" "fcmp")))) + "c86-4g-double,c86-4g-fp0,c86-4g-fp1") + +(define_insn_reservation "c86_4g_fp_fcmp_load" 12 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "memory" "load") + (and (eq_attr "c86_decode" "double") + (eq_attr "type" "fcmp")))) + "c86-4g-double,c86-4g-load, c86-4g-fp0,c86-4g-fp1") + +;;FADD FSUB FMUL +(define_insn_reservation "c86_4g_fp_op_mul" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fop,fmul") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_fp_op_mul_load" 12 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fop,fmul") + (and (eq_attr "fp_int_src" "false") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_fp_op_imul_load" 16 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fop,fmul") + (and (eq_attr "fp_int_src" "true") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-fp0,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_fp_op_div" 15 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*11") + +(define_insn_reservation "c86_4g_fp_op_div_load" 22 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "false") (eq_attr "memory" 
"load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*11") + +(define_insn_reservation "c86_4g_fp_op_idiv_load" 26 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "fdiv") + (and (eq_attr "fp_int_src" "true") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-fp1*4,c86-4g-fp1fdivx4,c86-4g-fdiv*11") + +;; MMX, SSE, SSEn.n, AVX, AVX2 instructions +(define_insn_reservation "c86_4g_fp_insn" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "type" "mmx")) + "c86-4g-direct,c86-4g-fpu") + +(define_insn_reservation "c86_4g_mmx_add" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxadd") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1|c86-4g-fp3") + +(define_insn_reservation "c86_4g_mmx_add_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxadd") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1|c86-4g-fp3") + +(define_insn_reservation "c86_4g_mmx_hadd" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseadd1") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0") + +(define_insn_reservation "c86_4g_mmx_hadd_load" 10 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseadd1") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0") + +(define_insn_reservation "c86_4g_mmx_cmp" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxcmp") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp3") + +(define_insn_reservation "c86_4g_mmx_cmp_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxcmp") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp3") + +(define_insn_reservation "c86_4g_mmx_cvt_pck_shuf" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") + 
+(define_insn_reservation "c86_4g_mmx_cvt_pck_shuf_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_mmx_shift" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxshft") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp1") + +(define_insn_reservation "c86_4g_mmx_move" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp1") + +(define_insn_reservation "c86_4g_mmx_shift_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxshft") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1") + +(define_insn_reservation "c86_4g_mmx_move_load" 11 ;; mmxmov loads; mmxshft loads are covered by c86_4g_mmx_shift_load. + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1") + +(define_insn_reservation "c86_4g_mmx_move_store" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxshft,mmxmov") + (eq_attr "memory" "store,both"))) + "c86-4g-direct,c86-4g-fp2,c86-4g-store") + +(define_insn_reservation "c86_4g_mmx_mul" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxmul") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0") + +(define_insn_reservation "c86_4g_mmx_mul_load" 10 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "mmxmul") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0") + +;; sseabs +(define_insn_reservation "c86_4g_sse_abs" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_attr" "abs") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-fpu") + +(define_insn_reservation "c86_4g_sse_pinsr_reg" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sselog") + (and 
(eq_attr "c86_attr" "insr") + (and (match_operand 2 "register_operand") + (eq_attr "memory" "none"))))) + "c86-4g-direct,c86-4g-ieu2,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_pinsr" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "insr") + (and (not (match_operand 2 "register_operand")) + (eq_attr "memory" "none"))))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_log" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_log_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sselog") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_sign" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "sign") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fpu") + +(define_insn_reservation "c86_4g_sse_sign_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "sign") + (eq_attr "memory" "!none")))) + "c86-4g-direct,c86-4g-load,c86-4g-fpu") + + +(define_insn_reservation "c86_4g_sse_log1" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_log1_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sselog1") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "!none")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_extrq" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" 
"sse") + (and (eq_attr "memory" "none") + (eq_attr "prefix_data16" "1")))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_movsdup" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sse") + (and (eq_attr "memory" "none") + (eq_attr "prefix" "vex")))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_alignr" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "prefix_extra" "1")) + (and (eq_attr "type" "sseishft") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_ishift" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "prefix_extra" "!1")) + (and (eq_attr "type" "sseishft") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_ishift_load" 8 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "prefix_extra" "!1")) + (and (eq_attr "type" "sseishft") + (eq_attr "memory" "!none"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_insertimm" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseins") + (and (eq_attr "memory" "none") + (eq_attr "length_immediate" "2")))) + "c86-4g-direct,c86-4g-fp0") + +(define_insn_reservation "c86_4g_sse_insert" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseins") + (and (eq_attr "memory" "none") + (eq_attr "length_immediate" "!2")))) + "c86-4g-direct,c86-4g-fpu,c86-4g-fp0") + +(define_insn_reservation "c86_4g_sse_comi" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "SF,DF,V4SF,V2DF") + (and (eq_attr "prefix" "!vex") + (and (eq_attr "prefix_extra" "0") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "none")))))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2,c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_comi_load" 12 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" 
"SF,DF,V4SF,V2DF")) + (and (eq_attr "prefix_extra" "0") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2,c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_comi_double" 2 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V4SF,V2DF,TI")) + (and (eq_attr "prefix" "vex") + (and (eq_attr "prefix_extra" "0") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "none"))))) + "c86-4g-double,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_comi_double_load" 10 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V4SF,V2DF,TI")) + (and (eq_attr "prefix" "vex") + (and (eq_attr "prefix_extra" "0") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "load"))))) + "c86-4g-double,c86-4g-load,c86-4g-fp0|c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_test" 4 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) + (and (eq_attr "prefix_extra" "1") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_test_load" 11 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) + (and (eq_attr "prefix_extra" "1") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") + +(define_insn_reservation "c86_4g_avx256_test" 8 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V8SF,V4DF,OI")) + (and (eq_attr "prefix_extra" "1") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") + +(define_insn_reservation "c86_4g_avx256_test_load" 15 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V8SF,V4DF,OI")) + (and (eq_attr "prefix_extra" "1") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "load")))) + 
"c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp1,c86-4g-fp1") + +;; SSE moves +;; FIXME: Need to revisit this; some of the moves may be restricted +;; to some fpu pipes. + +;; movnt doesn't touch cache, so latency modeling has little impact. +(define_insn_reservation "c86_4g_sse_movnt_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_attr" "movnt") + (and (eq_attr "type" "ssemov,mmxmov,ssecvt") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load") + +(define_insn_reservation "c86_4g_sse_movnt_store" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "c86_attr" "movnt") + (and (eq_attr "type" "ssemov,mmxmov,ssecvt") + (eq_attr "memory" "store")))) + "c86-4g-direct,c86-4g-fp1") + +(define_insn_reservation "c86_4g_sse_mov" 2 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "SI") + (and (eq_attr "isa" "avx") + (and (eq_attr "type" "ssemov") + (eq_attr "memory" "none"))))) + "c86-4g-direct,c86-4g-ieu0") + +(define_insn_reservation "c86_4g_avx_mov" 2 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "TI") + (and (eq_attr "isa" "avx") + (and (eq_attr "type" "ssemov") + (and (match_operand:SI 1 "register_operand") + (eq_attr "memory" "none")))))) + "c86-4g-direct,c86-4g-ieu2") + +(define_insn_reservation "c86_4g_sseavx_mov" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) + (and (eq_attr "prefix_extra" "0") + (and (eq_attr "type" "ssemov") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fpu") + +(define_insn_reservation "c86_4g_sseavx_blend" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V2DF")) + (and (eq_attr "type" "ssemov,sselog1") + (and (eq_attr "c86_attr" "blend,blendv") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_sseavx_mov_store" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI") + (and 
(eq_attr "type" "ssemov") + (eq_attr "memory" "store")))) + "c86-4g-direct,c86-4g-fpu,c86-4g-store") + +(define_insn_reservation "c86_4g_sseavx_mov_load" 8 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) + (and (eq_attr "type" "ssemov") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fpu") + +(define_insn_reservation "c86_4g_avx256_mov" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (and (eq_attr "type" "ssemov") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-fpu") + +(define_insn_reservation "c86_4g_avx256_mov_store" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (and (eq_attr "type" "ssemov") + (eq_attr "memory" "store")))) + "c86-4g-double,c86-4g-fpu,c86-4g-store") + +(define_insn_reservation "c86_4g_avx256_mov_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF,V4DF,OI") + (and (eq_attr "type" "ssemov") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-fpu") + +;; SSE max & min +(define_insn_reservation "c86_4g_sse_maxmin" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V8SF,V2DF,V4DF,TI")) + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "none") + (eq_attr "c86_attr" "maxmin")))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_sse_maxmin_load" 8 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V8SF,V2DF,V4DF,TI")) + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "load") + (eq_attr "c86_attr" "maxmin")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_sse_pmaxmin" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "TI,OI")) + (and (eq_attr "type" "mmxadd,sseiadd") + (and (eq_attr "memory" "none") + (eq_attr "c86_attr" "maxmin")))) + "c86-4g-direct,c86-4g-fpu") + +(define_insn_reservation 
"c86_4g_sse_pmaxmin_load" 8 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "TI,OI")) + (and (eq_attr "type" "mmxadd,sseiadd") + (and (eq_attr "memory" "load") + (eq_attr "c86_attr" "maxmin")))) + "c86-4g-direct,c86-4g-load,c86-4g-fpu") + +;; SSE avg +(define_insn_reservation "c86_4g_sse_avg" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "c86_attr" "avg")) + (and (eq_attr "type" "sseiadd,mmxshft") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fpu") + +(define_insn_reservation "c86_4g_sse_avg_load" 8 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "c86_attr" "avg")) + (and (eq_attr "type" "sseiadd,mmxshft") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp3") + +;;MMX sadbw +(define_insn_reservation "c86_4g_sse_sadbw" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseiadd,mmxshft") + (and (eq_attr "c86_attr" "sadbw") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fp0") + +(define_insn_reservation "c86_4g_sse_sadbw_load" 10 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseiadd,mmxshft") + (and (eq_attr "c86_attr" "sadbw") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0") + +;; SSE add +(define_insn_reservation "c86_4g_sse_add" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "none") + (eq_attr "c86_attr" "other")))) + "c86-4g-direct,c86-4g-fp1|c86-4g-fp3") + +(define_insn_reservation "c86_4g_sse_add_load" 10 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "load") + (eq_attr "c86_attr" "!maxmin")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1|c86-4g-fp3") + +(define_insn_reservation "c86_4g_sse_fma" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "ssemuladd") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_sse_fma_load" 
12 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "ssemuladd") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_sse_iadd" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fpu") + +(define_insn_reservation "c86_4g_sse_iadd_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "sseiadd") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fpu") + +;; SSE conversions. +(define_insn_reservation "c86_4g_ssecvtsf_si_load" 12 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "SI") + (and (eq_attr "type" "sseicvt") + (and (match_operand:SF 1 "memory_operand") + (eq_attr "memory" "load"))))) + "c86-4g-double,c86-4g-load,c86-4g-fp3,c86-4g-ieu0") + +(define_insn_reservation "c86_4g_ssecvtdf_si" 5 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "SI") + (and (match_operand:DF 1 "register_operand") + (and (eq_attr "type" "sseicvt") + (eq_attr "memory" "none"))))) + "c86-4g-double,c86-4g-fp3,c86-4g-ieu0") + +(define_insn_reservation "c86_4g_ssecvtdf_si_load" 12 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "SI") + (and (eq_attr "type" "sseicvt") + (and (match_operand:DF 1 "memory_operand") + (eq_attr "memory" "load"))))) + "c86-4g-double,c86-4g-load,c86-4g-fp3,c86-4g-ieu0") + +;; All other used ssecvt fp3 pipes +;; Check: Need to revisit this again. +;; Some SSE converts may use different pipe combinations. 
+(define_insn_reservation "c86_4g_ssecvt" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "none")))) + "c86-4g-direct,c86-4g-fp1") + +(define_insn_reservation "c86_4g_ssecvt_load" 11 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "c86_attr" "other") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1") + +;; SSE div +(define_insn_reservation "c86_4g_ssediv_ss_ps" 10 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V4SF,SF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*6") + +(define_insn_reservation "c86_4g_ssediv_ss_ps_load" 17 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V4SF,SF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*6") + +(define_insn_reservation "c86_4g_ssediv_sd_pd" 13 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V2DF,DF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp1fdivx4,c86-4g-fdiv*9") + +(define_insn_reservation "c86_4g_ssediv_sd_pd_load" 20 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V2DF,DF")) + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*9") + + +(define_insn_reservation "c86_4g_ssediv_avx256_ps" 10 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF") + (and (eq_attr "memory" "none") + (eq_attr "type" "ssediv")))) + "c86-4g-double,c86-4g-fp1fdivx4,c86-4g-fdiv*6") + +(define_insn_reservation "c86_4g_ssediv_avx256_ps_load" 17 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load")))) + 
"c86-4g-double,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*6") + +(define_insn_reservation "c86_4g_ssediv_avx256_pd" 13 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V4DF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-fp1fdivx4,c86-4g-fdiv*9") + +(define_insn_reservation "c86_4g_ssediv_avx256_pd_load" 20 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V4DF") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-fp1fdivx4,c86-4g-fdiv*9") +;; SSE MUL +(define_insn_reservation "c86_4g_ssemul_ss_ps" 3 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V8SF,V4SF,SF")) + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_ssemul_ss_ps_load" 10 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V8SF,V4SF,SF")) + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_ssemul_sd_pd" 4 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V4DF,V2DF,DF")) + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_ssemul_sd_pd_load" 11 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "V4DF,V2DF,DF")) + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") + +;;SSE imul +(define_insn_reservation "c86_4g_sseimul" 3 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "TI")) + (and (eq_attr "type" "sseimul") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0") + +(define_insn_reservation "c86_4g_sseimul_avx256" 4 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "OI") + (and (eq_attr "type" "sseimul") + (eq_attr "memory" 
"none")))) + "c86-4g-double,c86-4g-fp0") + +(define_insn_reservation "c86_4g_sseimul_load" 10 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "TI")) + (and (eq_attr "type" "sseimul") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0") + +(define_insn_reservation "c86_4g_sseimul_avx256_load" 11 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "OI") + (and (eq_attr "type" "sseimul") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-fp0") + +(define_insn_reservation "c86_4g_sseimul_di" 3 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "DI") + (and (eq_attr "memory" "none") + (eq_attr "type" "sseimul")))) + "c86-4g-direct,c86-4g-fp0") + +(define_insn_reservation "c86_4g_sseimul_load_di" 10 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "DI") + (and (eq_attr "type" "sseimul") + (eq_attr "memory" "load")))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0") + +;; SSE compares +(define_insn_reservation "c86_4g_sse_cmp" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V2DF")) + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_sse_cmp_load" 8 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "SF,DF,V4SF,V2DF")) + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fp0|c86-4g-fp2") + + +(define_insn_reservation "c86_4g_sse_cmp_avx256" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF,V4DF") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-fp0|c86-4g-fp2") + +(define_insn_reservation "c86_4g_sse_cmp_avx256_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "V8SF,V4DF") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-fp0|c86-4g-fp2") + 
+(define_insn_reservation "c86_4g_sse_icmp" 1 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "QI,HI,SI,DI,TI")) + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "none"))) + "c86-4g-direct,c86-4g-fpu") + + +(define_insn_reservation "c86_4g_sse_icmp_load" 8 + (and (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (eq_attr "mode" "QI,HI,SI,DI,TI")) + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load"))) + "c86-4g-direct,c86-4g-load,c86-4g-fpu") + + +(define_insn_reservation "c86_4g_sse_icmp_avx256" 1 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "OI") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "none")))) + "c86-4g-double,c86-4g-fpu") + + +(define_insn_reservation "c86_4g_sse_icmp_avx256_load" 8 + (and (eq_attr "cpu" "c86_4g_m4,c86_4g_m6") + (and (eq_attr "mode" "OI") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load")))) + "c86-4g-double,c86-4g-load,c86-4g-fpu") diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index a4c2fed7edaba5a2b0cdf826770ff1cbcb3b06e3..6ab68edb311c2a6687ebbbe1a817984703b455d4 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -208,6 +208,10 @@ #define signature_SHANGHAI_ecx 0x20206961 #define signature_SHANGHAI_edx 0x68676e61 +#define signature_HYGON_ebx 0x6f677948 +#define signature_HYGON_ecx 0x656e6975 +#define signature_HYGON_edx 0x6e65476e + #ifndef __x86_64__ /* At least one cpu (Winchip 2) does not set %ebx and %ecx for cpuid leaf 1. 
Forcibly zero the two registers before diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 2670c90b288684e17d2084bf6ce6022b07963877..d17ad2515ea100e43fc9a7ab6b497e9657890bbf 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -496,6 +496,16 @@ const char *host_detect_local_cpu (int argc, const char **argv) else processor = PROCESSOR_PENTIUM; } + else if (vendor == VENDOR_HYGON) + { + processor = PROCESSOR_GENERIC; + if (model == 4) + processor = PROCESSOR_C86_4G_M4; + else if (model == 6) + processor = PROCESSOR_C86_4G_M6; + else if (model >= 7) + processor = PROCESSOR_C86_4G_M7; + } else if (vendor == VENDOR_CENTAUR) { processor = PROCESSOR_GENERIC; @@ -802,6 +812,15 @@ const char *host_detect_local_cpu (int argc, const char **argv) break; case PROCESSOR_SHIJIDADAO: cpu = "shijidadao"; + break; + case PROCESSOR_C86_4G_M4: + cpu = "c86-4g-m4"; + break; + case PROCESSOR_C86_4G_M6: + cpu = "c86-4g-m6"; + break; + case PROCESSOR_C86_4G_M7: + cpu = "c86-4g-m7"; break; default: diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index aae6840337ebea0274a55ff3328f4a54dca733bf..1505963032f5be1ef787498743efe782a8a5630f 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -262,6 +262,18 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__rocketlake"); def_or_undef (parse_in, "__rocketlake__"); break; + case PROCESSOR_C86_4G_M4: + def_or_undef (parse_in, "__c86_4g_m4"); + def_or_undef (parse_in, "__c86_4g_m4__"); + break; + case PROCESSOR_C86_4G_M6: + def_or_undef (parse_in, "__c86_4g_m6"); + def_or_undef (parse_in, "__c86_4g_m6__"); + break; + case PROCESSOR_C86_4G_M7: + def_or_undef (parse_in, "__c86_4g_m7"); + def_or_undef (parse_in, "__c86_4g_m7__"); + break; /* use PROCESSOR_max to not set/unset the arch macro. 
*/ case PROCESSOR_max: break; @@ -440,6 +452,15 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_ROCKETLAKE: def_or_undef (parse_in, "__tune_rocketlake__"); break; + case PROCESSOR_C86_4G_M4: + def_or_undef (parse_in, "__tune_c86_4g_m4__"); + break; + case PROCESSOR_C86_4G_M6: + def_or_undef (parse_in, "__tune_c86_4g_m6__"); + break; + case PROCESSOR_C86_4G_M7: + def_or_undef (parse_in, "__tune_c86_4g_m7__"); + break; case PROCESSOR_INTEL: case PROCESSOR_GENERIC: break; diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 5bf08f394b316794c0d37c3fee6760d7431bd599..98cf03eaa15131c322c8a62e85b9a20af933e48e 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -170,6 +170,10 @@ along with GCC; see the file COPYING3. If not see #define m_ZNVER (m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4) #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \ | m_ZNVER) +#define m_C86_4G_M4 (HOST_WIDE_INT_1U<\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "prefix" "orig,vex") + (set_attr "c86_attr" "maxmin") (set_attr "type" "sseadd") (set_attr "mode" "")]) @@ -20988,6 +21024,7 @@ v\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "prefix" "orig,maybe_evex") + (set_attr "c86_attr" "maxmin") (set_attr "type" "sseadd") (set_attr "mode" "")]) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 197f19e4b1a49ceb593e663802149d584957b4cc..bbf0ed9b92faed9d44f221e05085f114f1ac2694 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -543,6 +543,7 @@ [(set_attr "isa" "*,x64") (set_attr "mmx_isa" "native,*") (set_attr "type" "mmxmov,ssemov") + (set_attr "c86_attr" "movnt") (set_attr "mode" "DI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1130,6 +1131,7 @@ vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blendv") (set_attr 
"length_immediate" "1") (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") @@ -2030,6 +2032,7 @@ [(set_attr "isa" "*,sse2_noavx,avx") (set_attr "mmx_isa" "native,*,*") (set_attr "type" "mmxmul,sseiadd,sseiadd") + (set_attr "c86_attr" "madd") (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_pmulhrwv4hi3" @@ -2702,6 +2705,7 @@ vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blendv") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "*,*,1") (set_attr "prefix" "orig,orig,vex") @@ -2722,6 +2726,7 @@ vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blendv") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "*,*,1") (set_attr "prefix" "orig,orig,vex") @@ -3223,6 +3228,7 @@ "%vpmovbw\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -3237,6 +3243,7 @@ "%vpmovwd\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -3251,6 +3258,7 @@ "%vpmovbw\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -3360,6 +3368,7 @@ (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "type" "sselog") + (set_attr "c86_attr" "insr") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) @@ -3411,6 +3420,7 @@ [(set_attr "isa" "*,sse2_noavx,avx") (set_attr "mmx_isa" "native,*,*") (set_attr "type" "mmxcvt,sselog,sselog") + (set_attr "c86_attr" "insr") (set_attr 
"length_immediate" "1") (set_attr "mode" "DI,TI,TI")]) @@ -3444,6 +3454,7 @@ } [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") + (set_attr "c86_attr" "insr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -3464,6 +3475,7 @@ [(set_attr "isa" "*,sse2,sse4") (set_attr "mmx_isa" "native,*,*") (set_attr "type" "mmxcvt,sselog1,sselog1") + (set_attr "c86_attr" "extr") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,maybe_vex,maybe_vex") (set_attr "mode" "DI,TI,TI")]) @@ -3482,6 +3494,7 @@ [(set_attr "isa" "*,sse2") (set_attr "mmx_isa" "native,*") (set_attr "type" "mmxcvt,sselog1") + (set_attr "c86_attr" "extr") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,maybe_vex") (set_attr "mode" "DI,TI")]) @@ -3496,6 +3509,7 @@ %vpextrb\t{%2, %1, %k0|%k0, %1, %2} %vpextrb\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -3638,6 +3652,7 @@ vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blend") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,orig,vex") @@ -3656,6 +3671,7 @@ vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blend") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,orig,vex") @@ -3824,6 +3840,7 @@ [(set_attr "isa" "*,sse4,sse2,noavx,*,*,*") (set_attr "mmx_isa" "native,*,*,*,native,*,*") (set_attr "type" "mmxcvt,ssemov,sseshuf1,sseshuf1,mmxmov,ssemov,imov") + (set_attr "c86_attr" "*,extr,*,*,*,*,*") (set (attr "length_immediate") (if_then_else (eq_attr "alternative" "1,2,3") (const_string "1") @@ -3850,6 +3867,7 @@ && TARGET_64BIT && TARGET_SSE4_1" "%vpextrd\t{$1, %1, %k0|%k0, %1, 1}" [(set_attr "type" "sselog1") + 
(set_attr "c86_attr" "extr") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") @@ -3992,6 +4010,7 @@ } [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") + (set_attr "c86_attr" "insr") (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) @@ -4025,6 +4044,7 @@ } [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") + (set_attr "c86_attr" "insr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -4042,6 +4062,7 @@ %vpextrw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,sse4") (set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -4055,6 +4076,7 @@ "TARGET_SSE2" "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -4069,6 +4091,7 @@ %vpextrb\t{%2, %1, %k0|%k0, %1, %2} %vpextrb\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -4084,6 +4107,7 @@ "TARGET_SSE4_1" "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -4419,6 +4443,7 @@ [(set_attr "isa" "*,sse2_noavx,avx") (set_attr "mmx_isa" "native,*,*") (set_attr "type" "mmxshft,sseiadd,sseiadd") + (set_attr "c86_attr" "sadbw") (set_attr "mode" "DI,TI,TI")]) (define_expand "reduc_plus_scal_v8qi" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f25dd5f2bc452dd223de3555bd89311b1e01b909..4d13cd750100844b5f1f1ec78212c6fdc89856bc 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1523,6 +1523,11 @@ } } [(set_attr "type" "ssemov") + (set (attr "c86_attr") + (if_then_else (and 
(match_test "REG_P (operands[1])") + (match_test "REGNO (operands[1]) != REGNO (operands[0])")) + (const_string "blend") + (const_string "*"))) (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -1551,6 +1556,7 @@ vmovdqu\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2} vpblendmw\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "*,blend") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -1763,6 +1769,7 @@ "TARGET_SSE2" "movnti\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set_attr "prefix_data16" "0") (set_attr "mode" "")]) @@ -1774,6 +1781,7 @@ "TARGET_SSE" "%vmovnt\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) @@ -1789,6 +1797,7 @@ (match_test "TARGET_AVX") (const_string "*") (const_string "1"))) + (set_attr "c86_attr" "movnt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) @@ -2415,6 +2424,7 @@ [(set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) @@ -2433,6 +2443,7 @@ (set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "orig,vex") (set_attr "mode" "SF")]) @@ -2452,6 +2463,7 @@ (set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "orig,vex") (set_attr "mode" "SF")]) @@ -2501,6 +2513,7 @@ "TARGET_AVX512F" "vrcp14\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -2515,6 +2528,7 @@ "TARGET_AVX512F" "vrcp14\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sse") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -2532,6 +2546,7 @@ "TARGET_AVX512F" "vrcp14\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %1}" [(set_attr 
"type" "sse") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -2567,6 +2582,7 @@ (set_attr "type" "sse") (set_attr "atom_sse_attr" "sqrt") (set_attr "btver2_sse_attr" "sqrt") + (set_attr "c86_attr" "sqrt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) @@ -2586,6 +2602,7 @@ (set_attr "atom_sse_attr" "sqrt") (set_attr "prefix" "") (set_attr "btver2_sse_attr" "sqrt") + (set_attr "c86_attr" "sqrt") (set_attr "mode" "")]) (define_insn "*_vmsqrt2" @@ -2605,6 +2622,7 @@ (set_attr "atom_sse_attr" "sqrt") (set_attr "prefix" "") (set_attr "btver2_sse_attr" "sqrt") + (set_attr "c86_attr" "sqrt") (set_attr "mode" "")]) (define_expand "rsqrt2" @@ -2653,6 +2671,7 @@ "TARGET_AVX512F" "vrsqrt14\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -2667,6 +2686,7 @@ "TARGET_AVX512F" "vrsqrt14\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sse") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -2684,6 +2704,7 @@ "TARGET_AVX512F" "vrsqrt14\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %1}" [(set_attr "type" "sse") + (set_attr "c86_attr" "rcp") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -2806,6 +2827,7 @@ [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "btver2_sse_attr" "maxmin") + (set_attr "c86_attr" "maxmin") (set_attr "prefix" "") (set_attr "mode" "")]) @@ -2830,6 +2852,7 @@ [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "btver2_sse_attr" "maxmin") + (set_attr "c86_attr" "maxmin") (set_attr "prefix" "") (set_attr "mode" "")]) @@ -2854,6 +2877,7 @@ [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "btver2_sse_attr" "maxmin") + (set_attr "c86_attr" "maxmin") (set (attr "prefix") (cond [(eq_attr "alternative" "0") (const_string "orig") @@ -2881,6 +2905,7 @@ [(set_attr "isa" "noavx,avx") (set_attr "type" "sse") (set_attr "btver2_sse_attr" "maxmin") + (set_attr "c86_attr" "maxmin") 
(set_attr "prefix" "") (set_attr "mode" "")]) @@ -3057,6 +3082,7 @@ "TARGET_AVX" "vhpd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseadd") + (set_attr "c86_attr" "hplus") (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) @@ -3100,6 +3126,7 @@ vhaddpd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") + (set_attr "c86_attr" "hplus") (set_attr "prefix" "orig,vex") (set_attr "mode" "V2DF")]) @@ -3122,6 +3149,7 @@ vhsubpd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") + (set_attr "c86_attr" "hplus") (set_attr "prefix" "orig,vex") (set_attr "mode" "V2DF")]) @@ -3141,6 +3169,7 @@ vhaddpd\t{%1, %1, %0|%0, %1, %1}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd1") + (set_attr "c86_attr" "hplus") (set_attr "prefix" "orig,vex") (set_attr "mode" "V2DF")]) @@ -3159,6 +3188,7 @@ vhsubpd\t{%1, %1, %0|%0, %1, %1}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd1") + (set_attr "c86_attr" "hplus") (set_attr "prefix" "orig,vex") (set_attr "mode" "V2DF")]) @@ -3202,6 +3232,7 @@ "TARGET_AVX" "vhps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseadd") + (set_attr "c86_attr" "hplus") (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) @@ -3232,6 +3263,7 @@ vhps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") + (set_attr "c86_attr" "hplus") (set_attr "atom_unit" "complex") (set_attr "prefix" "orig,vex") (set_attr "prefix_rep" "1,*") @@ -3396,6 +3428,7 @@ "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (mode))" "vreduce\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sse") + (set_attr "c86_attr" "aes") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -3412,6 +3445,7 @@ "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (mode))" "vreduce\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse") + (set_attr "c86_attr" "aes") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -3927,6 +3961,7 @@ vpcmpeq\t{%2, %1, %0|%0, %1, %2} vptestnm\t{%1, %1, %0|%0, %1, %1}" 
[(set_attr "type" "ssecmp") + (set_attr "c86_attr" "*,ptest") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -4025,6 +4060,7 @@ vpcmpeq\t{%2, %1, %0|%0, %1, %2} vptestnm\t{%1, %1, %0|%0, %1, %1}" [(set_attr "type" "ssecmp") + (set_attr "c86_attr" "*,ptest") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -4622,6 +4658,7 @@ } [(set_attr "isa" "noavx,avx,avx512dq,avx512f") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set_attr "prefix" "orig,maybe_vex,evex,evex") (set (attr "mode") (cond [(and (match_test "") @@ -4669,6 +4706,7 @@ return ""; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set_attr "prefix" "evex") (set (attr "mode") (if_then_else (match_test "TARGET_AVX512DQ") @@ -4743,6 +4781,7 @@ } [(set_attr "isa" "noavx,avx,avx512dq,avx512f") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set_attr "prefix" "orig,maybe_evex,evex,evex") (set (attr "mode") (cond [(and (match_test "") @@ -4790,6 +4829,7 @@ return ""; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set_attr "prefix" "evex") (set (attr "mode") (if_then_else (match_test "TARGET_AVX512DQ") @@ -4904,6 +4944,7 @@ } [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") (cond [(eq_attr "alternative" "2") @@ -5030,6 +5071,7 @@ } [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") (cond [(eq_attr "alternative" "2") @@ -5092,6 +5134,7 @@ } [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set (attr "prefix_data16") (if_then_else (and (eq_attr "alternative" "0") @@ -10575,6 +10618,10 @@ (const_string "fmov") ] (const_string "ssemov"))) + (set (attr "c86_attr") + (if_then_else (eq_attr 
"alternative" "8,9,10") + (const_string "insr") + (const_string "*"))) (set (attr "prefix_extra") (if_then_else (eq_attr "alternative" "8,9,10") (const_string "1") @@ -10642,6 +10689,13 @@ (if_then_else (eq_attr "alternative" "0,1,2,5,6,9") (const_string "ssemov") (const_string "sselog"))) + (set (attr "c86_attr") + (cond [(eq_attr "alternative" "5,6,9") + (const_string "blend") + (eq_attr "alternative" "3,4,7,8,10,11") + (const_string "insr") + ] + (const_string "*"))) (set (attr "prefix_data16") (if_then_else (eq_attr "alternative" "3,4") (const_string "1") @@ -10910,6 +10964,7 @@ } [(set_attr "isa" "noavx,noavx,avx,noavx,avx") (set_attr "type" "sselog,sselog,sselog,*,*") + (set_attr "c86_attr" "extr,extr,extr,*,*") (set_attr "prefix_data16" "1,1,1,*,*") (set_attr "prefix_extra" "1,1,1,*,*") (set_attr "length_immediate" "1,1,1,*,*") @@ -11821,6 +11876,7 @@ } [(set_attr "isa" "*,sse4,noavx,avx") (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1") + (set_attr "c86_attr" "extr,extr,other,other") (set_attr "prefix" "maybe_evex") (set_attr "mode" "TI")]) @@ -12526,6 +12582,7 @@ "TARGET_AVX512F" "valign\t{%3, %2, %1, %0|%0, %1, %2, %3}"; [(set_attr "prefix" "evex") + (set_attr "c86_attr" "shufx") (set_attr "mode" "")]) (define_mode_attr vec_extract_imm_predicate @@ -15161,6 +15218,7 @@ "TARGET_AVX512BW && " "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"; [(set_attr "type" "sseiadd") + (set_attr "c86_attr" "madd") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -15232,6 +15290,7 @@ "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "c86_attr" "madd") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -15290,6 +15349,7 @@ vpmaddwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "madd") (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") @@ -15964,6 +16024,7 @@ "TARGET_AVX2 && 
!(MEM_P (operands[1]) && MEM_P (operands[2]))" "vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "c86_attr" "maxmin") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -16005,6 +16066,7 @@ "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "c86_attr" "maxmin") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "")]) @@ -16017,6 +16079,7 @@ "TARGET_AVX512BW" "vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "c86_attr" "maxmin") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -16115,6 +16178,7 @@ vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "maxmin") (set_attr "prefix_extra" "1,1,*") (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) @@ -16130,6 +16194,7 @@ vpw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "maxmin") (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "*,1") (set_attr "prefix" "orig,vex") @@ -16199,6 +16264,7 @@ vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "maxmin") (set_attr "prefix_extra" "1,1,*") (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) @@ -16214,6 +16280,7 @@ vpb\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "maxmin") (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "*,1") (set_attr "prefix" "orig,vex") @@ -16831,6 +16898,7 @@ } [(set_attr "isa" "noavx,avx,avx") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set (attr "prefix_data16") (if_then_else (and (eq_attr "alternative" "0") @@ -16896,6 +16964,7 @@ "TARGET_AVX512F" "vpandn\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; [(set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") 
(set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -17017,6 +17086,7 @@ } [(set_attr "isa" "noavx,avx,avx") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set (attr "prefix_data16") (if_then_else (and (eq_attr "alternative" "0") @@ -17110,6 +17180,7 @@ } [(set_attr "isa" "noavx,avx,avx") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set (attr "prefix_data16") (if_then_else (and (eq_attr "alternative" "0") @@ -17145,6 +17216,7 @@ (set_attr "prefix" "orig,vex,evex") (set_attr "prefix_data16" "1,*,*") (set_attr "type" "sselog") + (set_attr "c86_attr" "sselogic") (set_attr "mode" "TI")]) (define_expand "one_cmplv1ti2" @@ -17893,6 +17965,7 @@ } [(set_attr "isa" "noavx,noavx,avx,avx,,,avx2") (set_attr "type" "sselog") + (set_attr "c86_attr" "insr") (set (attr "prefix_rex") (if_then_else (and (not (match_test "TARGET_AVX")) @@ -18002,6 +18075,7 @@ } } [(set_attr "type" "sselog,ssemov,ssemov") + (set_attr "c86_attr" "insertx,*,*") (set_attr "length_immediate" "1,0,0") (set_attr "prefix" "evex,vex,evex") (set_attr "mode" ",,")]) @@ -18034,6 +18108,7 @@ return "vinsert\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -18072,6 +18147,7 @@ "TARGET_AVX512DQ" "vinsert32x8\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -18089,6 +18165,7 @@ "TARGET_AVX512DQ" "vinsert32x8\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -18104,6 +18181,7 @@ "TARGET_AVX512F" "vinsert64x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr 
"mode" "XI")]) @@ -18119,6 +18197,7 @@ "TARGET_AVX512F" "vinsert64x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -18166,6 +18245,7 @@ return "vshuf64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "shufx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -18228,6 +18308,7 @@ return "vshuf64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "shufx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -18264,6 +18345,7 @@ return "vshuf64x2\t{%2, %1, %1, %0|%0, %1, %1, %2}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "shufx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -18324,6 +18406,7 @@ return "vshuf32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "shufx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -18410,6 +18493,7 @@ return "vshuf32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "shufx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -18462,6 +18546,7 @@ return "vshuf32x4\t{%2, %1, %1, %0|%0, %1, %1, %2}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "shufx") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -19040,6 +19125,7 @@ %vpextr\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,sse4") (set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "prefix_data16" "1") (set (attr "prefix_extra") (if_then_else @@ -19062,6 +19148,7 @@ "TARGET_SSE2" "%vpextr\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "prefix_data16" "1") (set (attr 
"prefix_extra") (if_then_else @@ -19082,6 +19169,7 @@ "TARGET_SSE4_1" "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -19215,6 +19303,7 @@ } [(set_attr "isa" "*,avx512dq,noavx,noavx,avx") (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1") + (set_attr "c86_attr" "extr,extr,*,*,*") (set (attr "prefix_extra") (if_then_else (eq_attr "alternative" "0,1") (const_string "1") @@ -19233,6 +19322,7 @@ "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "isa" "*,avx512dq") (set_attr "type" "sselog1") + (set_attr "c86_attr" "extr") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") @@ -19302,6 +19392,10 @@ (const_string "imov") ] (const_string "sselog1"))) + (set (attr "c86_attr") + (if_then_else (eq_attr "alternative" "0,1") + (const_string "extr") + (const_string "other"))) (set (attr "length_immediate") (if_then_else (eq_attr "alternative" "0,1,3,4,5") (const_string "1") @@ -19461,6 +19555,10 @@ (const_string "mmxmov") ] (const_string "sselog"))) + (set (attr "c86_attr") + (if_then_else (eq_attr "alternative" "0,1,2,3") + (const_string "insr") + (const_string "other"))) (set (attr "prefix_extra") (if_then_else (eq_attr "alternative" "0,1,2,3") (const_string "1") @@ -19561,6 +19659,10 @@ (eq_attr "alternative" "0,1,2,3,4,5") (const_string "sselog") (const_string "ssemov"))) + (set (attr "c86_attr") + (if_then_else (eq_attr "alternative" "0,1,2,3") + (const_string "insr") + (const_string "other"))) (set (attr "prefix_rex") (if_then_else (eq_attr "alternative" "0,1,2,3") (const_string "1") @@ -19762,6 +19864,7 @@ vpavg\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "avg") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,") (set_attr "mode" "")]) @@ -19780,6 +19883,7 @@ vpsadbw\t{%2, %1, %0|%0, %1, %2}" 
[(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "sadbw") (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,maybe_evex") @@ -19793,6 +19897,7 @@ "TARGET_SSE" "%vmovmsk\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) @@ -19805,6 +19910,7 @@ "TARGET_64BIT && TARGET_SSE" "%vmovmsk\t{%1, %k0|%k0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) @@ -19888,6 +19994,7 @@ "TARGET_SSE2" "%vpmovmskb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set (attr "prefix_data16") (if_then_else (match_test "TARGET_AVX") @@ -19905,6 +20012,7 @@ "TARGET_64BIT && TARGET_SSE2" "%vpmovmskb\t{%1, %k0|%k0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set (attr "prefix_data16") (if_then_else (match_test "TARGET_AVX") @@ -19922,6 +20030,7 @@ "TARGET_64BIT && TARGET_SSE2" "%vpmovmskb\t{%1, %k0|%k0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set (attr "prefix_data16") (if_then_else (match_test "TARGET_AVX") @@ -20093,6 +20202,7 @@ return "%vmaskmovdqu\t{%2, %1|%1, %2}"; } [(set_attr "type" "ssemov") + (set_attr "c86_attr" "blend") (set_attr "prefix_data16" "1") (set (attr "length_address") (symbol_ref ("Pmode != word_mode"))) @@ -20189,6 +20299,7 @@ "TARGET_AVX2" "vphw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "c86_attr" "hplus") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -20214,6 +20325,7 @@ vphw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "hplus") (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") @@ -20278,6 +20390,7 @@ "TARGET_AVX2" "vphd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "c86_attr" 
"hplus") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -20301,6 +20414,7 @@ vphd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "hplus") (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") @@ -20340,6 +20454,7 @@ } [(set_attr "mmx_isa" "native,sse_noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "hplus") (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) @@ -20395,6 +20510,7 @@ "TARGET_AVX2" "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") + (set_attr "c86_attr" "madd") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -20410,6 +20526,7 @@ "TARGET_AVX512BW" "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"; [(set_attr "type" "sseiadd") + (set_attr "c86_attr" "madd") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -20485,6 +20602,7 @@ vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "madd") (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") @@ -20522,6 +20640,7 @@ [(set_attr "isa" "*,noavx,avx") (set_attr "mmx_isa" "native,*,*") (set_attr "type" "sseiadd") + (set_attr "c86_attr" "madd") (set_attr "atom_unit" "simul") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) @@ -20805,6 +20924,7 @@ vpsign\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") + (set_attr "c86_attr" "sign") (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") @@ -20824,6 +20944,7 @@ [(set_attr "isa" "*,noavx,avx") (set_attr "mmx_isa" "native,*,*") (set_attr "type" "sselog1") + (set_attr "c86_attr" "sign") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref 
"x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI,TI,TI")]) @@ -20952,6 +21073,7 @@ (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") + (set_attr "c86_attr" "abs") (set_attr "mode" "")]) (define_insn "abs2_mask" @@ -20964,6 +21086,7 @@ "TARGET_AVX512F" "vpabs\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "abs") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -20977,6 +21100,7 @@ "TARGET_AVX512BW" "vpabs\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "abs") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -21009,6 +21133,7 @@ "TARGET_SSE4A" "movnt\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set_attr "mode" "")]) (define_insn "sse4a_vmmovnt" @@ -21021,6 +21146,7 @@ "TARGET_SSE4A" "movnt\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set_attr "mode" "")]) (define_insn "sse4a_extrqi" @@ -21097,6 +21223,7 @@ vblend\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blend") (set_attr "length_immediate" "1") (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") @@ -21117,6 +21244,7 @@ vblendv\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blendv") (set_attr "length_immediate" "1") (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") @@ -21149,6 +21277,7 @@ } [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blendv") (set_attr "length_immediate" "1") (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") @@ -21287,6 +21416,7 @@ "%vmovntdqa\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "movnt") (set_attr "prefix_extra" "1,1,*") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "")]) @@ 
-21344,6 +21474,7 @@ vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blendv") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "*,*,1") (set_attr "prefix" "orig,orig,vex") @@ -21437,6 +21568,7 @@ vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "blend") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,orig,vex") @@ -21508,6 +21640,7 @@ return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "ssemov") + (set_attr "c86_attr" "blend") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") @@ -21522,6 +21655,7 @@ "TARGET_AVX2" "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "blend") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") @@ -21546,6 +21680,7 @@ "TARGET_AVX2 && && " "vpmovbw\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) @@ -21600,6 +21735,7 @@ "TARGET_AVX512BW" "vpmovbw\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -21660,6 +21796,7 @@ "%vpmovbw\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -21672,6 +21809,7 @@ "%vpmovbw\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -21784,6 +21922,7 @@ "TARGET_AVX512F" "vpmovbd\t{%1, %0|%0, %1}" [(set_attr "type" 
"ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -21805,6 +21944,7 @@ "TARGET_AVX2 && " "vpmovbd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) @@ -21816,6 +21956,7 @@ "TARGET_AVX2 && " "%vpmovbd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) @@ -21866,6 +22007,7 @@ "%vpmovbd\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -21878,6 +22020,7 @@ "%vpmovbd\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -21926,6 +22069,7 @@ "TARGET_AVX512F" "vpmovwd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -21979,6 +22123,7 @@ "TARGET_AVX2 && " "vpmovwd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) @@ -22038,6 +22183,7 @@ "%vpmovwd\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -22050,6 +22196,7 @@ "%vpmovwd\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -22161,6 +22308,7 @@ "TARGET_AVX512F" "vpmovbq\t{%1, %0|%0, %1}" [(set_attr 
"type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -22171,6 +22319,7 @@ "TARGET_AVX512F" "vpmovbq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -22218,6 +22367,7 @@ "TARGET_AVX2 && " "vpmovbq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) @@ -22229,6 +22379,7 @@ "TARGET_AVX2 && " "vpmovbq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) @@ -22280,6 +22431,7 @@ "%vpmovbq\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -22303,6 +22455,7 @@ "TARGET_AVX512F" "vpmovwq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -22322,6 +22475,7 @@ "TARGET_AVX2 && " "vpmovwq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) @@ -22333,6 +22487,7 @@ "TARGET_AVX2 && " "vpmovwq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) @@ -22380,6 +22535,7 @@ "%vpmovwq\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -22392,6 +22548,7 @@ "%vpmovwq\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr 
"prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -22439,6 +22596,7 @@ "TARGET_AVX512F" "vpmovdq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -22490,6 +22648,7 @@ "TARGET_AVX2 && " "vpmovdq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix" "maybe_evex") (set_attr "prefix_extra" "1") (set_attr "mode" "OI")]) @@ -22545,6 +22704,7 @@ "%vpmovdq\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -22557,6 +22717,7 @@ "%vpmovdq\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") + (set_attr "c86_attr" "vpmovx") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) @@ -22823,6 +22984,7 @@ vrndscale\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx,avx512f") (set_attr "type" "ssecvt") + (set_attr "c86_attr" "aes") (set_attr "length_immediate" "1") (set_attr "prefix_data16" "1,1,*,*") (set_attr "prefix_extra" "1") @@ -22847,6 +23009,7 @@ vrndscale\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "noavx,noavx,avx,avx512f") (set_attr "type" "ssecvt") + (set_attr "c86_attr" "aes") (set_attr "length_immediate" "1") (set_attr "prefix_data16" "1,1,*,*") (set_attr "prefix_extra" "1") @@ -23021,6 +23184,7 @@ "TARGET_SSE4_2" "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "cmpestr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") @@ -23049,6 +23213,7 @@ "TARGET_SSE4_2" "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "cmpestr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ 
-23075,6 +23240,7 @@ %vpcmpestri\t{%6, %4, %2|%2, %4, %6} %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "cmpestr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -23150,6 +23316,7 @@ "TARGET_SSE4_2" "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "cmpestr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -23174,6 +23341,7 @@ "TARGET_SSE4_2" "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "cmpestr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -23198,6 +23366,7 @@ %vpcmpistri\t{%4, %3, %2|%2, %3, %4} %vpcmpistri\t{%4, %3, %2|%2, %3, %4}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "cmpestr") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -23609,7 +23778,8 @@ (const_int 13) (const_int 15)])))))] "TARGET_XOP" "vphaddbw\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" "sseiadd1") + (set_attr "c86_attr" "hplus")]) (define_insn "xop_phaddbd" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -23638,7 +23808,8 @@ (const_int 11) (const_int 15)]))))))] "TARGET_XOP" "vphaddbd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" "sseiadd1") + (set_attr "c86_attr" "hplus")]) (define_insn "xop_phaddbq" [(set (match_operand:V2DI 0 "register_operand" "=x") @@ -23683,7 +23854,8 @@ (parallel [(const_int 7) (const_int 15)])))))))] "TARGET_XOP" "vphaddbq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" "sseiadd1") + (set_attr "c86_attr" "hplus")]) (define_insn "xop_phaddwd" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -23700,7 +23872,8 @@ (const_int 5) (const_int 7)])))))] "TARGET_XOP" "vphaddwd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" 
"sseiadd1") + (set_attr "c86_attr" "hplus")]) (define_insn "xop_phaddwq" [(set (match_operand:V2DI 0 "register_operand" "=x") @@ -23725,7 +23898,8 @@ (parallel [(const_int 3) (const_int 7)]))))))] "TARGET_XOP" "vphaddwq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" "sseiadd1") + (set_attr "c86_attr" "hplus")]) (define_insn "xop_phadddq" [(set (match_operand:V2DI 0 "register_operand" "=x") @@ -23740,7 +23914,8 @@ (parallel [(const_int 1) (const_int 3)])))))] "TARGET_XOP" "vphadddq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" "sseiadd1") + (set_attr "c86_attr" "hplus")]) (define_insn "xop_phsubbw" [(set (match_operand:V8HI 0 "register_operand" "=x") @@ -23761,7 +23936,8 @@ (const_int 13) (const_int 15)])))))] "TARGET_XOP" "vphsubbw\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" "sseiadd1") + (set_attr "c86_attr" "hplus")]) (define_insn "xop_phsubwd" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -23778,7 +23954,8 @@ (const_int 5) (const_int 7)])))))] "TARGET_XOP" "vphsubwd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" "sseiadd1") + (set_attr "c86_attr" "hplus")]) (define_insn "xop_phsubdq" [(set (match_operand:V2DI 0 "register_operand" "=x") @@ -23793,7 +23970,8 @@ (parallel [(const_int 1) (const_int 3)])))))] "TARGET_XOP" "vphsubdq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) + [(set_attr "type" "sseiadd1") + (set_attr "c86_attr" "hplus")]) ;; XOP permute instructions (define_insn "xop_pperm" @@ -24575,6 +24753,7 @@ vaesenc\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") + (set_attr "c86_attr" "aes") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") (set_attr "btver2_decode" "double,double") @@ -24591,6 +24770,7 @@ vaesenclast\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") + (set_attr "c86_attr" "aes") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") 
(set_attr "btver2_decode" "double,double") @@ -24607,6 +24787,7 @@ vaesdec\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") + (set_attr "c86_attr" "aes") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") (set_attr "btver2_decode" "double,double") @@ -24623,6 +24804,7 @@ vaesdeclast\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") + (set_attr "c86_attr" "aes") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") (set_attr "btver2_decode" "double,double") @@ -24635,6 +24817,7 @@ "TARGET_AES" "%vaesimc\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "aes") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -24647,6 +24830,7 @@ "TARGET_AES" "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "aes") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") @@ -24783,6 +24967,7 @@ return "vperm\t{%1, %2, %0|%0, %2, %1}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "perm") (set_attr "prefix" "") (set_attr "mode" "")]) @@ -24795,6 +24980,7 @@ "TARGET_AVX512VBMI && " "vperm\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "perm") (set_attr "prefix" "") (set_attr "mode" "")]) @@ -24807,6 +24993,7 @@ "TARGET_AVX512BW && " "vperm\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "perm") (set_attr "prefix" "") (set_attr "mode" "")]) @@ -25025,6 +25212,7 @@ return "vperm\t{%2, %1, %0|%0, %1, %2}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "perm") (set_attr "prefix" "") (set_attr "mode" "")]) @@ -25100,6 +25288,7 @@ return "vperm\t{%2, %1, %0|%0, %1, %2}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "perm") (set_attr "prefix" "") (set_attr "mode" "")]) @@ -25182,6 +25371,7 @@ vshuf32x4\t{$0x0, %g1, %g1, %0|%0, %g1, %g1, 0x0} vbroadcast32x4\t{%1, %0|%0, %1}" [(set_attr 
"type" "ssemov") + (set_attr "c86_attr" "shufx,*") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25194,6 +25384,7 @@ vshuf64x2\t{$0x44, %g1, %g1, %0|%0, %g1, %g1, 0x44} vbroadcast64x4\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "shufx,*") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25388,6 +25579,7 @@ vinsert32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}" [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl") (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1") + (set_attr "c86_attr" "*,insertx,*,*,insertx,*,insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "0,1,1,0,1,0,1") (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex") @@ -25427,6 +25619,7 @@ vshuf32x4\t{$0x0, %t1, %t1, %0|%0, %t1, %t1, 0x0} vbroadcast32x4\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "shufx,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25440,6 +25633,7 @@ vshuf32x4\t{$0x44, %g1, %g1, %0|%0, %g1, %g1, 0x44} vbroadcast32x8\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "shufx,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25457,6 +25651,7 @@ vshuf64x2\t{$0x0, %1, %1, %0|%0, %1, %1, 0x0} vbroadcast64x2\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "shufx,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25546,6 +25741,7 @@ "TARGET_AVX512F" "vpermi2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "perm2") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25562,6 +25758,7 @@ "TARGET_AVX512F" "vpermi2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "perm2") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25591,6 +25788,7 @@ vpermt2\t{%3, %1, %0|%0, %1, %3} vpermi2\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "sselog") + (set_attr "c86_attr" 
"perm2") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25607,6 +25805,7 @@ "TARGET_AVX512F" "vpermt2\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "perm2") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -25682,6 +25881,7 @@ return "vperm2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") @@ -25786,6 +25986,7 @@ return "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") @@ -25808,6 +26009,7 @@ return "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") @@ -25829,6 +26031,7 @@ return "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") @@ -25850,6 +26053,7 @@ return "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; } [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") @@ -25870,6 +26074,7 @@ vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0} vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex,evex") @@ -25890,6 +26095,7 @@ vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1} vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex,evex") @@ -25914,6 +26120,7 @@ 
vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0} vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex,evex") @@ -25938,6 +26145,7 @@ vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1} vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") + (set_attr "c86_attr" "insertx") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex,evex") @@ -25967,6 +26175,7 @@ "TARGET_AVX" "vmaskmov\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "blend") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "btver2_decode" "vector") @@ -26316,6 +26525,7 @@ } } [(set_attr "type" "sselog,sselog,ssemov,ssemov") + (set_attr "c86_attr" "insertx,insertx,*,*") (set_attr "prefix_extra" "1,1,*,*") (set_attr "length_immediate" "1,1,*,*") (set_attr "prefix" "maybe_evex") @@ -26894,6 +27104,7 @@ "TARGET_AVX512F" "vcompress\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "compress") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -26907,6 +27118,7 @@ "TARGET_AVX512VBMI2" "vpcompress\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "compress") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -26920,6 +27132,7 @@ "TARGET_AVX512F" "vcompress\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "compress") (set_attr "prefix" "evex") (set_attr "memory" "store") (set_attr "mode" "")]) @@ -26934,6 +27147,7 @@ "TARGET_AVX512VBMI2" "vpcompress\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "compress") (set_attr "prefix" "evex") (set_attr "memory" "store") (set_attr "mode" "")]) @@ -26958,6 +27172,7 @@ "TARGET_AVX512F" "vexpand\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "expand") 
(set_attr "prefix" "evex") (set_attr "memory" "none,load") (set_attr "mode" "")]) @@ -26972,6 +27187,7 @@ "TARGET_AVX512VBMI2" "vexpand\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") + (set_attr "c86_attr" "expand") (set_attr "prefix" "evex") (set_attr "memory" "none,load") (set_attr "mode" "")]) @@ -27164,6 +27380,7 @@ "TARGET_AVX512BW" "vdbpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sselog1") + (set_attr "c86_attr" "sadbw") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -27175,6 +27392,7 @@ "TARGET_AVX512CD" "vplzcnt\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "c86_attr" "abs") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -27351,6 +27569,7 @@ "TARGET_AVX512IFMA" "vpmadd52\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemuladd") + (set_attr "c86_attr" "madd") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -27367,6 +27586,7 @@ "TARGET_AVX512IFMA" "vpmadd52\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" [(set_attr "type" "ssemuladd") + (set_attr "c86_attr" "madd") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -28223,7 +28443,7 @@ UNSPEC_VAESDEC))] "TARGET_VAES" "vaesdec\t{%2, %1, %0|%0, %1, %2}" -) + [(set_attr "c86_attr" "aes")]) (define_insn "vaesdeclast_" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") @@ -28233,7 +28453,7 @@ UNSPEC_VAESDECLAST))] "TARGET_VAES" "vaesdeclast\t{%2, %1, %0|%0, %1, %2}" -) + [(set_attr "c86_attr" "aes")]) (define_insn "vaesenc_" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") @@ -28243,7 +28463,7 @@ UNSPEC_VAESENC))] "TARGET_VAES" "vaesenc\t{%2, %1, %0|%0, %1, %2}" -) + [(set_attr "c86_attr" "aes")]) (define_insn "vaesenclast_" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") @@ -28253,7 +28473,7 @@ UNSPEC_VAESENCLAST))] "TARGET_VAES" "vaesenclast\t{%2, %1, %0|%0, %1, %2}" -) + [(set_attr "c86_attr" "aes")]) (define_insn "vpclmulqdq_" [(set (match_operand:VI8_FVL 0 "register_operand" "=v") diff 
--git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index c67e002809d7010c180bcfa0d43eab0e34bd0ec5..d2cc3ff2f471c75236a68113e8a2cbe2acbb1f8a 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -3808,3 +3808,279 @@ struct processor_costs core_cost = { "16", /* Func alignment. */ }; +/* C86_4G_M4 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do non-temporal accesses and beat inline considerably. */ +static stringop_algs c86_4g_m4_memcpy[2] = { + /* 32-bit tuning. */ + {libcall, {{6, loop, false}, + {14, unrolled_loop, false}, + {-1, libcall, false}}}, + /* 64-bit tuning. */ + {libcall, {{16, loop, false}, + {128, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs c86_4g_m4_memset[2] = { + /* 32-bit tuning. */ + {libcall, {{8, loop, false}, + {24, unrolled_loop, false}, + {128, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + /* 64-bit tuning. */ + {libcall, {{48, unrolled_loop, false}, + {128, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static const +struct processor_costs c86_4g_m4_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + + /* reg-reg moves are done by renaming and thus they are even cheaper than + 1 cycle. Because reg-reg move cost is 2 and the following tables + correspond to doubles of latencies, we do not model this correctly. + It does not seem to make practical difference to bump prices up even + more. */ + 6, /* cost for loading QImode using + movzbl. */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {8, 8, 8}, /* cost of storing integer + registers. */ + 2, /* cost of reg,reg fld/fst. */ + {6, 6, 16}, /* cost of loading fp registers + in SFmode, DFmode and XFmode. */ + {8, 8, 16}, /* cost of storing fp registers + in SFmode, DFmode and XFmode.
*/ + 2, /* cost of moving MMX register. */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode. */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode. */ + 2, 3, 6, /* cost of moving XMM,YMM,ZMM + register. */ + {6, 6, 6, 12, 24}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit. */ + {8, 8, 8, 16, 32}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit. */ + 6, 6, /* SSE->integer and integer->SSE + moves. */ + 8, 8, /* mask->integer and integer->mask + moves. */ + {6, 6, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {8, 8, 8}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ + /* End of register allocator costs. */ + }, + + COSTS_N_INSNS (1), /* cost of an add instruction. */ + COSTS_N_INSNS (1), /* cost of a lea instruction. */ + COSTS_N_INSNS (1), /* variable shift costs. */ + COSTS_N_INSNS (1), /* constant shift costs. */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ + COSTS_N_INSNS (3), /* HI. */ + COSTS_N_INSNS (3), /* SI. */ + COSTS_N_INSNS (3), /* DI. */ + COSTS_N_INSNS (3)}, /* other. */ + 0, /* cost of multiply per each bit + set. */ + /* Depending on parameters, idiv can get faster on HYGON. This is an upper + bound. */ + {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ + COSTS_N_INSNS (22), /* HI. */ + COSTS_N_INSNS (30), /* SI. */ + COSTS_N_INSNS (45), /* DI. */ + COSTS_N_INSNS (45)}, /* other. */ + COSTS_N_INSNS (1), /* cost of movsx. */ + COSTS_N_INSNS (1), /* cost of movzx. */ + 8, /* "large" insn. */ + 9, /* MOVE_RATIO. */ + 6, /* CLEAR_RATIO. */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {8, 8, 8}, /* cost of storing integer + registers. */ + {6, 6, 6, 12, 24}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit + and 512bit.
*/ + {8, 8, 8, 16, 32}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit + and 512bit. */ + {6, 6, 6, 12, 24}, /* cost of unaligned loads. */ + {8, 8, 8, 16, 32}, /* cost of unaligned stores. */ + 2, 3, 6, /* cost of moving XMM,YMM,ZMM + register. */ + 6, /* cost of moving SSE register to + integer. */ + + 18, 8, /* Gather load static, per_elt. */ + 18, 10, /* Gather store static, per_elt. */ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block. */ + /* C86_4G_M4 processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches. */ + 3, /* Branch cost. */ + COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + + COSTS_N_INSNS (15), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + + COSTS_N_INSNS (10), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of MULSS instruction. */ + COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ + COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ + + COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + + 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ + c86_4g_m4_memcpy, + c86_4g_m4_memset, + COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ + COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. 
*/ + "16", /* Loop alignment. */ + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ +}; + +struct processor_costs c86_4g_m6_cost = c86_4g_m4_cost; + +struct processor_costs c86_4g_m7_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + + /* reg-reg moves are done by renaming and thus they are even cheaper than + 1 cycle. Because reg-reg move cost is 2 and following tables correspond + to doubles of latencies, we do not model this correctly. It does not + seem to make practical difference to bump prices up even more. */ + 6, /* cost for loading QImode using + movzbl. */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {8, 8, 8}, /* cost of storing integer + registers. */ + 2, /* cost of reg,reg fld/fst. */ + {14, 14, 17}, /* cost of loading fp registers + in SFmode, DFmode and XFmode. */ + {12, 12, 16}, /* cost of storing fp registers + in SFmode, DFmode and XFmode. */ + 2, /* cost of moving MMX register. */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode. */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode. */ + 2, 2, 3, /* cost of moving XMM,YMM,ZMM + register. */ + {6, 6, 10, 10, 12}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit. */ + {8, 8, 8, 12, 12}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit. */ + 6, 8, /* SSE->integer and integer->SSE + moves. */ + 8, 8, /* mask->integer and integer->mask + moves. */ + {6, 6, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {8, 8, 8}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ + /* End of register allocator costs. */ + }, + + COSTS_N_INSNS (1), /* cost of an add instruction. */ + + COSTS_N_INSNS (1), /* cost of a lea instruction. */ + COSTS_N_INSNS (1), /* variable shift costs.
*/ + COSTS_N_INSNS (1), /* constant shift costs. */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ + COSTS_N_INSNS (3), /* HI. */ + COSTS_N_INSNS (3), /* SI. */ + COSTS_N_INSNS (3), /* DI. */ + COSTS_N_INSNS (3)}, /* other. */ + 0, /* cost of multiply per each bit + set. */ + {COSTS_N_INSNS (15), /* cost of a divide/mod for QI. */ + COSTS_N_INSNS (17), /* HI. */ + COSTS_N_INSNS (25), /* SI. */ + COSTS_N_INSNS (41), /* DI. */ + COSTS_N_INSNS (41)}, /* other. */ + COSTS_N_INSNS (1), /* cost of movsx. */ + COSTS_N_INSNS (1), /* cost of movzx. */ + 8, /* "large" insn. */ + 9, /* MOVE_RATIO. */ + 6, /* CLEAR_RATIO. */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {8, 8, 8}, /* cost of storing integer + registers. */ + {6, 6, 10, 10, 12}, /* cost of loading SSE registers + in 32bit, 64bit, 128bit, 256bit + and 512bit. */ + {8, 8, 8, 12, 12}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and + 512bit. */ + {6, 6, 10, 10, 12}, /* cost of unaligned loads. */ + {8, 8, 8, 12, 12}, /* cost of unaligned stores. */ + 2, 2, 3, /* cost of moving XMM,YMM,ZMM + register. */ + 6, /* cost of moving SSE register to + integer. */ + + 14, 10, /* Gather load static, per_elt. */ + 14, 20, /* Gather store static, per_elt. */ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block. */ + + 100, /* number of parallel prefetches. */ + 3, /* Branch cost. */ + COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + + COSTS_N_INSNS (15), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + + COSTS_N_INSNS (22), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of MULSS instruction. 
*/ + COSTS_N_INSNS (3), /* cost of MULSD instruction. */ + COSTS_N_INSNS (4), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (4), /* cost of FMA SD instruction. */ + COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ + + COSTS_N_INSNS (10), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ + + 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ + c86_4g_m4_memcpy, + c86_4g_m4_memset, + COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ + COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16", /* Loop alignment. */ + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ +}; diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index 13b1ba43fe35f2edcc733fe0f3a5e717eb598c17..a02abdef3f144e69c44023ae6ed53e6945e39695 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -78,6 +78,9 @@ ix86_issue_rate (void) case PROCESSOR_YONGFENG: case PROCESSOR_SHIJIDADAO: case PROCESSOR_GENERIC: + case PROCESSOR_C86_4G_M4: + case PROCESSOR_C86_4G_M6: + case PROCESSOR_C86_4G_M7: return 4; default: @@ -404,6 +407,9 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case PROCESSOR_ZNVER2: case PROCESSOR_ZNVER3: case PROCESSOR_ZNVER4: + case PROCESSOR_C86_4G_M4: + case PROCESSOR_C86_4G_M6: + case PROCESSOR_C86_4G_M7: /* Stack engine allows to execute push&pop instructions in parall. */ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index c57fc972f6733ee364cb423fbaa9f1d56c31696e..31cdfdf1f9a5e1d78cee2b94a724024d3f749c93 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -42,7 +42,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT - | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G + | m_GENERIC) /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming on modern chips. Prefer stores affecting whole integer register @@ -52,7 +53,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | m_ALDERLAKE - | m_ZHAOXIN | m_GENERIC) + | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store destinations to be 128bit to allow register renaming on 128bit SSE units, @@ -62,7 +63,8 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", that can be partly masked by careful scheduling of moves. 
*/ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 - | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G + | m_GENERIC) /* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids partial write to the destination in scalar SSE conversion from FP @@ -70,14 +72,14 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY, "sse_partial_reg_fp_converts_dependency", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 - | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial write to the destination in scalar SSE conversion from integer to FP. */ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, "sse_partial_reg_converts_dependency", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 - | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_DEST_FALSE_DEP_FOR_GLC: This knob inserts zero-idiom before several insns to break false dependency on the dest register for GLC @@ -109,32 +111,32 @@ DEF_TUNE (X86_TUNE_MOVX, "movx", m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_CORE_AVX2 - | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by full sized loads. 
*/ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE - | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent conditional jump instruction for 32 bit TARGET. */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", - m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) + m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent conditional jump instruction for TARGET_64BIT. */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER - | m_ZNVER | m_ZHAOXIN | m_GENERIC) + | m_ZNVER | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a subsequent conditional jump instruction when the condition jump check sign flag (SF) or overflow flag (OF). */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER - | m_ZNVER | m_ZHAOXIN | m_GENERIC) + | m_ZNVER | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional jump instruction when the alu instruction produces the CCFLAG consumed by @@ -172,14 +174,14 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move", /* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */ DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_TREMONT - | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions. Some chips, like 486 and Pentium works faster with separate load and push instructions. 
*/ DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory", m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE - | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred over esp subtraction. */ @@ -256,7 +258,8 @@ DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT - | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC)) + | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G + | m_GENERIC)) /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag will impact LEA instruction selection. */ @@ -304,14 +307,14 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES, "misaligned_move_string_pro_epilogues", m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_ZHAOXIN | m_TREMONT - | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_USE_SAHF: Controls use of SAHF. */ DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT - | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. 
*/ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", @@ -322,7 +325,7 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", DEF_TUNE (X86_TUNE_USE_BT, "use_bt", m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS - | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency for bit-manipulation instructions. */ @@ -343,7 +346,8 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", /* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */ DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence", - m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN + | m_C86_4G | m_GENERIC) /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) - @@ -368,10 +372,11 @@ DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE - | m_ZHAOXIN | m_GENERIC)) + | m_ZHAOXIN | m_C86_4G | m_GENERIC)) /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */ -DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN) +DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN + | m_C86_4G) /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. 
*/ DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", @@ -393,30 +398,32 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE - | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) + | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_ZHAOXIN | m_C86_4G + | m_GENERIC) /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead of a sequence loading registers by parts. */ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS - | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) + | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_ZHAOXIN | m_C86_4G + | m_GENERIC) /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single precision 128bit instructions instead of double where possible. */ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal", - m_BDVER | m_ZNVER) + m_BDVER | m_ZNVER | m_C86_4G) /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */ DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN - | m_GENERIC) + | m_C86_4G | m_GENERIC) /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to xorps/xorpd and other variants. */ DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER - | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_C86_4G | m_GENERIC) /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer to SSE registers. 
If disabled, the moves will be done by storing @@ -469,44 +476,44 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", elements. */ DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE - | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) + | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO | m_C86_4G)) /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 elements. */ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts", - ~(m_ZNVER4)) + ~(m_ZNVER4 | m_C86_4G_M7)) /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4 elements. */ DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE - | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) + | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO | m_C86_4G)) /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 elements. */ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", - ~(m_ZNVER4)) + ~(m_ZNVER4 | m_C86_4G_M7)) /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more elements. */ DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts", ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE - | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) + | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO | m_C86_4G)) /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more elements. */ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts", - ~(m_ZNVER4)) + ~(m_ZNVER4 | m_C86_4G_M7)) /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or smaller FMA chain. 
*/ -DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3 - | m_YONGFENG | m_SHIJIDADAO) +DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 + | m_ZNVER3 | m_YONGFENG | m_SHIJIDADAO | m_C86_4G) /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or smaller FMA chain. */ DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 - | m_ALDERLAKE | m_SAPPHIRERAPIDS) + | m_ALDERLAKE | m_SAPPHIRERAPIDS | m_C86_4G | m_GENERIC) /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or smaller FMA chain. */ @@ -545,27 +552,28 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512) /* X86_TUNE_AVX256_SPLIT_REGS: if true, AVX512 ops are split into two AVX256 ops. */ -DEF_TUNE (X86_TUNE_AVX512_SPLIT_REGS, "avx512_split_regs", m_ZNVER4) +DEF_TUNE (X86_TUNE_AVX512_SPLIT_REGS, "avx512_split_regs", m_ZNVER4 + | m_C86_4G_M7) /* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces", - m_CORE_AVX512) + m_CORE_AVX512 | m_C86_4G_M4 | m_C86_4G_M6) /* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", - m_CORE_AVX512) + m_CORE_AVX512 | m_C86_4G_M4 | m_C86_4G_M6) /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", - m_SAPPHIRERAPIDS | m_ZNVER4) + m_SAPPHIRERAPIDS | m_ZNVER4 | m_C86_4G_M7) /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit AVX instructions. 
*/ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", - m_SAPPHIRERAPIDS | m_ZNVER4) + m_SAPPHIRERAPIDS | m_ZNVER4 | m_C86_4G_M7) /*****************************************************************************/ /*****************************************************************************/ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index eb422dc7b233852c6371bc3b298f8c2a2da8e810..528c18ffe62803babe8f8fe0dbaed17ce8ad2cd0 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -23921,6 +23921,18 @@ AMD Family 19h Zen version 3. @item znver4 AMD Family 19h Zen version 4. + +@item hygonfam18h +HYGON Family 18h CPU. + +@item c86-4g-m4 +HYGON Family 18h model 4 dharma CPU. + +@item c86-4g-m6 +HYGON Family 18h model 6 shanghai CPU. + +@item c86-4g-m7 +HYGON Family 18h model 7 chengdu CPU. @end table Here is an example: diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index f40df87219e4be79b3e5a0135e3b11e475db9ad3..3a36c4e3456e775f7fa05c70534378c9610be1d2 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -31943,6 +31943,27 @@ instruction set support. @item geode AMD Geode embedded processor with MMX and 3DNow!@: instruction set support. + +@item c86-4g-m4 +HYGON c86-4g-m4 CPU with x86-64, MMX, SSE, SSE2, SSE3, SSE4A, CX16, ABM, SSSE3, +SSE4.1, SSE4.2, AES, PCLMUL, AVX, AVX2, BMI, BMI2, F16C, FMA, PRFCHW, FXSR, SHA, +XSAVE, XSAVEOPT, XSAVEC, FSGSBASE, RDRND, MOVBE, MWAITX, ADX, RDSEED, CLZERO, +CLFLUSHOPT, XSAVES, LZCNT, POPCNT instruction set support. + +@item c86-4g-m6 +HYGON c86-4g-m6 CPU with x86-64, MMX, SSE, SSE2, SSE3, SSE4A, CX16, ABM, SSSE3, +SSE4.1, SSE4.2, AES, PCLMUL, AVX, AVX2, BMI, BMI2, F16C, FMA, PRFCHW, FXSR, SHA, +XSAVE, XSAVEOPT, XSAVEC, FSGSBASE, RDRND, MOVBE, MWAITX, ADX, RDSEED, CLZERO, +CLFLUSHOPT, XSAVES, LZCNT, POPCNT instruction set support. 
+ +@item c86-4g-m7 +HYGON c86-4g-m7 CPU with x86-64, MMX, SSE, SSE2, SSE3, SSE4A, CX16, ABM, SSSE3, +SSE4.1, SSE4.2, AES, PCLMUL, AVX, AVX2, BMI, BMI2, F16C, FMA, PRFCHW, FXSR, SHA, +XSAVE, XSAVEOPT, XSAVEC, FSGSBASE, RDRND, MOVBE, MWAITX, ADX, RDSEED, CLZERO, +CLFLUSHOPT, XSAVES, LZCNT, POPCNT, AVX512F, AVX512DQ, AVX512IFMA, AVX512CD, +AVX512BW, AVX512VL, AVX512BF16, AVX512VBMI, AVX512VBMI2, GFNI, AVX512VNNI, VAES, +AVX512BITALG, AVX512VPOPCNTDQ, AVX512VP2INTERSECT, AVXVNNI, VPCLMULQDQ, +WBNOINVD instruction set support. @end table @item -mtune=@var{cpu-type} diff --git a/gcc/testsuite/g++.target/i386/mv33.C b/gcc/testsuite/g++.target/i386/mv33.C new file mode 100644 index 0000000000000000000000000000000000000000..8591690d2ccfe41a26ada19f937b443b3768d7a7 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/mv33.C @@ -0,0 +1,42 @@ +// Test that dispatching can choose the right multiversion +// for HYGON CPUs with the same internal GCC processor id + +// { dg-do run } +// { dg-require-ifunc "" } +// { dg-options "-O2" } + +#include <assert.h> + +int __attribute__ ((target("default"))) +foo () +{ + return 0; +} + +int __attribute__ ((target("arch=c86-4g-m4"))) foo () { + return 1; +} + +int __attribute__ ((target("arch=c86-4g-m6"))) foo () { + return 2; +} + +int __attribute__ ((target("arch=c86-4g-m7"))) foo () { + return 3; +} + +int main () +{ + int val = foo (); + + if (__builtin_cpu_is ("c86-4g-m4")) + assert (val == 1); + else if (__builtin_cpu_is ("c86-4g-m6")) + assert (val == 2); + else if (__builtin_cpu_is ("c86-4g-m7")) + assert (val == 3); + else + assert (val == 0); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c index fff643c13b0038e31a1bef03a9932759159ccbe4..fa37f0bdba99450a46f378c70777accb2c0d86d7 100644 --- a/gcc/testsuite/gcc.target/i386/builtin_target.c +++ b/gcc/testsuite/gcc.target/i386/builtin_target.c @@ -54,6 +54,10 @@ check_detailed () assert (__builtin_cpu_is ("amd")); 
get_amd_cpu (&cpu_model, &cpu_model2, cpu_features2); break; + case VENDOR_HYGON: + assert (__builtin_cpu_is ("hygon")); + get_hygon_cpu (&cpu_model, &cpu_model2, cpu_features2); + break; default: break; } @@ -131,6 +135,8 @@ quick_check () assert (__builtin_cpu_is ("bdver2") >= 0); + assert (__builtin_cpu_is ("c86-4g-m4") >= 0); + return 0; } diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index faeff3ae2de462dd16bfc6187f17cac684ebb570..3c49c9990c3d3cc8af37137a9a15e757b994b97d 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -203,6 +203,9 @@ extern void test_arch_znver1 (void) __attribute__((__target__("arch= extern void test_arch_znver2 (void) __attribute__((__target__("arch=znver2"))); extern void test_arch_znver3 (void) __attribute__((__target__("arch=znver3"))); extern void test_arch_znver4 (void) __attribute__((__target__("arch=znver4"))); +extern void test_arch_c86_4g_m4 (void) __attribute__((__target__("arch=c86-4g-m4"))); +extern void test_arch_c86_4g_m6 (void) __attribute__((__target__("arch=c86-4g-m6"))); +extern void test_arch_c86_4g_m7 (void) __attribute__((__target__("arch=c86-4g-m7"))); extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona"))); extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); @@ -229,6 +232,9 @@ extern void test_tune_znver1 (void) __attribute__((__target__("tune= extern void test_tune_znver2 (void) __attribute__((__target__("tune=znver2"))); extern void test_tune_znver3 (void) __attribute__((__target__("tune=znver3"))); extern void test_tune_znver4 (void) __attribute__((__target__("tune=znver4"))); +extern void test_tune_c86_4g_m4 (void) __attribute__((__target__("tune=c86-4g-m4"))); +extern void test_tune_c86_4g_m6 (void) __attribute__((__target__("tune=c86-4g-m6"))); +extern void test_tune_c86_4g_m7 (void) __attribute__((__target__("tune=c86-4g-m7"))); extern 
void test_fpmath_sse (void) __attribute__((__target__("sse2,fpmath=sse"))); extern void test_fpmath_387 (void) __attribute__((__target__("sse2,fpmath=387")));