diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 81aa0a4d281729b92813730dee044fd79cc1482a..b46f78686cc97bac9e71210e5b910b0b208f803f 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -26,7 +26,7 @@ mrs x0, id_aa64mmfr1_el1 ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x0, .Lskip_hcrx_\@ - mov_q x0, HCRX_HOST_FLAGS + mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) msr_s SYS_HCRX_EL2, x0 .Lskip_hcrx_\@: .endm diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index c11010965b8e06e20bffffb8d627817eb5893e75..a9044be7765808287a4db8bb9d0924126e1c231c 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -107,7 +107,6 @@ #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) #define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) -#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) @@ -317,36 +316,47 @@ * Once we get to a point where the two describe the same thing, we'll * merge the definitions. One day. */ -#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) +#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0 #define __HFGRTR_EL2_MASK GENMASK(49, 0) -#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) +#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK) -#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ - BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ - GENMASK(26, 25) | BIT(21) | BIT(18) | \ +/* + * The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any + * future additions, define __HFGWTR* macros relative to __HFGRTR* ones. + */ +#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ + GENMASK(26, 25) | BIT(21) | BIT(18) | \ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) -#define __HFGWTR_EL2_MASK GENMASK(49, 0) -#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) - -#define __HFGITR_EL2_RES0 GENMASK(63, 57) -#define __HFGITR_EL2_MASK GENMASK(54, 0) -#define __HFGITR_EL2_nMASK GENMASK(56, 55) - -#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \ - GENMASK(21, 20) | BIT(8)) -#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK -#define __HDFGRTR_EL2_nMASK GENMASK(62, 59) - -#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \ - BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \ - BIT(22) | BIT(9) | BIT(6)) -#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK -#define __HDFGWTR_EL2_nMASK GENMASK(62, 60) +#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK) +#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK) +#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK) + +#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0 +#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0)) +#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK) + +#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0 +#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \ + GENMASK(41, 40) | GENMASK(37, 22) | \ + GENMASK(19, 9) | GENMASK(7, 0)) +#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK) + +#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0 +#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \ + GENMASK(46, 44) | GENMASK(42, 41) | \ + GENMASK(37, 35) | GENMASK(33, 31) | \ + GENMASK(29, 23) | GENMASK(21, 10) | \ + GENMASK(8, 7) | GENMASK(5, 0)) +#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK) + +#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0 +#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0)) +#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK) /* Similar definitions for HCRX_EL2 */ -#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12)) -#define __HCRX_EL2_MASK (0) -#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0)) +#define __HCRX_EL2_RES0 HCRX_EL2_RES0 +#define __HCRX_EL2_MASK (BIT(6)) +#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4a7ab54abc67a864598b8bce08d350f588c285a7..f78747a9508f6389c63de156030bada0ca435ec5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -158,6 +158,16 @@ struct kvm_s2_mmu { phys_addr_t pgd_phys; struct kvm_pgtable *pgt; + /* + * VTCR value used on the host. For a non-NV guest (or a NV + * guest that runs in a context where its own S2 doesn't + * apply), its T0SZ value reflects that of the IPA size. + * + * For a shadow S2 MMU, T0SZ reflects the PARange exposed to + * the guest. + */ + u64 vtcr; + /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; @@ -227,11 +237,31 @@ static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) return index; } +struct kvm_sysreg_masks; + +enum fgt_group_id { + __NO_FGT_GROUP__, + HFGxTR_GROUP, + HDFGRTR_GROUP, + HDFGWTR_GROUP = HDFGRTR_GROUP, + HFGITR_GROUP, + HAFGRTR_GROUP, + + /* Must be last */ + __NR_FGT_GROUP_IDS__ +}; + struct kvm_arch { struct kvm_s2_mmu mmu; - /* VTCR_EL2 value for this VM */ - u64 vtcr; + /* + * Fine-Grained UNDEF, mimicking the FGT layout defined by the + * architecture. We track them globally, as we present the + * same feature-set to all vcpus. + * + * Index 0 is currently spare. + */ + u64 fgu[__NR_FGT_GROUP_IDS__]; /* Interrupt controller */ struct vgic_dist vgic; @@ -264,10 +294,10 @@ struct kvm_arch { #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5 /* Timer PPIs made immutable */ #define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6 - /* SMCCC filter initialized for the VM */ -#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7 /* Initial ID reg values loaded */ -#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8 +#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7 + /* Fine-Grained UNDEF initialised */ +#define KVM_ARCH_FLAG_FGU_INITIALIZED 8 unsigned long flags; /* VM-wide vCPU feature set */ @@ -288,6 +318,9 @@ struct kvm_arch { /* PMCR_EL0.N value for the guest */ u8 pmcr_n; + /* Iterator for idreg debugfs */ + u8 idreg_debugfs_iter; + /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; struct maple_tree smccc_filter; @@ -306,6 +339,9 @@ struct kvm_arch { #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; + /* Masks for VNCR-baked sysregs */ + struct kvm_sysreg_masks *sysreg_masks; + /* * For an untrusted host VM, 'pkvm.handle' is used to lookup * the associated pKVM instance in the hypervisor. @@ -459,6 +495,7 @@ enum vcpu_sysreg { VNCR(HFGITR_EL2), VNCR(HDFGRTR_EL2), VNCR(HDFGWTR_EL2), + VNCR(HAFGRTR_EL2), VNCR(CNTVOFF_EL2), VNCR(CNTV_CVAL_EL0), @@ -469,6 +506,13 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ }; +struct kvm_sysreg_masks { + struct { + u64 res0; + u64 res1; + } mask[NR_SYS_REGS - __VNCR_START__]; +}; + struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -597,6 +641,7 @@ struct kvm_vcpu_arch { /* Values of trap registers for the guest. */ u64 hcr_el2; + u64 hcrx_el2; u64 mdcr_el2; /* Exception Information */ @@ -895,7 +940,15 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) -#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) +u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg); +#define __vcpu_sys_reg(v,r) \ + (*({ \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 *__r = __ctxt_sys_reg(ctxt, (r)); \ + if (vcpu_has_nv((v)) && (r) >= __VNCR_START__) \ + *__r = kvm_vcpu_sanitise_vncr_reg((v), (r)); \ + __r; \ + })) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); @@ -1084,14 +1137,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); +void kvm_sys_regs_create_debugfs(struct kvm *kvm); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); int __init kvm_sys_reg_table_init(void); +struct sys_reg_desc; +int __init populate_sysreg_config(const struct sys_reg_desc *sr, + unsigned int idx); int __init populate_nv_trap_config(void); bool lock_all_vcpus(struct kvm *kvm); void unlock_all_vcpus(struct kvm *kvm); +void kvm_init_sysreg(struct kvm_vcpu *); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 491220236f693f1f45a12a40b925abd67ff16398..363bb900758da81f0c0cd5f1aacc2397babccb81 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -162,9 +162,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) */ #define KVM_PHYS_SHIFT (40) -#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr) -#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm)) -#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL)) +#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr) +#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu)) +#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL)) #include #include @@ -311,7 +311,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, struct kvm_arch *arch) { - write_sysreg(arch->vtcr, vtcr_el2); + write_sysreg(mmu->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 56335e54faa7df0f1b28f94bf2b30826ace92798..053226eae4e550326a4839726120f0eef3ced08d 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -60,7 +60,6 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) return ttbr0 & ~GENMASK_ULL(63, 48); } -extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); int kvm_init_nv_sysregs(struct kvm *kvm); diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index c8dca8ae359cd25c3bac6017b1b8bb4e85eb992a..23d27623e478b8d4443387676fd3cf04c6c38614 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -21,13 +21,13 @@ * (IPA_SHIFT - 4). */ #define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4) -#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr) +#define kvm_stage2_levels(mmu) VTCR_EL2_LVLS((mmu)->vtcr) /* * kvm_mmmu_cache_min_pages() is the number of pages required to install * a stage-2 translation. We pre-allocate the entry level page table at * the VM creation. */ -#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1) +#define kvm_mmu_cache_min_pages(mmu) (kvm_stage2_levels(mmu) - 1) #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d1700f6594f5b856e75aed8005b4b9edc0a1d16c..9204b7268385cafc81bdcb0a257306ab58b49bf8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -629,6 +629,7 @@ #define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3) #define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0) #define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1) +#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2) #define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0) #define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1) #define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4) @@ -765,10 +766,16 @@ #define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6) /* Misc instructions */ +#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4) +#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5) +#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6) +#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0) + #define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4) #define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5) #define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4) #define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) +#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6) #define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) /* diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index cd27d033d9021ed7e23a519520d5d6d3835fb008..45446916c6b3d303309842a9d98f4a0441a62b7c 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -42,7 +42,6 @@ menuconfig KVM select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS select INTERVAL_TREE - select XARRAY_MULTI select KVM_EXPORT_SYMBOL if KVM=m help Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e5b6fd47c0686e58cf22e4ae8268b22adbd96610..365a9b5e60b090e152af909f476882626b6d81df 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -191,6 +191,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + kvm_sys_regs_create_debugfs(kvm); + return 0; +} static void kvm_destroy_mpidr_data(struct kvm *kvm) { @@ -225,6 +230,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_destroy_mpidr_data(kvm); + kfree(kvm->arch.sysreg_masks); kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); @@ -728,6 +734,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) return ret; } + /* + * This needs to happen after NV has imposed its own restrictions on + * the feature set + */ + kvm_init_sysreg(vcpu); + ret = kvm_timer_enable(vcpu); if (ret) return ret; diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index ee902ff2a50f808d23e3c4e88a970cca40271d17..838b401c422059915f98fea02ff40a140b7dd2c3 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -427,12 +427,14 @@ static const complex_condition_check ccc[] = { * [19:14] bit number in the FGT register (6 bits) * [20] trap polarity (1 bit) * [25:21] FG filter (5 bits) - * [62:26] Unused (37 bits) + * [35:26] Main SysReg table index (10 bits) + * [62:36] Unused (27 bits) * [63] RES0 - Must be zero, as lost on insertion in the xarray */ #define TC_CGT_BITS 10 #define TC_FGT_BITS 4 #define TC_FGF_BITS 5 +#define TC_SRI_BITS 10 union trap_config { u64 val; @@ -442,7 +444,8 @@ union trap_config { unsigned long bit:6; /* Bit number */ unsigned long pol:1; /* Polarity */ unsigned long fgf:TC_FGF_BITS; /* Fine Grained Filter */ - unsigned long unused:37; /* Unused, should be zero */ + unsigned long sri:TC_SRI_BITS; /* SysReg Index */ + unsigned long unused:27; /* Unused, should be zero */ unsigned long mbz:1; /* Must Be Zero */ }; }; @@ -941,17 +944,6 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { static DEFINE_XARRAY(sr_forward_xa); -enum fgt_group_id { - __NO_FGT_GROUP__, - HFGxTR_GROUP, - HDFGRTR_GROUP, - HDFGWTR_GROUP, - HFGITR_GROUP, - - /* Must be last */ - __NR_FGT_GROUP_IDS__ -}; - enum fg_filter_id { __NO_FGF__, HCRX_FGTnXS, @@ -977,10 +969,20 @@ enum fg_filter_id { static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { /* HFGRTR_EL2, HFGWTR_EL2 */ + SR_FGT(SYS_AMAIR2_EL1, HFGxTR, nAMAIR2_EL1, 0), + SR_FGT(SYS_MAIR2_EL1, HFGxTR, nMAIR2_EL1, 0), + SR_FGT(SYS_S2POR_EL1, HFGxTR, nS2POR_EL1, 0), + SR_FGT(SYS_POR_EL1, HFGxTR, nPOR_EL1, 0), + SR_FGT(SYS_POR_EL0, HFGxTR, nPOR_EL0, 0), SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0), SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0), + SR_FGT(SYS_RCWMASK_EL1, HFGxTR, nRCWMASK_EL1, 0), SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0), SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0), + SR_FGT(SYS_GCSCR_EL1, HFGxTR, nGCS_EL1, 0), + SR_FGT(SYS_GCSPR_EL1, HFGxTR, nGCS_EL1, 0), + SR_FGT(SYS_GCSCRE0_EL1, HFGxTR, nGCS_EL0, 0), + SR_FGT(SYS_GCSPR_EL0, HFGxTR, nGCS_EL0, 0), SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0), SR_FGT(SYS_ERXADDR_EL1, HFGxTR, ERXADDR_EL1, 1), SR_FGT(SYS_ERXPFGCDN_EL1, HFGxTR, ERXPFGCDN_EL1, 1), @@ -1042,6 +1044,11 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { SR_FGT(SYS_AFSR1_EL1, HFGxTR, AFSR1_EL1, 1), SR_FGT(SYS_AFSR0_EL1, HFGxTR, AFSR0_EL1, 1), /* HFGITR_EL2 */ + SR_FGT(OP_AT_S1E1A, HFGITR, ATS1E1A, 1), + SR_FGT(OP_COSP_RCTX, HFGITR, COSPRCTX, 1), + SR_FGT(OP_GCSPUSHX, HFGITR, nGCSEPP, 0), + SR_FGT(OP_GCSPOPX, HFGITR, nGCSEPP, 0), + SR_FGT(OP_GCSPUSHM, HFGITR, nGCSPUSHM_EL1, 0), SR_FGT(OP_BRB_IALL, HFGITR, nBRBIALL, 0), SR_FGT(OP_BRB_INJ, HFGITR, nBRBINJ, 0), SR_FGT(SYS_DC_CVAC, HFGITR, DCCVAC, 1), @@ -1609,6 +1616,49 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { SR_FGT(SYS_PMCR_EL0, HDFGWTR, PMCR_EL0, 1), SR_FGT(SYS_PMSWINC_EL0, HDFGWTR, PMSWINC_EL0, 1), SR_FGT(SYS_OSLAR_EL1, HDFGWTR, OSLAR_EL1, 1), + /* + * HAFGRTR_EL2 + */ + SR_FGT(SYS_AMEVTYPER1_EL0(15), HAFGRTR, AMEVTYPER115_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(14), HAFGRTR, AMEVTYPER114_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(13), HAFGRTR, AMEVTYPER113_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(12), HAFGRTR, AMEVTYPER112_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(11), HAFGRTR, AMEVTYPER111_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(10), HAFGRTR, AMEVTYPER110_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(9), HAFGRTR, AMEVTYPER19_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(8), HAFGRTR, AMEVTYPER18_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(7), HAFGRTR, AMEVTYPER17_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(6), HAFGRTR, AMEVTYPER16_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(5), HAFGRTR, AMEVTYPER15_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(4), HAFGRTR, AMEVTYPER14_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(3), HAFGRTR, AMEVTYPER13_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(2), HAFGRTR, AMEVTYPER12_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(1), HAFGRTR, AMEVTYPER11_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(0), HAFGRTR, AMEVTYPER10_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(15), HAFGRTR, AMEVCNTR115_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(14), HAFGRTR, AMEVCNTR114_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(13), HAFGRTR, AMEVCNTR113_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(12), HAFGRTR, AMEVCNTR112_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(11), HAFGRTR, AMEVCNTR111_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(10), HAFGRTR, AMEVCNTR110_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(9), HAFGRTR, AMEVCNTR19_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(8), HAFGRTR, AMEVCNTR18_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(7), HAFGRTR, AMEVCNTR17_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(6), HAFGRTR, AMEVCNTR16_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(5), HAFGRTR, AMEVCNTR15_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(4), HAFGRTR, AMEVCNTR14_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(3), HAFGRTR, AMEVCNTR13_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(2), HAFGRTR, AMEVCNTR12_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(1), HAFGRTR, AMEVCNTR11_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(0), HAFGRTR, AMEVCNTR10_EL0, 1), + SR_FGT(SYS_AMCNTENCLR1_EL0, HAFGRTR, AMCNTEN1, 1), + SR_FGT(SYS_AMCNTENSET1_EL0, HAFGRTR, AMCNTEN1, 1), + SR_FGT(SYS_AMCNTENCLR0_EL0, HAFGRTR, AMCNTEN0, 1), + SR_FGT(SYS_AMCNTENSET0_EL0, HAFGRTR, AMCNTEN0, 1), + SR_FGT(SYS_AMEVCNTR0_EL0(3), HAFGRTR, AMEVCNTR03_EL0, 1), + SR_FGT(SYS_AMEVCNTR0_EL0(2), HAFGRTR, AMEVCNTR02_EL0, 1), + SR_FGT(SYS_AMEVCNTR0_EL0(1), HAFGRTR, AMEVCNTR01_EL0, 1), + SR_FGT(SYS_AMEVCNTR0_EL0(0), HAFGRTR, AMEVCNTR00_EL0, 1), }; static union trap_config get_trap_config(u32 sysreg) @@ -1633,6 +1683,28 @@ static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc, err); } +static u32 encoding_next(u32 encoding) +{ + u8 op0, op1, crn, crm, op2; + + op0 = sys_reg_Op0(encoding); + op1 = sys_reg_Op1(encoding); + crn = sys_reg_CRn(encoding); + crm = sys_reg_CRm(encoding); + op2 = sys_reg_Op2(encoding); + + if (op2 < Op2_mask) + return sys_reg(op0, op1, crn, crm, op2 + 1); + if (crm < CRm_mask) + return sys_reg(op0, op1, crn, crm + 1, 0); + if (crn < CRn_mask) + return sys_reg(op0, op1, crn + 1, 0, 0); + if (op1 < Op1_mask) + return sys_reg(op0, op1 + 1, 0, 0, 0); + + return sys_reg(op0 + 1, 0, 0, 0, 0); +} + int __init populate_nv_trap_config(void) { int ret = 0; @@ -1651,23 +1723,18 @@ int __init populate_nv_trap_config(void) ret = -EINVAL; } - if (cgt->encoding != cgt->end) { - prev = xa_store_range(&sr_forward_xa, - cgt->encoding, cgt->end, - xa_mk_value(cgt->tc.val), - GFP_KERNEL); - } else { - prev = xa_store(&sr_forward_xa, cgt->encoding, + for (u32 enc = cgt->encoding; enc <= cgt->end; enc = encoding_next(enc)) { + prev = xa_store(&sr_forward_xa, enc, xa_mk_value(cgt->tc.val), GFP_KERNEL); if (prev && !xa_is_err(prev)) { ret = -EINVAL; print_nv_trap_error(cgt, "Duplicate CGT", ret); } - } - if (xa_is_err(prev)) { - ret = xa_err(prev); - print_nv_trap_error(cgt, "Failed CGT insertion", ret); + if (xa_is_err(prev)) { + ret = xa_err(prev); + print_nv_trap_error(cgt, "Failed CGT insertion", ret); + } } } @@ -1680,6 +1747,7 @@ int __init populate_nv_trap_config(void) for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) { const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i]; union trap_config tc; + void *prev; if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) { ret = -EINVAL; @@ -1694,8 +1762,13 @@ int __init populate_nv_trap_config(void) } tc.val |= fgt->tc.val; - xa_store(&sr_forward_xa, fgt->encoding, - xa_mk_value(tc.val), GFP_KERNEL); + prev = xa_store(&sr_forward_xa, fgt->encoding, + xa_mk_value(tc.val), GFP_KERNEL); + + if (xa_is_err(prev)) { + ret = xa_err(prev); + print_nv_trap_error(fgt, "Failed FGT insertion", ret); + } } kvm_info("nv: %ld fine grained trap handlers\n", @@ -1721,6 +1794,38 @@ int __init populate_nv_trap_config(void) return ret; } +int __init populate_sysreg_config(const struct sys_reg_desc *sr, + unsigned int idx) +{ + union trap_config tc; + u32 encoding; + void *ret; + + /* + * 0 is a valid value for the index, but not for the storage. + * We'll store (idx+1), so check against an offset'd limit. + */ + if (idx >= (BIT(TC_SRI_BITS) - 1)) { + kvm_err("sysreg %s (%d) out of range\n", sr->name, idx); + return -EINVAL; + } + + encoding = sys_reg(sr->Op0, sr->Op1, sr->CRn, sr->CRm, sr->Op2); + tc = get_trap_config(encoding); + + if (tc.sri) { + kvm_err("sysreg %s (%d) duplicate entry (%d)\n", + sr->name, idx - 1, tc.sri); + return -EINVAL; + } + + tc.sri = idx + 1; + ret = xa_store(&sr_forward_xa, encoding, + xa_mk_value(tc.val), GFP_KERNEL); + + return xa_err(ret); +} + static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu, const struct trap_bits *tb) { @@ -1768,20 +1873,64 @@ static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu, return __compute_trap_behaviour(vcpu, tc.cgt, b); } -static bool check_fgt_bit(u64 val, const union trap_config tc) +static u64 kvm_get_sysreg_res0(struct kvm *kvm, enum vcpu_sysreg sr) { - return ((val >> tc.bit) & 1) == tc.pol; + struct kvm_sysreg_masks *masks; + + /* Only handle the VNCR-backed regs for now */ + if (sr < __VNCR_START__) + return 0; + + masks = kvm->arch.sysreg_masks; + + return masks->mask[sr - __VNCR_START__].res0; } -#define sanitised_sys_reg(vcpu, reg) \ - ({ \ - u64 __val; \ - __val = __vcpu_sys_reg(vcpu, reg); \ - __val &= ~__ ## reg ## _RES0; \ - (__val); \ - }) +static bool check_fgt_bit(struct kvm *kvm, bool is_read, + u64 val, const union trap_config tc) +{ + enum vcpu_sysreg sr; + + if (tc.pol) + return (val & BIT(tc.bit)); -bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) + /* + * FGTs with negative polarities are an absolute nightmare, as + * we need to evaluate the bit in the light of the feature + * that defines it. WTF were they thinking? + * + * So let's check if the bit has been earmarked as RES0, as + * this indicates an unimplemented feature. + */ + if (val & BIT(tc.bit)) + return false; + + switch ((enum fgt_group_id)tc.fgt) { + case HFGxTR_GROUP: + sr = is_read ? HFGRTR_EL2 : HFGWTR_EL2; + break; + + case HDFGRTR_GROUP: + sr = is_read ? HDFGRTR_EL2 : HDFGWTR_EL2; + break; + + case HAFGRTR_GROUP: + sr = HAFGRTR_EL2; + break; + + case HFGITR_GROUP: + sr = HFGITR_EL2; + break; + + default: + WARN_ONCE(1, "Unhandled FGT group"); + return false; + } + + return !(kvm_get_sysreg_res0(kvm, sr) & BIT(tc.bit)); +} + +bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index) { union trap_config tc; enum trap_behaviour b; @@ -1789,9 +1938,6 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) u32 sysreg; u64 esr, val; - if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) - return false; - esr = kvm_vcpu_get_esr(vcpu); sysreg = esr_sys64_to_sysreg(esr); is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; @@ -1802,13 +1948,27 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) * A value of 0 for the whole entry means that we know nothing * for this sysreg, and that it cannot be re-injected into the * nested hypervisor. In this situation, let's cut it short. - * - * Note that ultimately, we could also make use of the xarray - * to store the index of the sysreg in the local descriptor - * array, avoiding another search... Hint, hint... */ if (!tc.val) - return false; + goto local; + + /* + * If a sysreg can be trapped using a FGT, first check whether we + * trap for the purpose of forbidding the feature. In that case, + * inject an UNDEF. + */ + if (tc.fgt != __NO_FGT_GROUP__ && + (vcpu->kvm->arch.fgu[tc.fgt] & BIT(tc.bit))) { + kvm_inject_undefined(vcpu); + return true; + } + + /* + * If we're not nesting, immediately return to the caller, with the + * sysreg index, should we have it. + */ + if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + goto local; switch ((enum fgt_group_id)tc.fgt) { case __NO_FGT_GROUP__: @@ -1816,21 +1976,24 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) case HFGxTR_GROUP: if (is_read) - val = sanitised_sys_reg(vcpu, HFGRTR_EL2); + val = __vcpu_sys_reg(vcpu, HFGRTR_EL2); else - val = sanitised_sys_reg(vcpu, HFGWTR_EL2); + val = __vcpu_sys_reg(vcpu, HFGWTR_EL2); break; case HDFGRTR_GROUP: - case HDFGWTR_GROUP: if (is_read) - val = sanitised_sys_reg(vcpu, HDFGRTR_EL2); + val = __vcpu_sys_reg(vcpu, HDFGRTR_EL2); else - val = sanitised_sys_reg(vcpu, HDFGWTR_EL2); + val = __vcpu_sys_reg(vcpu, HDFGWTR_EL2); + break; + + case HAFGRTR_GROUP: + val = __vcpu_sys_reg(vcpu, HAFGRTR_EL2); break; case HFGITR_GROUP: - val = sanitised_sys_reg(vcpu, HFGITR_EL2); + val = __vcpu_sys_reg(vcpu, HFGITR_EL2); switch (tc.fgf) { u64 tmp; @@ -1838,7 +2001,7 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) break; case HCRX_FGTnXS: - tmp = sanitised_sys_reg(vcpu, HCRX_EL2); + tmp = __vcpu_sys_reg(vcpu, HCRX_EL2); if (tmp & HCRX_EL2_FGTnXS) tc.fgt = __NO_FGT_GROUP__; } @@ -1847,10 +2010,11 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) case __NR_FGT_GROUP_IDS__: /* Something is really wrong, bail out */ WARN_ONCE(1, "__NR_FGT_GROUP_IDS__"); - return false; + goto local; } - if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc)) + if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(vcpu->kvm, is_read, + val, tc)) goto inject; b = compute_trap_behaviour(vcpu, tc); @@ -1859,6 +2023,26 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) ((b & BEHAVE_FORWARD_WRITE) && !is_read)) goto inject; +local: + if (!tc.sri) { + struct sys_reg_params params; + + params = esr_sys64_to_params(esr); + + /* + * Check for the IMPDEF range, as per DDI0487 J.a, + * D18.3.2 Reserved encodings for IMPLEMENTATION + * DEFINED registers. + */ + if (!(params.Op0 == 3 && (params.CRn & 0b1011) == 0b1011)) + print_sys_reg_msg(¶ms, + "Unsupported guest access at: %lx\n", + *vcpu_pc(vcpu)); + kvm_inject_undefined(vcpu); + return true; + } + + *sr_index = tc.sri - 1; return false; inject: diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 2b6c9724d55fdd3ee99b21f9234cd0c1fcbfe941..2d9c7ca9905a59eb4208d79b240af182d143f284 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -78,102 +78,141 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) clr |= ~hfg & __ ## reg ## _nMASK; \ } while(0) +#define reg_to_fgt_group_id(reg) \ + ({ \ + enum fgt_group_id id; \ + switch(reg) { \ + case HFGRTR_EL2: \ + case HFGWTR_EL2: \ + id = HFGxTR_GROUP; \ + break; \ + case HFGITR_EL2: \ + id = HFGITR_GROUP; \ + break; \ + case HDFGRTR_EL2: \ + case HDFGWTR_EL2: \ + id = HDFGRTR_GROUP; \ + break; \ + case HAFGRTR_EL2: \ + id = HAFGRTR_GROUP; \ + break; \ + default: \ + BUILD_BUG_ON(1); \ + } \ + \ + id; \ + }) + +#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \ + do { \ + u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \ + set |= hfg & __ ## reg ## _MASK; \ + clr |= hfg & __ ## reg ## _nMASK; \ + } while(0) -static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; - u64 r_val, w_val; - - if (!cpus_have_final_cap(ARM64_HAS_FGT)) - return; - - ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2); - ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2); +#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \ + do { \ + u64 c = 0, s = 0; \ + \ + ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \ + compute_clr_set(vcpu, reg, c, s); \ + \ + compute_undef_clr_set(vcpu, kvm, reg, c, s); \ + \ + s |= set; \ + c |= clr; \ + if (c || s) { \ + u64 val = __ ## reg ## _nMASK; \ + val |= s; \ + val &= ~c; \ + write_sysreg_s(val, SYS_ ## reg); \ + } \ + } while(0) - if (cpus_have_final_cap(ARM64_SME)) { - tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK; +#define update_fgt_traps(hctxt, vcpu, kvm, reg) \ + update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0) - r_clr |= tmp; - w_clr |= tmp; - } - - /* - * Trap guest writes to TCR_EL1 to prevent it from enabling HA or HD. - */ - if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) - w_set |= HFGxTR_EL2_TCR_EL1_MASK; +/* + * Validate the fine grain trap masks. + * Check that the masks do not overlap and that all bits are accounted for. + */ +#define CHECK_FGT_MASKS(reg) \ + do { \ + BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \ + BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \ + (__ ## reg ## _nMASK))); \ + } while(0) - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { - compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set); - compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set); - } +static inline bool cpu_has_amu(void) +{ + u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); - /* The default is not to trap anything but ACCDATA_EL1 */ - r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1; - r_val |= r_set; - r_val &= ~r_clr; + return cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_EL1_AMU_SHIFT); +} - w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1; - w_val |= w_set; - w_val &= ~w_clr; +static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + struct kvm *kvm = kern_hyp_va(vcpu->kvm); - write_sysreg_s(r_val, SYS_HFGRTR_EL2); - write_sysreg_s(w_val, SYS_HFGWTR_EL2); + CHECK_FGT_MASKS(HFGRTR_EL2); + CHECK_FGT_MASKS(HFGWTR_EL2); + CHECK_FGT_MASKS(HFGITR_EL2); + CHECK_FGT_MASKS(HDFGRTR_EL2); + CHECK_FGT_MASKS(HDFGWTR_EL2); + CHECK_FGT_MASKS(HAFGRTR_EL2); + CHECK_FGT_MASKS(HCRX_EL2); - if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; - ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2); - - r_set = r_clr = 0; - compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set); - r_val = __HFGITR_EL2_nMASK; - r_val |= r_set; - r_val &= ~r_clr; - - write_sysreg_s(r_val, SYS_HFGITR_EL2); - - ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2); - ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2); + update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2); + update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0, + cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ? + HFGxTR_EL2_TCR_EL1_MASK : 0); + update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2); + update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2); + update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2); - r_clr = r_set = w_clr = w_set = 0; - - compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set); - compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set); - - r_val = __HDFGRTR_EL2_nMASK; - r_val |= r_set; - r_val &= ~r_clr; - - w_val = __HDFGWTR_EL2_nMASK; - w_val |= w_set; - w_val &= ~w_clr; - - write_sysreg_s(r_val, SYS_HDFGRTR_EL2); - write_sysreg_s(w_val, SYS_HDFGWTR_EL2); + if (cpu_has_amu()) + update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2); } +#define __deactivate_fgt(htcxt, vcpu, kvm, reg) \ + do { \ + if ((vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) || \ + kvm->arch.fgu[reg_to_fgt_group_id(reg)]) \ + write_sysreg_s(ctxt_sys_reg(hctxt, reg), \ + SYS_ ## reg); \ + } while(0) + static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; - write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2); - write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2); - - if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) - return; + __deactivate_fgt(hctxt, vcpu, kvm, HFGRTR_EL2); + if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) + write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2); + else + __deactivate_fgt(hctxt, vcpu, kvm, HFGWTR_EL2); + __deactivate_fgt(hctxt, vcpu, kvm, HFGITR_EL2); + __deactivate_fgt(hctxt, vcpu, kvm, HDFGRTR_EL2); + __deactivate_fgt(hctxt, vcpu, kvm, HDFGWTR_EL2); - write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2); - write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2); - write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2); + if (cpu_has_amu()) + __deactivate_fgt(hctxt, vcpu, kvm, HAFGRTR_EL2); } static inline void __activate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); @@ -184,11 +223,8 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * EL1 instead of being trapped to EL2. */ if (kvm_arm_support_pmu_v3()) { - struct kvm_cpu_context *hctxt; - write_sysreg(0, pmselr_el0); - hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -198,7 +234,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_HCX)) { - u64 hcrx = HCRX_GUEST_FLAGS; + u64 hcrx = vcpu->arch.hcrx_el2; if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { u64 clr = 0, set = 0; @@ -208,6 +244,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) hcrx &= ~clr; } + ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2); write_sysreg_s(hcrx, SYS_HCRX_EL2); } @@ -216,19 +253,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); if (kvm_arm_support_pmu_v3()) { - struct kvm_cpu_context *hctxt; - - hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2); __deactivate_traps_hfgxtr(vcpu); } diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index bb6b571ec627dede466c5fa1d05785a9c9f78764..4be6a7fa007082ac008c83422e9bb69b0f5da324 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -27,16 +27,34 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); } -static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) +static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) { struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; if (!vcpu) vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); + return vcpu; +} + +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt); + return kvm_has_mte(kern_hyp_va(vcpu->kvm)); } +static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu; + + if (!cpus_have_final_cap(ARM64_HAS_S1PIE)) + return false; + + vcpu = ctxt_to_vcpu(ctxt); + return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1PIE, IMP); +} + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); @@ -55,7 +73,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); - if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + if (ctxt_has_s1pie(ctxt)) { ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR); ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0); } @@ -131,7 +149,7 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); - if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + if (ctxt_has_s1pie(ctxt)) { write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR); write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0); } diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 37440e1dda9306f7abde4cd24cc32c0d229b81ce..fec1167f0366c55c487f5af02da54a262862c5c5 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -69,6 +69,8 @@ ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \ ) +#define PVM_ID_AA64PFR2_ALLOW 0ULL + /* * Allow for protected VMs: * - Mixed-endian @@ -101,6 +103,7 @@ * - Privileged Access Never * - SError interrupt exceptions from speculative reads * - Enhanced Translation Synchronization + * - Control for cache maintenance permission */ #define PVM_ID_AA64MMFR1_ALLOW (\ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \ @@ -108,7 +111,8 @@ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \ ) /* @@ -133,6 +137,8 @@ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \ ) +#define PVM_ID_AA64MMFR3_ALLOW (0ULL) + /* * No support for Scalable Vectors for protected VMs: * Requires additional support from KVM, e.g., context-switching and @@ -178,10 +184,18 @@ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \ ) +/* Restrict pointer authentication to the basic version. */ +#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \ + ) + +#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \ + ) + #define PVM_ID_AA64ISAR1_ALLOW (\ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ @@ -196,8 +210,8 @@ ) #define PVM_ID_AA64ISAR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) \ ) u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 9d703441278bd72e33cec10adba5b9b0afb145bf..8d0a5834e8830059d43d464c4187ff63990e9770 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -129,8 +129,8 @@ static void prepare_host_vtcr(void) parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val); phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, - id_aa64mmfr1_el1_sys_val, phys_shift); + host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, + id_aa64mmfr1_el1_sys_val, phys_shift); } static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot); @@ -235,7 +235,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) unsigned long nr_pages; int ret; - nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT; ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); if (ret) return ret; @@ -295,7 +295,7 @@ int __pkvm_prot_finalize(void) return -EPERM; params->vttbr = kvm_get_vttbr(mmu); - params->vtcr = host_mmu.arch.vtcr; + params->vtcr = mmu->vtcr; params->hcr_el2 |= HCR_VM; /* diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 80fe9378deac9f9099f48d642d4bd3480b4a1c85..93ac7210cfcbbc9b3e1fe2fa6c02100c20daa942 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -116,6 +116,10 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids)) mdcr_set |= MDCR_EL2_TTRF; + /* Trap External Trace */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids)) + mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + vcpu->arch.mdcr_el2 |= mdcr_set; vcpu->arch.mdcr_el2 &= ~mdcr_clear; } @@ -289,7 +293,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, { hyp_vm->host_kvm = host_kvm; hyp_vm->kvm.created_vcpus = nr_vcpus; - hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr; + hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; } static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, @@ -469,7 +473,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, } vm_size = pkvm_get_hyp_vm_size(nr_vcpus); - pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr); + pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr); ret = -ENOMEM; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index b0586d79d2e075b0806adc1d5d079ba3c105c8d4..9c20e5ac71ad6832ddc8c0b85eb64fcc14e23011 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1532,7 +1532,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, kvm_pgtable_force_pte_cb_t force_pte_cb) { size_t pgd_sz; - u64 vtcr = mmu->arch->vtcr; + u64 vtcr = mmu->vtcr; u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 3dffe74928ade21b62d32b098efde2d5e46911e4..ba5dbbfd2e7d2eac48a5095a13e1766ed78fd509 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -144,12 +144,10 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) ARM_SMCCC_SMC_64, \ 0, ARM_SMCCC_FUNC_MASK) -static void init_smccc_filter(struct kvm *kvm) +static int kvm_smccc_filter_insert_reserved(struct kvm *kvm) { int r; - mt_init(&kvm->arch.smccc_filter); - /* * Prevent userspace from handling any SMCCC calls in the architecture * range, avoiding the risk of misrepresenting Spectre mitigation status @@ -159,14 +157,25 @@ static void init_smccc_filter(struct kvm *kvm) SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END, xa_mk_value(KVM_SMCCC_FILTER_HANDLE), GFP_KERNEL_ACCOUNT); - WARN_ON_ONCE(r); + if (r) + goto out_destroy; r = mtree_insert_range(&kvm->arch.smccc_filter, SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END, xa_mk_value(KVM_SMCCC_FILTER_HANDLE), GFP_KERNEL_ACCOUNT); - WARN_ON_ONCE(r); + if (r) + goto out_destroy; + return 0; +out_destroy: + mtree_destroy(&kvm->arch.smccc_filter); + return r; +} + +static bool kvm_smccc_filter_configured(struct kvm *kvm) +{ + return !mtree_empty(&kvm->arch.smccc_filter); } static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) @@ -195,13 +204,14 @@ static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user goto out_unlock; } + if (!kvm_smccc_filter_configured(kvm)) { + r = kvm_smccc_filter_insert_reserved(kvm); + if (WARN_ON_ONCE(r)) + goto out_unlock; + } + r = mtree_insert_range(&kvm->arch.smccc_filter, start, end, xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT); - if (r) - goto out_unlock; - - set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags); - out_unlock: mutex_unlock(&kvm->arch.config_lock); return r; @@ -212,7 +222,7 @@ static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) unsigned long idx = func_id; void *val; - if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags)) + if (!kvm_smccc_filter_configured(kvm)) return KVM_SMCCC_FILTER_HANDLE; /* @@ -428,7 +438,7 @@ void kvm_arm_init_hypercalls(struct kvm *kvm) smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; - init_smccc_filter(kvm); + mt_init(&kvm->arch.smccc_filter); } void kvm_arm_teardown_hypercalls(struct kvm *kvm) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 61441564cfcc72cb461cd43f00e09d39d02e53ff..ce9fea4d61b3a225c1fd6056da8272d0f6cbc775 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -891,7 +891,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); + mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); if (mmu->pgt != NULL) { kvm_err("kvm_arch already initialized?\n"); @@ -1066,7 +1066,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t addr; int ret = 0; struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO }; - struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; + struct kvm_pgtable *pgt = mmu->pgt; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_R | (writable ? KVM_PGTABLE_PROT_W : 0); @@ -1079,7 +1080,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) { ret = kvm_mmu_topup_memory_cache(&cache, - kvm_mmu_cache_min_pages(kvm)); + kvm_mmu_cache_min_pages(mmu)); if (ret) break; @@ -1443,7 +1444,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (fault_status != ESR_ELx_FSC_PERM || (logging_active && write_fault)) { ret = kvm_mmu_topup_memory_cache(memcache, - kvm_mmu_cache_min_pages(kvm)); + kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); if (ret) return ret; } @@ -1790,7 +1791,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } /* Userspace should not be able to register out-of-bounds IPAs */ - VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); + VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu)); if (fault_status == ESR_ELx_FSC_ACCESS) { handle_access_fault(vcpu, fault_ipa); @@ -2066,7 +2067,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * Prevent userspace from creating a memory region outside of the IPA * space addressable by the KVM guest IPA space. */ - if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT)) + if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT)) return -EFAULT; hva = new->userspace_addr; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index d55e809e26cb377e736621c681619b37df66d69f..ced30c90521a02713a4e0e06c1d7b1430df55ea6 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -163,15 +163,280 @@ static u64 limit_nv_id_reg(u32 id, u64 val) return val; } + +u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr) +{ + u64 v = ctxt_sys_reg(&vcpu->arch.ctxt, sr); + struct kvm_sysreg_masks *masks; + + masks = vcpu->kvm->arch.sysreg_masks; + + if (masks) { + sr -= __VNCR_START__; + + v &= ~masks->mask[sr].res0; + v |= masks->mask[sr].res1; + } + + return v; +} + +static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) +{ + int i = sr - __VNCR_START__; + + kvm->arch.sysreg_masks->mask[i].res0 = res0; + kvm->arch.sysreg_masks->mask[i].res1 = res1; +} + int kvm_init_nv_sysregs(struct kvm *kvm) { + u64 res0, res1; + int ret = 0; + mutex_lock(&kvm->arch.config_lock); + if (kvm->arch.sysreg_masks) + goto out; + + kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)), + GFP_KERNEL); + if (!kvm->arch.sysreg_masks) { + ret = -ENOMEM; + goto out; + } + for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++) kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i), kvm->arch.id_regs[i]); + /* VTTBR_EL2 */ + res0 = res1 = 0; + if (!kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16)) + res0 |= GENMASK(63, 56); + if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, CnP, IMP)) + res0 |= VTTBR_CNP_BIT; + set_sysreg_masks(kvm, VTTBR_EL2, res0, res1); + + /* VTCR_EL2 */ + res0 = GENMASK(63, 32) | GENMASK(30, 20); + res1 = BIT(31); + set_sysreg_masks(kvm, VTCR_EL2, res0, res1); + + /* VMPIDR_EL2 */ + res0 = GENMASK(63, 40) | GENMASK(30, 24); + res1 = BIT(31); + set_sysreg_masks(kvm, VMPIDR_EL2, res0, res1); + + /* HCR_EL2 */ + res0 = BIT(48); + res1 = HCR_RW; + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, TWED, IMP)) + res0 |= GENMASK(63, 59); + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, MTE2)) + res0 |= (HCR_TID5 | HCR_DCT | HCR_ATA); + if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, EVT, TTLBxS)) + res0 |= (HCR_TTLBIS | HCR_TTLBOS); + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) && + !kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2)) + res0 |= HCR_ENSCXT; + if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, EVT, IMP)) + res0 |= (HCR_TOCU | HCR_TICAB | HCR_TID4); + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1)) + res0 |= HCR_AMVOFFEN; + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1)) + res0 |= HCR_FIEN; + if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, FWB, IMP)) + res0 |= HCR_FWB; + if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, NV2)) + res0 |= HCR_NV2; + if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP)) + res0 |= (HCR_AT | HCR_NV1 | HCR_NV); + if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) && + __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC))) + res0 |= (HCR_API | HCR_APK); + if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TME, IMP)) + res0 |= BIT(39); + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) + res0 |= (HCR_TEA | HCR_TERR); + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP)) + res0 |= HCR_TLOR; + if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, E2H0, IMP)) + res1 |= HCR_E2H; + set_sysreg_masks(kvm, HCR_EL2, res0, res1); + + /* HCRX_EL2 */ + res0 = HCRX_EL2_RES0; + res1 = HCRX_EL2_RES1; + if (!kvm_has_feat(kvm, ID_AA64ISAR3_EL1, PACM, TRIVIAL_IMP)) + res0 |= HCRX_EL2_PACMEn; + if (!kvm_has_feat(kvm, ID_AA64PFR2_EL1, FPMR, IMP)) + res0 |= HCRX_EL2_EnFPM; + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP)) + res0 |= HCRX_EL2_GCSEn; + if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, SYSREG_128, IMP)) + res0 |= HCRX_EL2_EnIDCP128; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, ADERR, DEV_ASYNC)) + res0 |= (HCRX_EL2_EnSDERR | HCRX_EL2_EnSNERR); + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, DF2, IMP)) + res0 |= HCRX_EL2_TMEA; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP)) + res0 |= HCRX_EL2_D128En; + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP)) + res0 |= HCRX_EL2_PTTWI; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SCTLRX, IMP)) + res0 |= HCRX_EL2_SCTLR2En; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) + res0 |= HCRX_EL2_TCR2En; + if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) + res0 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, CMOW, IMP)) + res0 |= HCRX_EL2_CMOW; + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, NMI, IMP)) + res0 |= (HCRX_EL2_VFNMI | HCRX_EL2_VINMI | HCRX_EL2_TALLINT); + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP) || + !(read_sysreg_s(SYS_SMIDR_EL1) & SMIDR_EL1_SMPS)) + res0 |= HCRX_EL2_SMPME; + if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)) + res0 |= (HCRX_EL2_FGTnXS | HCRX_EL2_FnXS); + if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V)) + res0 |= HCRX_EL2_EnASR; + if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64)) + res0 |= HCRX_EL2_EnALS; + if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA)) + res0 |= HCRX_EL2_EnAS0; + set_sysreg_masks(kvm, HCRX_EL2, res0, res1); + + /* HFG[RW]TR_EL2 */ + res0 = res1 = 0; + if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) && + __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC))) + res0 |= (HFGxTR_EL2_APDAKey | HFGxTR_EL2_APDBKey | + HFGxTR_EL2_APGAKey | HFGxTR_EL2_APIAKey | + HFGxTR_EL2_APIBKey); + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP)) + res0 |= (HFGxTR_EL2_LORC_EL1 | HFGxTR_EL2_LOREA_EL1 | + HFGxTR_EL2_LORID_EL1 | HFGxTR_EL2_LORN_EL1 | + HFGxTR_EL2_LORSA_EL1); + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) && + !kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2)) + res0 |= (HFGxTR_EL2_SCXTNUM_EL1 | HFGxTR_EL2_SCXTNUM_EL0); + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP)) + res0 |= HFGxTR_EL2_ICC_IGRPENn_EL1; + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) + res0 |= (HFGxTR_EL2_ERRIDR_EL1 | HFGxTR_EL2_ERRSELR_EL1 | + HFGxTR_EL2_ERXFR_EL1 | HFGxTR_EL2_ERXCTLR_EL1 | + HFGxTR_EL2_ERXSTATUS_EL1 | HFGxTR_EL2_ERXMISCn_EL1 | + HFGxTR_EL2_ERXPFGF_EL1 | HFGxTR_EL2_ERXPFGCTL_EL1 | + HFGxTR_EL2_ERXPFGCDN_EL1 | HFGxTR_EL2_ERXADDR_EL1); + if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA)) + res0 |= HFGxTR_EL2_nACCDATA_EL1; + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP)) + res0 |= (HFGxTR_EL2_nGCS_EL0 | HFGxTR_EL2_nGCS_EL1); + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP)) + res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0); + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP)) + res0 |= HFGxTR_EL2_nRCWMASK_EL1; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) + res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) + res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1); + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) + res0 |= HFGxTR_EL2_nS2POR_EL1; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP)) + res0 |= (HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nAMAIR2_EL1); + set_sysreg_masks(kvm, HFGRTR_EL2, res0 | __HFGRTR_EL2_RES0, res1); + set_sysreg_masks(kvm, HFGWTR_EL2, res0 | __HFGWTR_EL2_RES0, res1); + + /* HDFG[RW]TR_EL2 */ + res0 = res1 = 0; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP)) + res0 |= HDFGRTR_EL2_OSDLR_EL1; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) + res0 |= (HDFGRTR_EL2_PMEVCNTRn_EL0 | HDFGRTR_EL2_PMEVTYPERn_EL0 | + HDFGRTR_EL2_PMCCFILTR_EL0 | HDFGRTR_EL2_PMCCNTR_EL0 | + HDFGRTR_EL2_PMCNTEN | HDFGRTR_EL2_PMINTEN | + HDFGRTR_EL2_PMOVS | HDFGRTR_EL2_PMSELR_EL0 | + HDFGRTR_EL2_PMMIR_EL1 | HDFGRTR_EL2_PMUSERENR_EL0 | + HDFGRTR_EL2_PMCEIDn_EL0); + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) + res0 |= (HDFGRTR_EL2_PMBLIMITR_EL1 | HDFGRTR_EL2_PMBPTR_EL1 | + HDFGRTR_EL2_PMBSR_EL1 | HDFGRTR_EL2_PMSCR_EL1 | + HDFGRTR_EL2_PMSEVFR_EL1 | HDFGRTR_EL2_PMSFCR_EL1 | + HDFGRTR_EL2_PMSICR_EL1 | HDFGRTR_EL2_PMSIDR_EL1 | + HDFGRTR_EL2_PMSIRR_EL1 | HDFGRTR_EL2_PMSLATFR_EL1 | + HDFGRTR_EL2_PMBIDR_EL1); + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceVer, IMP)) + res0 |= (HDFGRTR_EL2_TRC | HDFGRTR_EL2_TRCAUTHSTATUS | + HDFGRTR_EL2_TRCAUXCTLR | HDFGRTR_EL2_TRCCLAIM | + HDFGRTR_EL2_TRCCNTVRn | HDFGRTR_EL2_TRCID | + HDFGRTR_EL2_TRCIMSPECn | HDFGRTR_EL2_TRCOSLSR | + HDFGRTR_EL2_TRCPRGCTLR | HDFGRTR_EL2_TRCSEQSTR | + HDFGRTR_EL2_TRCSSCSRn | HDFGRTR_EL2_TRCSTATR | + HDFGRTR_EL2_TRCVICTLR); + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP)) + res0 |= (HDFGRTR_EL2_TRBBASER_EL1 | HDFGRTR_EL2_TRBIDR_EL1 | + HDFGRTR_EL2_TRBLIMITR_EL1 | HDFGRTR_EL2_TRBMAR_EL1 | + HDFGRTR_EL2_TRBPTR_EL1 | HDFGRTR_EL2_TRBSR_EL1 | + HDFGRTR_EL2_TRBTRG_EL1); + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) + res0 |= (HDFGRTR_EL2_nBRBIDR | HDFGRTR_EL2_nBRBCTL | + HDFGRTR_EL2_nBRBDATA); + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2)) + res0 |= HDFGRTR_EL2_nPMSNEVFR_EL1; + set_sysreg_masks(kvm, HDFGRTR_EL2, res0 | HDFGRTR_EL2_RES0, res1); + + /* Reuse the bits from the read-side and add the write-specific stuff */ + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) + res0 |= (HDFGWTR_EL2_PMCR_EL0 | HDFGWTR_EL2_PMSWINC_EL0); + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceVer, IMP)) + res0 |= HDFGWTR_EL2_TRCOSLAR; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP)) + res0 |= HDFGWTR_EL2_TRFCR_EL1; + set_sysreg_masks(kvm, HFGWTR_EL2, res0 | HDFGWTR_EL2_RES0, res1); + + /* HFGITR_EL2 */ + res0 = HFGITR_EL2_RES0; + res1 = HFGITR_EL2_RES1; + if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, DPB, DPB2)) + res0 |= HFGITR_EL2_DCCVADP; + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2)) + res0 |= (HFGITR_EL2_ATS1E1RP | HFGITR_EL2_ATS1E1WP); + if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + res0 |= (HFGITR_EL2_TLBIRVAALE1OS | HFGITR_EL2_TLBIRVALE1OS | + HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS | + HFGITR_EL2_TLBIVAALE1OS | HFGITR_EL2_TLBIVALE1OS | + HFGITR_EL2_TLBIVAAE1OS | HFGITR_EL2_TLBIASIDE1OS | + HFGITR_EL2_TLBIVAE1OS | HFGITR_EL2_TLBIVMALLE1OS); + if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + res0 |= (HFGITR_EL2_TLBIRVAALE1 | HFGITR_EL2_TLBIRVALE1 | + HFGITR_EL2_TLBIRVAAE1 | HFGITR_EL2_TLBIRVAE1 | + HFGITR_EL2_TLBIRVAALE1IS | HFGITR_EL2_TLBIRVALE1IS | + HFGITR_EL2_TLBIRVAAE1IS | HFGITR_EL2_TLBIRVAE1IS | + HFGITR_EL2_TLBIRVAALE1OS | HFGITR_EL2_TLBIRVALE1OS | + HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS); + if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, IMP)) + res0 |= (HFGITR_EL2_CFPRCTX | HFGITR_EL2_DVPRCTX | + HFGITR_EL2_CPPRCTX); + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) + res0 |= (HFGITR_EL2_nBRBINJ | HFGITR_EL2_nBRBIALL); + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP)) + res0 |= (HFGITR_EL2_nGCSPUSHM_EL1 | HFGITR_EL2_nGCSSTR_EL1 | + HFGITR_EL2_nGCSEPP); + if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX)) + res0 |= HFGITR_EL2_COSPRCTX; + if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP)) + res0 |= HFGITR_EL2_ATS1E1A; + set_sysreg_masks(kvm, HFGITR_EL2, res0, res1); + + /* HAFGRTR_EL2 - not a lot to see here */ + res0 = HAFGRTR_EL2_RES0; + res1 = HAFGRTR_EL2_RES1; + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1)) + res0 |= ~(res0 | res1); + set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1); +out: mutex_unlock(&kvm->arch.config_lock); - return 0; + return ret; } diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 0f6e85fcf581aded1b34f9d33927bbf58342cabf..9095f4ed79db24e7682f54a02da8f33469da0b61 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -132,7 +132,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) if (host_kvm->created_vcpus < 1) return -EINVAL; - pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); + pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr); /* * The PGD pages will be reclaimed using a hyp_memcache which implies diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 49625da2c08928415d157b73c5e6768cd10f4ae5..99226a88206720784c8f26a9e40614f6061d1240 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -2252,16 +2253,6 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * guest... */ static const struct sys_reg_desc sys_reg_descs[] = { - { SYS_DESC(SYS_DC_ISW), access_dcsw }, - { SYS_DESC(SYS_DC_IGSW), access_dcgsw }, - { SYS_DESC(SYS_DC_IGDSW), access_dcgsw }, - { SYS_DESC(SYS_DC_CSW), access_dcsw }, - { SYS_DESC(SYS_DC_CGSW), access_dcgsw }, - { SYS_DESC(SYS_DC_CGDSW), access_dcgsw }, - { SYS_DESC(SYS_DC_CISW), access_dcsw }, - { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, - { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, - DBG_BCR_BVR_WCR_WVR_EL1(0), DBG_BCR_BVR_WCR_WVR_EL1(1), { SYS_DESC(SYS_MDCCINT_EL1), trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, @@ -2757,6 +2748,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0), EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0), + EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0), EL2_REG_REDIR(SPSR_EL2, reset_val, 0), EL2_REG_REDIR(ELR_EL2, reset_val, 0), { SYS_DESC(SYS_SP_EL1), access_sp_el1}, @@ -2788,6 +2780,18 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(SP_EL2, NULL, reset_unknown, 0), }; +static struct sys_reg_desc sys_insn_descs[] = { + { SYS_DESC(SYS_DC_ISW), access_dcsw }, + { SYS_DESC(SYS_DC_IGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_IGDSW), access_dcgsw }, + { SYS_DESC(SYS_DC_CSW), access_dcsw }, + { SYS_DESC(SYS_DC_CGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_CGDSW), access_dcgsw }, + { SYS_DESC(SYS_DC_CISW), access_dcsw }, + { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, +}; + static const struct sys_reg_desc *first_idreg; static bool trap_dbgdidr(struct kvm_vcpu *vcpu, @@ -3444,12 +3448,6 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu) return kvm_handle_cp_32(vcpu, ¶ms, cp14_regs, ARRAY_SIZE(cp14_regs)); } -static bool is_imp_def_sys_reg(struct sys_reg_params *params) -{ - // See ARM DDI 0487E.a, section D12.3.2 - return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011; -} - /** * emulate_sys_reg - Emulate a guest access to an AArch64 system register * @vcpu: The VCPU pointer @@ -3458,26 +3456,106 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params) * Return: true if the system register access was successful, false otherwise. */ static bool emulate_sys_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *params) + struct sys_reg_params *params) { const struct sys_reg_desc *r; r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); - if (likely(r)) { perform_access(vcpu, params, r); return true; } - if (is_imp_def_sys_reg(params)) { - kvm_inject_undefined(vcpu); + print_sys_reg_msg(params, + "Unsupported guest sys_reg access at: %lx [%08lx]\n", + *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); + kvm_inject_undefined(vcpu); + + return false; +} + +static void *idregs_debug_start(struct seq_file *s, loff_t *pos) +{ + struct kvm *kvm = s->private; + u8 *iter; + + mutex_lock(&kvm->arch.config_lock); + + iter = &kvm->arch.idreg_debugfs_iter; + if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags) && + *iter == (u8)~0) { + *iter = *pos; + if (*iter >= KVM_ARM_ID_REG_NUM) + iter = NULL; } else { - print_sys_reg_msg(params, - "Unsupported guest sys_reg access at: %lx [%08lx]\n", - *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); - kvm_inject_undefined(vcpu); + iter = ERR_PTR(-EBUSY); } - return false; + + mutex_unlock(&kvm->arch.config_lock); + + return iter; +} + +static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct kvm *kvm = s->private; + + (*pos)++; + + if ((kvm->arch.idreg_debugfs_iter + 1) < KVM_ARM_ID_REG_NUM) { + kvm->arch.idreg_debugfs_iter++; + + return &kvm->arch.idreg_debugfs_iter; + } + + return NULL; +} + +static void idregs_debug_stop(struct seq_file *s, void *v) +{ + struct kvm *kvm = s->private; + + if (IS_ERR(v)) + return; + + mutex_lock(&kvm->arch.config_lock); + + kvm->arch.idreg_debugfs_iter = ~0; + + mutex_unlock(&kvm->arch.config_lock); +} + +static int idregs_debug_show(struct seq_file *s, void *v) +{ + struct kvm *kvm = s->private; + const struct sys_reg_desc *desc; + + desc = first_idreg + kvm->arch.idreg_debugfs_iter; + + if (!desc->name) + return 0; + + seq_printf(s, "%20s:\t%016llx\n", + desc->name, IDREG(kvm, IDX_IDREG(kvm->arch.idreg_debugfs_iter))); + + return 0; +} + +static const struct seq_operations idregs_debug_sops = { + .start = idregs_debug_start, + .next = idregs_debug_next, + .stop = idregs_debug_stop, + .show = idregs_debug_show, +}; + +DEFINE_SEQ_ATTRIBUTE(idregs_debug); + +void kvm_sys_regs_create_debugfs(struct kvm *kvm) +{ + kvm->arch.idreg_debugfs_iter = ~0; + + debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm, + &idregs_debug_fops); } static void kvm_reset_id_regs(struct kvm_vcpu *vcpu) @@ -3527,28 +3605,39 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) } /** - * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access + * kvm_handle_sys_reg -- handles a system instruction or mrs/msr instruction + * trap on a guest execution * @vcpu: The VCPU pointer */ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) { + const struct sys_reg_desc *desc = NULL; struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); + int sr_idx; trace_kvm_handle_sys_reg(esr); - if (__check_nv_sr_forward(vcpu)) + if (triage_sysreg_trap(vcpu, &sr_idx)) return 1; params = esr_sys64_to_params(esr); params.regval = vcpu_get_reg(vcpu, Rt); - if (!emulate_sys_reg(vcpu, ¶ms)) - return 1; + /* System registers have Op0=={2,3}, as per DDI487 J.a C5.1.2 */ + if (params.Op0 == 2 || params.Op0 == 3) + desc = &sys_reg_descs[sr_idx]; + else + desc = &sys_insn_descs[sr_idx]; + + perform_access(vcpu, ¶ms, desc); - if (!params.is_write) + /* Read from system register? */ + if (!params.is_write && + (params.Op0 == 2 || params.Op0 == 3)) vcpu_set_reg(vcpu, Rt, params.regval); + return 1; } @@ -3990,11 +4079,84 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range * return 0; } +void kvm_init_sysreg(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->arch.config_lock); + + /* + * In the absence of FGT, we cannot independently trap TLBI + * Range instructions. This isn't great, but trapping all + * TLBIs would be far worse. Live with it... + */ + if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + vcpu->arch.hcr_el2 |= HCR_TTLBOS; + + if (cpus_have_final_cap(ARM64_HAS_HCX)) { + vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS; + + if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) + vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); + } + + if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags)) + goto out; + + kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1 | + HFGxTR_EL2_nMAIR2_EL1 | + HFGxTR_EL2_nS2POR_EL1 | + HFGxTR_EL2_nPOR_EL1 | + HFGxTR_EL2_nPOR_EL0 | + HFGxTR_EL2_nACCDATA_EL1 | + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK); + + if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1OS| + HFGITR_EL2_TLBIRVALE1OS | + HFGITR_EL2_TLBIRVAAE1OS | + HFGITR_EL2_TLBIRVAE1OS | + HFGITR_EL2_TLBIVAALE1OS | + HFGITR_EL2_TLBIVALE1OS | + HFGITR_EL2_TLBIVAAE1OS | + HFGITR_EL2_TLBIASIDE1OS | + HFGITR_EL2_TLBIVAE1OS | + HFGITR_EL2_TLBIVMALLE1OS); + + if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1 | + HFGITR_EL2_TLBIRVALE1 | + HFGITR_EL2_TLBIRVAAE1 | + HFGITR_EL2_TLBIRVAE1 | + HFGITR_EL2_TLBIRVAALE1IS| + HFGITR_EL2_TLBIRVALE1IS | + HFGITR_EL2_TLBIRVAAE1IS | + HFGITR_EL2_TLBIRVAE1IS | + HFGITR_EL2_TLBIRVAALE1OS| + HFGITR_EL2_TLBIRVALE1OS | + HFGITR_EL2_TLBIRVAAE1OS | + HFGITR_EL2_TLBIRVAE1OS); + + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) + kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 | + HFGxTR_EL2_nPIR_EL1); + + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP)) + kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 | + HAFGRTR_EL2_RES1); + + set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags); +out: + mutex_unlock(&kvm->arch.config_lock); +} + int __init kvm_sys_reg_table_init(void) { struct sys_reg_params params; bool valid = true; unsigned int i; + int ret = 0; /* Make sure tables are unique and in order. */ valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false); @@ -4003,6 +4165,7 @@ int __init kvm_sys_reg_table_init(void) valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true); valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true); valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false); + valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false); if (!valid) return -EINVAL; @@ -4017,8 +4180,13 @@ int __init kvm_sys_reg_table_init(void) if (!first_idreg) return -EINVAL; - if (kvm_get_mode() == KVM_MODE_NV) - return populate_nv_trap_config(); + ret = populate_nv_trap_config(); - return 0; + for (i = 0; !ret && i < ARRAY_SIZE(sys_reg_descs); i++) + ret = populate_sysreg_config(sys_reg_descs + i, i); + + for (i = 0; !ret && i < ARRAY_SIZE(sys_insn_descs); i++) + ret = populate_sysreg_config(sys_insn_descs + i, i); + + return ret; } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index c65c129b35001049ed4b14c02ec6fcfb8e37b1fa..997eea21ba2ab3d7b08ae8f6178d208b74453606 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -233,6 +233,8 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num); +bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index); + #define AA32(_x) .aarch32_map = AA32_##_x #define Op0(_x) .Op0 = _x #define Op1(_x) .Op1 = _x diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 2f9e8c611f6421369f815427594954162cbbb735..1d26bb5b02f4b592775dee6f964938f8ce4c866c 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -27,7 +27,8 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, if (addr + size < addr) return -EINVAL; - if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm)) + if (addr & ~kvm_phys_mask(&kvm->arch.mmu) || + (addr + size) > kvm_phys_size(&kvm->arch.mmu)) return -E2BIG; return 0; diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index fdeec84c22ad1b7ce75b190551bc5e3b74bb8750..747ae1629ca30c1cfb1581a64efedd7aba4dcdd9 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1321,7 +1321,10 @@ Enum 63:60 HPMN0 0b0000 UNPREDICTABLE 0b0001 DEF EndEnum -Res0 59:56 +UnsignedEnum 59:56 ExtTrcBuff + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 55:52 BRBE 0b0000 NI 0b0001 IMP @@ -1524,6 +1527,7 @@ EndEnum UnsignedEnum 43:40 SPECRES 0b0000 NI 0b0001 IMP + 0b0010 COSP_RCTX EndEnum UnsignedEnum 39:36 SB 0b0000 NI @@ -1561,6 +1565,7 @@ UnsignedEnum 11:8 API 0b0011 PAuth2 0b0100 FPAC 0b0101 FPACCOMBINE + 0b0110 PAuth_LR EndEnum UnsignedEnum 7:4 APA 0b0000 NI @@ -1569,6 +1574,7 @@ UnsignedEnum 7:4 APA 0b0011 PAuth2 0b0100 FPAC 0b0101 FPACCOMBINE + 0b0110 PAuth_LR EndEnum UnsignedEnum 3:0 DPB 0b0000 NI @@ -1647,7 +1653,12 @@ EndEnum EndSysreg Sysreg ID_AA64ISAR3_EL1 3 0 0 6 3 -Res0 63:12 +Res0 63:16 +UnsignedEnum 15:12 PACM + 0b0000 NI + 0b0001 TRIVIAL_IMP + 0b0010 FULL_IMP +EndEnum UnsignedEnum 11:8 TLBIW 0b0000 NI 0b0001 IMP @@ -2337,10 +2348,18 @@ Sysreg CONTEXTIDR_EL1 3 0 13 0 1 Fields CONTEXTIDR_ELx EndSysreg +Sysreg RCWSMASK_EL1 3 0 13 0 3 +Field 63:0 RCWSMASK +EndSysreg + Sysreg TPIDR_EL1 3 0 13 0 4 Field 63:0 ThreadID EndSysreg +Sysreg RCWMASK_EL1 3 0 13 0 6 +Field 63:0 RCWMASK +EndSysreg + Sysreg SCXTNUM_EL1 3 0 13 0 7 Field 63:0 SoftwareContextNumber EndSysreg @@ -3088,6 +3107,33 @@ Field 1 PIE Field 0 PnCH EndSysreg +SysregFields MAIR2_ELx +Field 63:56 Attr7 +Field 55:48 Attr6 +Field 47:40 Attr5 +Field 39:32 Attr4 +Field 31:24 Attr3 +Field 23:16 Attr2 +Field 15:8 Attr1 +Field 7:0 Attr0 +EndSysregFields + +Sysreg MAIR2_EL1 3 0 10 2 1 +Fields MAIR2_ELx +EndSysreg + +Sysreg MAIR2_EL2 3 4 10 1 1 +Fields MAIR2_ELx +EndSysreg + +Sysreg AMAIR2_EL1 3 0 10 3 1 +Field 63:0 ImpDef +EndSysreg + +Sysreg AMAIR2_EL2 3 4 10 3 1 +Field 63:0 ImpDef +EndSysreg + SysregFields PIRx_ELx Field 63:60 Perm15 Field 59:56 Perm14 @@ -3139,6 +3185,14 @@ Sysreg POR_EL12 3 5 10 2 4 Fields PIRx_ELx EndSysreg +Sysreg S2POR_EL1 3 0 10 2 5 +Fields PIRx_ELx +EndSysreg + +Sysreg S2PIR_EL2 3 4 10 2 5 +Fields PIRx_ELx +EndSysreg + Sysreg LORSA_EL1 3 0 10 4 0 Res0 63:52 Field 51:16 SA