From b99387e4b7c9952f32db7287fa99c1784bcfd7dc Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:22 +0530 Subject: [PATCH 01/26] RISC-V: KVM: Order the object files alphabetically ANBZ: #33223 commit e403a90ad65628d32843f5d40542502659bc4573 upstream. Order the object files alphabetically in the Makefile so that it is very predictable inserting new object files in the future. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-2-apatel@ventanamicro.com Signed-off-by: Anup Patel [Ruidong: Conflicts: reason: Context-only conflict in arch/riscv/kvm/Makefile due to surrounding lines differing from upstream; resolved by accepting upstream ordering change. ] Signed-off-by: Ruidong Tian --- arch/riscv/kvm/Makefile | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 27375eecdb97..1423d47f993f 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -9,28 +9,30 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o +# Ordered alphabetically +kvm-y += aia.o +kvm-y += aia_aplic.o +kvm-y += aia_device.o +kvm-y += aia_imsic.o kvm-y += main.o -kvm-y += vm.o -kvm-y += vmid.o -kvm-y += tlb.o kvm-y += mmu.o +kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vcpu_exit.o kvm-y += vcpu_fp.o -kvm-y += vcpu_vector.o kvm-y += vcpu_insn.o kvm-y += vcpu_onereg.o -kvm-y += vcpu_switch.o +kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o kvm-y += vcpu_sbi.o -kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o kvm-y += vcpu_sbi_base.o -kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_hsm.o +kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o +kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_sta.o kvm-y += vcpu_sbi_system.o +kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o +kvm-y += vcpu_switch.o kvm-y += vcpu_timer.o -kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o -kvm-y += aia.o -kvm-y += aia_device.o -kvm-y += aia_aplic.o -kvm-y += aia_imsic.o +kvm-y += vcpu_vector.o +kvm-y += vm.o +kvm-y += vmid.o -- Gitee From a0932ce487081a4550302be2f89a35ba173ed4f8 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:23 +0530 Subject: [PATCH 02/26] RISC-V: KVM: Save/restore HSTATUS in C source ANBZ: #33223 commit b6114a7e2433e91fedee3ed983abf77b3ebc167c upstream. We will be optimizing HSTATUS CSR access via shared memory setup using the SBI nested acceleration extension. To facilitate this, we first move HSTATUS save/restore in kvm_riscv_vcpu_enter_exit(). 
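For context (an illustrative note, not part of the upstream change): csr_swap()
expands to a single CSRRW instruction, which writes the new CSR value and
returns the old one, so one swap per direction is enough to both save the host
HSTATUS and install the guest HSTATUS:

  /* sketch of the sequence added to kvm_riscv_vcpu_enter_exit() below */
  hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); /* host -> guest */
  __kvm_riscv_switch_to(&vcpu->arch);
  gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); /* guest -> host */

This mirrors the csrrw-based save/restore that the diff below removes from
vcpu_switch.S.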
Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-3-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/vcpu.c | 9 +++++++++ arch/riscv/kvm/vcpu_switch.S | 36 +++++++++++++----------------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 966b8e9a1078..08d619eae00a 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -725,9 +725,18 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v */ static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context; + struct kvm_cpu_context *hcntx = &vcpu->arch.host_context; + kvm_riscv_vcpu_swap_in_guest_state(vcpu); guest_state_enter_irqoff(); + + hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); + __kvm_riscv_switch_to(&vcpu->arch); + + gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + vcpu->arch.last_exit_cpu = vcpu->cpu; guest_state_exit_irqoff(); kvm_riscv_vcpu_swap_in_host_state(vcpu); diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index 0c26189aa01c..f83643c4fdb9 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -43,35 +43,30 @@ SYM_FUNC_START(__kvm_riscv_switch_to) /* Load Guest CSR values */ REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) - REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0) - REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - la t4, .Lkvm_switch_return - REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0) + REG_L t1, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) + la t3, .Lkvm_switch_return + REG_L t4, (KVM_ARCH_GUEST_SEPC)(a0) /* Save Host and Restore Guest SSTATUS */ csrrw t0, CSR_SSTATUS, t0 - /* Save Host and Restore Guest HSTATUS */ - csrrw t1, CSR_HSTATUS, t1 - /* Save Host and Restore Guest SCOUNTEREN */ - csrrw t2, CSR_SCOUNTEREN, t2 + csrrw t1, CSR_SCOUNTEREN, t1 /* Save Host STVEC and change it to return path */ - csrrw t4, CSR_STVEC, t4 + csrrw t3, CSR_STVEC, t3 /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */ - csrrw t3, CSR_SSCRATCH, a0 + csrrw t2, CSR_SSCRATCH, a0 /* Restore Guest SEPC */ - csrw CSR_SEPC, t5 + csrw CSR_SEPC, t4 /* Store Host CSR values */ REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0) - REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0) - REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0) - REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0) - REG_S t4, (KVM_ARCH_HOST_STVEC)(a0) + REG_S t1, (KVM_ARCH_HOST_SCOUNTEREN)(a0) + REG_S t2, (KVM_ARCH_HOST_SSCRATCH)(a0) + REG_S t3, (KVM_ARCH_HOST_STVEC)(a0) /* Restore Guest GPRs (except A0) */ REG_L ra, (KVM_ARCH_GUEST_RA)(a0) @@ -153,8 +148,7 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_L t1, (KVM_ARCH_HOST_STVEC)(a0) REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0) REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0) - REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0) - REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0) + REG_L t4, (KVM_ARCH_HOST_SSTATUS)(a0) /* Save Guest SEPC */ csrr t0, CSR_SEPC @@ -168,18 +162,14 @@ SYM_FUNC_START(__kvm_riscv_switch_to) /* Save Guest and Restore Host SCOUNTEREN */ csrrw t3, CSR_SCOUNTEREN, t3 - /* Save Guest and Restore Host HSTATUS */ - csrrw t4, CSR_HSTATUS, t4 - /* Save Guest and Restore Host SSTATUS */ - csrrw t5, CSR_SSTATUS, t5 + csrrw t4, CSR_SSTATUS, t4 /* Store Guest CSR values */ REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0) REG_S t2, (KVM_ARCH_GUEST_A0)(a0) REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0) - REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0) + REG_S t4, 
(KVM_ARCH_GUEST_SSTATUS)(a0) /* Restore Host GPRs (except A0 and T0-T6) */ REG_L ra, (KVM_ARCH_HOST_RA)(a0) -- Gitee From 44f7ec91531cbced6b8017933b19f89cb6336d0f Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:24 +0530 Subject: [PATCH 03/26] RISC-V: KVM: Save/restore SCOUNTEREN in C source ANBZ: #33223 commit b922307a5fecfcf33ca1697f6ec33c9274b75c46 upstream. The SCOUNTEREN CSR need not be saved/restored in the low-level __kvm_riscv_switch_to() function hence move the SCOUNTEREN CSR save/restore to the kvm_riscv_vcpu_swap_in_guest_state() and kvm_riscv_vcpu_swap_in_host_state() functions in C sources. Also, re-arrange the CSR save/restore and related GPR usage in the low-level __kvm_riscv_switch_to() low-level function. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-4-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/vcpu.c | 2 ++ arch/riscv/kvm/vcpu_switch.S | 52 +++++++++++++++--------------------- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 08d619eae00a..8d71ab61131d 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -696,6 +696,7 @@ static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu * struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren); vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg); if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) @@ -709,6 +710,7 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren); csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg); if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index f83643c4fdb9..3f8cbc21a644 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -43,30 +43,25 @@ SYM_FUNC_START(__kvm_riscv_switch_to) /* Load Guest CSR values */ REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) - REG_L t1, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - la t3, .Lkvm_switch_return - REG_L t4, (KVM_ARCH_GUEST_SEPC)(a0) + la t1, .Lkvm_switch_return + REG_L t2, (KVM_ARCH_GUEST_SEPC)(a0) /* Save Host and Restore Guest SSTATUS */ csrrw t0, CSR_SSTATUS, t0 - /* Save Host and Restore Guest SCOUNTEREN */ - csrrw t1, CSR_SCOUNTEREN, t1 - /* Save Host STVEC and change it to return path */ - csrrw t3, CSR_STVEC, t3 - - /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */ - csrrw t2, CSR_SSCRATCH, a0 + csrrw t1, CSR_STVEC, t1 /* Restore Guest SEPC */ - csrw CSR_SEPC, t4 + csrw CSR_SEPC, t2 + + /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */ + csrrw t3, CSR_SSCRATCH, a0 /* Store Host CSR values */ REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0) - REG_S t1, (KVM_ARCH_HOST_SCOUNTEREN)(a0) - REG_S t2, (KVM_ARCH_HOST_SSCRATCH)(a0) - REG_S t3, (KVM_ARCH_HOST_STVEC)(a0) + REG_S t1, (KVM_ARCH_HOST_STVEC)(a0) + REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0) /* Restore Guest GPRs (except A0) */ REG_L ra, (KVM_ARCH_GUEST_RA)(a0) @@ -145,31 +140,26 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S t6, 
(KVM_ARCH_GUEST_T6)(a0) /* Load Host CSR values */ - REG_L t1, (KVM_ARCH_HOST_STVEC)(a0) - REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0) - REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0) - REG_L t4, (KVM_ARCH_HOST_SSTATUS)(a0) - - /* Save Guest SEPC */ - csrr t0, CSR_SEPC + REG_L t0, (KVM_ARCH_HOST_STVEC)(a0) + REG_L t1, (KVM_ARCH_HOST_SSCRATCH)(a0) + REG_L t2, (KVM_ARCH_HOST_SSTATUS)(a0) /* Save Guest A0 and Restore Host SSCRATCH */ - csrrw t2, CSR_SSCRATCH, t2 + csrrw t1, CSR_SSCRATCH, t1 - /* Restore Host STVEC */ - csrw CSR_STVEC, t1 + /* Save Guest SEPC */ + csrr t3, CSR_SEPC - /* Save Guest and Restore Host SCOUNTEREN */ - csrrw t3, CSR_SCOUNTEREN, t3 + /* Restore Host STVEC */ + csrw CSR_STVEC, t0 /* Save Guest and Restore Host SSTATUS */ - csrrw t4, CSR_SSTATUS, t4 + csrrw t2, CSR_SSTATUS, t2 /* Store Guest CSR values */ - REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0) - REG_S t2, (KVM_ARCH_GUEST_A0)(a0) - REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - REG_S t4, (KVM_ARCH_GUEST_SSTATUS)(a0) + REG_S t1, (KVM_ARCH_GUEST_A0)(a0) + REG_S t2, (KVM_ARCH_GUEST_SSTATUS)(a0) + REG_S t3, (KVM_ARCH_GUEST_SEPC)(a0) /* Restore Host GPRs (except A0 and T0-T6) */ REG_L ra, (KVM_ARCH_HOST_RA)(a0) -- Gitee From 0a16100d4e0343b021d7479fbce364b3dce7a2f0 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:25 +0530 Subject: [PATCH 04/26] RISC-V: KVM: Break down the __kvm_riscv_switch_to() into macros ANBZ: #33223 commit 8f57adac39167de7416c7eae91b4621454def3cd upstream. Break down the __kvm_riscv_switch_to() function into macros so that these macros can be later re-used by SBI NACL extension based low-level switch function. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-5-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/vcpu_switch.S | 52 +++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index 3f8cbc21a644..9f13e5ce6a18 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -11,11 +11,7 @@ #include #include - .text - .altmacro - .option norelax - -SYM_FUNC_START(__kvm_riscv_switch_to) +.macro SAVE_HOST_GPRS /* Save Host GPRs (except A0 and T0-T6) */ REG_S ra, (KVM_ARCH_HOST_RA)(a0) REG_S sp, (KVM_ARCH_HOST_SP)(a0) @@ -40,10 +36,12 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S s9, (KVM_ARCH_HOST_S9)(a0) REG_S s10, (KVM_ARCH_HOST_S10)(a0) REG_S s11, (KVM_ARCH_HOST_S11)(a0) +.endm +.macro SAVE_HOST_AND_RESTORE_GUEST_CSRS __resume_addr /* Load Guest CSR values */ REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) - la t1, .Lkvm_switch_return + la t1, \__resume_addr REG_L t2, (KVM_ARCH_GUEST_SEPC)(a0) /* Save Host and Restore Guest SSTATUS */ @@ -62,7 +60,9 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0) REG_S t1, (KVM_ARCH_HOST_STVEC)(a0) REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0) +.endm +.macro RESTORE_GUEST_GPRS /* Restore Guest GPRs (except A0) */ REG_L ra, (KVM_ARCH_GUEST_RA)(a0) REG_L sp, (KVM_ARCH_GUEST_SP)(a0) @@ -97,13 +97,9 @@ SYM_FUNC_START(__kvm_riscv_switch_to) /* Restore Guest A0 */ REG_L a0, (KVM_ARCH_GUEST_A0)(a0) +.endm - /* Resume Guest */ - sret - - /* Back to Host */ - .align 2 -.Lkvm_switch_return: +.macro SAVE_GUEST_GPRS /* Swap Guest A0 with SSCRATCH */ csrrw a0, CSR_SSCRATCH, a0 @@ -138,7 +134,9 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S t4, (KVM_ARCH_GUEST_T4)(a0) REG_S t5, (KVM_ARCH_GUEST_T5)(a0) REG_S t6, (KVM_ARCH_GUEST_T6)(a0) 
+.endm +.macro SAVE_GUEST_AND_RESTORE_HOST_CSRS /* Load Host CSR values */ REG_L t0, (KVM_ARCH_HOST_STVEC)(a0) REG_L t1, (KVM_ARCH_HOST_SSCRATCH)(a0) @@ -160,7 +158,9 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S t1, (KVM_ARCH_GUEST_A0)(a0) REG_S t2, (KVM_ARCH_GUEST_SSTATUS)(a0) REG_S t3, (KVM_ARCH_GUEST_SEPC)(a0) +.endm +.macro RESTORE_HOST_GPRS /* Restore Host GPRs (except A0 and T0-T6) */ REG_L ra, (KVM_ARCH_HOST_RA)(a0) REG_L sp, (KVM_ARCH_HOST_SP)(a0) @@ -185,6 +185,34 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_L s9, (KVM_ARCH_HOST_S9)(a0) REG_L s10, (KVM_ARCH_HOST_S10)(a0) REG_L s11, (KVM_ARCH_HOST_S11)(a0) +.endm + + .text + .altmacro + .option norelax + + /* + * Parameters: + * A0 <= Pointer to struct kvm_vcpu_arch + */ +SYM_FUNC_START(__kvm_riscv_switch_to) + SAVE_HOST_GPRS + + SAVE_HOST_AND_RESTORE_GUEST_CSRS .Lkvm_switch_return + + RESTORE_GUEST_GPRS + + /* Resume Guest using SRET */ + sret + + /* Back to Host */ + .align 2 +.Lkvm_switch_return: + SAVE_GUEST_GPRS + + SAVE_GUEST_AND_RESTORE_HOST_CSRS + + RESTORE_HOST_GPRS /* Return to C code */ ret -- Gitee From 5e47c0a2264811e61d789d634da0669c91715953 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:26 +0530 Subject: [PATCH 05/26] RISC-V: KVM: Replace aia_set_hvictl() with aia_hvictl_value() ANBZ: #33223 commit 5d8f7ee9286e981449416ce20bba6546995f585a upstream. The aia_set_hvictl() internally writes the HVICTL CSR which makes it difficult to optimize the CSR write using SBI NACL extension for kvm_riscv_vcpu_aia_update_hvip() function so replace aia_set_hvictl() with new aia_hvictl_value() which only computes the HVICTL value. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-6-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/aia.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 9f3b527596de..7adda442af0e 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -51,7 +51,7 @@ static int aia_find_hgei(struct kvm_vcpu *owner) return hgei; } -static void aia_set_hvictl(bool ext_irq_pending) +static inline unsigned long aia_hvictl_value(bool ext_irq_pending) { unsigned long hvictl; @@ -62,7 +62,7 @@ static void aia_set_hvictl(bool ext_irq_pending) hvictl = (IRQ_S_EXT << HVICTL_IID_SHIFT) & HVICTL_IID; hvictl |= ext_irq_pending; - csr_write(CSR_HVICTL, hvictl); + return hvictl; } #ifdef CONFIG_32BIT @@ -130,7 +130,7 @@ void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) #ifdef CONFIG_32BIT csr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); #endif - aia_set_hvictl(!!(csr->hvip & BIT(IRQ_VS_EXT))); + csr_write(CSR_HVICTL, aia_hvictl_value(!!(csr->hvip & BIT(IRQ_VS_EXT)))); } void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) @@ -536,7 +536,7 @@ void kvm_riscv_aia_enable(void) if (!kvm_riscv_aia_available()) return; - aia_set_hvictl(false); + csr_write(CSR_HVICTL, aia_hvictl_value(false)); csr_write(CSR_HVIPRIO1, 0x0); csr_write(CSR_HVIPRIO2, 0x0); #ifdef CONFIG_32BIT @@ -572,7 +572,7 @@ void kvm_riscv_aia_disable(void) csr_clear(CSR_HIE, BIT(IRQ_S_GEXT)); disable_percpu_irq(hgei_parent_irq); - aia_set_hvictl(false); + csr_write(CSR_HVICTL, aia_hvictl_value(false)); raw_spin_lock_irqsave(&hgctrl->lock, flags); -- Gitee From ec4d37b74a05ca2fbd4ff42dd50761d22a5c4ff5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:27 +0530 Subject: [PATCH 06/26] RISC-V: KVM: Don't setup SGEI for 
zero guest external interrupts ANBZ: #33223 commit 15ff2ff3c3b99f986fde919dffab27007bbe35ed upstream. No need to setup SGEI local interrupt when there are zero guest external interrupts (i.e. zero HW IMSIC guest files). Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-7-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/aia.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 7adda442af0e..8800fc95e662 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -499,6 +499,10 @@ static int aia_hgei_init(void) hgctrl->free_bitmap = 0; } + /* Skip SGEI interrupt setup for zero guest external interrupts */ + if (!kvm_riscv_aia_nr_hgei) + goto skip_sgei_interrupt; + /* Find INTC irq domain */ domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY); @@ -522,11 +526,16 @@ static int aia_hgei_init(void) return rc; } +skip_sgei_interrupt: return 0; } static void aia_hgei_exit(void) { + /* Do nothing for zero guest external interrupts */ + if (!kvm_riscv_aia_nr_hgei) + return; + /* Free per-CPU SGEI interrupt */ free_percpu_irq(hgei_parent_irq, &aia_hgei); } -- Gitee From 951693d490bbaec2cdd08c7a158e9fab2e8b78a9 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:28 +0530 Subject: [PATCH 07/26] RISC-V: Add defines for the SBI nested acceleration extension ANBZ: #33223 commit 5daf89e73d77a5edb21c9b2d67a1b5bf02e61e5a upstream. Add defines for the new SBI nested acceleration extension which was ratified as part of the SBI v2.0 specification. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-8-apatel@ventanamicro.com Signed-off-by: Anup Patel [Ruidong: Conflicts: reason: Context-only conflict in arch/riscv/include/asm/sbi.h due to surrounding lines differing from upstream; resolved by accepting upstream changes. 
] Signed-off-by: Ruidong Tian --- arch/riscv/include/asm/sbi.h | 120 +++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 3a525aabe7df..0caedd7eb01f 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -33,6 +33,7 @@ enum sbi_ext_id { SBI_EXT_PMU = 0x504D55, SBI_EXT_DBCN = 0x4442434E, SBI_EXT_STA = 0x535441, + SBI_EXT_NACL = 0x4E41434C, SBI_EXT_SSE = 0x535345, /* Experimentals extensions must lie within this range */ @@ -340,6 +341,125 @@ struct sbi_sta_struct { #define SBI_SHMEM_DISABLE -1 +enum sbi_ext_nacl_fid { + SBI_EXT_NACL_PROBE_FEATURE = 0x0, + SBI_EXT_NACL_SET_SHMEM = 0x1, + SBI_EXT_NACL_SYNC_CSR = 0x2, + SBI_EXT_NACL_SYNC_HFENCE = 0x3, + SBI_EXT_NACL_SYNC_SRET = 0x4, +}; + +enum sbi_ext_nacl_feature { + SBI_NACL_FEAT_SYNC_CSR = 0x0, + SBI_NACL_FEAT_SYNC_HFENCE = 0x1, + SBI_NACL_FEAT_SYNC_SRET = 0x2, + SBI_NACL_FEAT_AUTOSWAP_CSR = 0x3, +}; + +#define SBI_NACL_SHMEM_ADDR_SHIFT 12 +#define SBI_NACL_SHMEM_SCRATCH_OFFSET 0x0000 +#define SBI_NACL_SHMEM_SCRATCH_SIZE 0x1000 +#define SBI_NACL_SHMEM_SRET_OFFSET 0x0000 +#define SBI_NACL_SHMEM_SRET_SIZE 0x0200 +#define SBI_NACL_SHMEM_AUTOSWAP_OFFSET (SBI_NACL_SHMEM_SRET_OFFSET + \ + SBI_NACL_SHMEM_SRET_SIZE) +#define SBI_NACL_SHMEM_AUTOSWAP_SIZE 0x0080 +#define SBI_NACL_SHMEM_UNUSED_OFFSET (SBI_NACL_SHMEM_AUTOSWAP_OFFSET + \ + SBI_NACL_SHMEM_AUTOSWAP_SIZE) +#define SBI_NACL_SHMEM_UNUSED_SIZE 0x0580 +#define SBI_NACL_SHMEM_HFENCE_OFFSET (SBI_NACL_SHMEM_UNUSED_OFFSET + \ + SBI_NACL_SHMEM_UNUSED_SIZE) +#define SBI_NACL_SHMEM_HFENCE_SIZE 0x0780 +#define SBI_NACL_SHMEM_DBITMAP_OFFSET (SBI_NACL_SHMEM_HFENCE_OFFSET + \ + SBI_NACL_SHMEM_HFENCE_SIZE) +#define SBI_NACL_SHMEM_DBITMAP_SIZE 0x0080 +#define SBI_NACL_SHMEM_CSR_OFFSET (SBI_NACL_SHMEM_DBITMAP_OFFSET + \ + SBI_NACL_SHMEM_DBITMAP_SIZE) +#define SBI_NACL_SHMEM_CSR_SIZE ((__riscv_xlen / 8) * 1024) +#define SBI_NACL_SHMEM_SIZE (SBI_NACL_SHMEM_CSR_OFFSET + \ + SBI_NACL_SHMEM_CSR_SIZE) + +#define SBI_NACL_SHMEM_CSR_INDEX(__csr_num) \ + ((((__csr_num) & 0xc00) >> 2) | ((__csr_num) & 0xff)) + +#define SBI_NACL_SHMEM_HFENCE_ENTRY_SZ ((__riscv_xlen / 8) * 4) +#define SBI_NACL_SHMEM_HFENCE_ENTRY_MAX \ + (SBI_NACL_SHMEM_HFENCE_SIZE / \ + SBI_NACL_SHMEM_HFENCE_ENTRY_SZ) +#define SBI_NACL_SHMEM_HFENCE_ENTRY(__num) \ + (SBI_NACL_SHMEM_HFENCE_OFFSET + \ + (__num) * SBI_NACL_SHMEM_HFENCE_ENTRY_SZ) +#define SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(__num) \ + SBI_NACL_SHMEM_HFENCE_ENTRY(__num) +#define SBI_NACL_SHMEM_HFENCE_ENTRY_PNUM(__num)\ + (SBI_NACL_SHMEM_HFENCE_ENTRY(__num) + (__riscv_xlen / 8)) +#define SBI_NACL_SHMEM_HFENCE_ENTRY_PCOUNT(__num)\ + (SBI_NACL_SHMEM_HFENCE_ENTRY(__num) + \ + ((__riscv_xlen / 8) * 3)) + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_BITS 1 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_SHIFT \ + (__riscv_xlen - SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_BITS) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_BITS) - 1) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_PEND \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_MASK << \ + SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_SHIFT) + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD1_BITS 3 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD1_SHIFT \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_SHIFT - \ + SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD1_BITS) + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_BITS 4 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD1_SHIFT - \ + SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_BITS) 
+#define SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_BITS) - 1) + +#define SBI_NACL_SHMEM_HFENCE_TYPE_GVMA 0x0 +#define SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_ALL 0x1 +#define SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID 0x2 +#define SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID_ALL 0x3 +#define SBI_NACL_SHMEM_HFENCE_TYPE_VVMA 0x4 +#define SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ALL 0x5 +#define SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID 0x6 +#define SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID_ALL 0x7 + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD2_BITS 1 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD2_SHIFT \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT - \ + SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD2_BITS) + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_BITS 7 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_SHIFT \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD2_SHIFT - \ + SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_BITS) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_BITS) - 1) +#define SBI_NACL_SHMEM_HFENCE_ORDER_BASE 12 + +#if __riscv_xlen == 32 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_BITS 9 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_BITS 7 +#else +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_BITS 16 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_BITS 14 +#endif +#define SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_SHIFT \ + SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_BITS +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_BITS) - 1) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_BITS) - 1) + +#define SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS BIT(0) +#define SBI_NACL_SHMEM_AUTOSWAP_HSTATUS ((__riscv_xlen / 8) * 1) + +#define SBI_NACL_SHMEM_SRET_X(__i) ((__riscv_xlen / 8) * (__i)) +#define SBI_NACL_SHMEM_SRET_X_LAST 31 + /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 -- Gitee From 9063839d17748db08b1c8fc1f073bf63936bcb80 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Aug 2024 21:35:54 -0700 Subject: [PATCH 08/26] KVM: Rename arch hooks related to per-CPU virtualization enabling ANBZ: #33223 commit 071f24ad28cdcdfa544fdb79b1b1d2b423717a11 upstream. Rename the per-CPU hooks used to enable virtualization in hardware to align with the KVM-wide helpers in kvm_main.c, and to better capture that the callbacks are invoked on every online CPU. No functional change intended. 
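For reference, the rename is a one-to-one mapping applied to the generic code
and to every architecture that selects CONFIG_KVM_GENERIC_HARDWARE_ENABLING:

  kvm_arch_hardware_enable()  ->  kvm_arch_enable_virtualization_cpu()
  kvm_arch_hardware_disable() ->  kvm_arch_disable_virtualization_cpu()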
Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Message-ID: <20240830043600.127750-5-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Ruidong Tian --- arch/arm64/kvm/arm.c | 6 +++--- arch/loongarch/kvm/main.c | 4 ++-- arch/mips/kvm/mips.c | 4 ++-- arch/riscv/kvm/main.c | 4 ++-- arch/x86/kvm/x86.c | 6 +++--- include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e5b6fd47c068..94a2d119cb97 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2080,7 +2080,7 @@ static void cpu_hyp_uninit(void *discard) } } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { /* * Most calls to this function are made with migration @@ -2100,7 +2100,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_timer_cpu_down(); kvm_vgic_cpu_down(); @@ -2309,7 +2309,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits) /* * The stub hypercalls are now disabled, so set our local flag to - * prevent a later re-init attempt in kvm_arch_hardware_enable(). + * prevent a later re-init attempt in kvm_arch_enable_virtualization_cpu(). */ __this_cpu_write(kvm_hyp_initialized, 1); preempt_enable(); diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index c7763f951377..4d003e8c728b 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -280,7 +280,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -ENOIOCTLCMD; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { unsigned long env, gcfg = 0; @@ -325,7 +325,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { write_csr_gcfg(0); write_csr_gstat(0); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 231ac052b506..c2e5295947be 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -125,12 +125,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { return kvm_mips_callbacks->hardware_enable(); } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_mips_callbacks->hardware_disable(); } diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 225a435d9c9a..228f74c16781 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -20,7 +20,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { unsigned long hideleg, hedeleg; @@ -49,7 +49,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_riscv_aia_disable(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4e6481ba66c2..6222dc0ec650 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -371,7 +371,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn) /* * Disabling irqs at this point since the following code could be - * interrupted and executed through kvm_arch_hardware_disable() + * interrupted and executed through kvm_arch_disable_virtualization_cpu() */ local_irq_save(flags); if (msrs->registered) { @@ -12295,7 +12295,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) } 
EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -12391,7 +12391,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { static_call(kvm_x86_hardware_disable)(); drop_user_return_notifiers(); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 23948a67a8a3..f7753eca807c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1514,8 +1514,8 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -int kvm_arch_hardware_enable(void); -void kvm_arch_hardware_disable(void); +int kvm_arch_enable_virtualization_cpu(void); +void kvm_arch_disable_virtualization_cpu(void); #endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7cc475b875be..bcd8b7639f21 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5546,7 +5546,7 @@ static int __hardware_enable_nolock(void) if (__this_cpu_read(hardware_enabled)) return 0; - if (kvm_arch_hardware_enable()) { + if (kvm_arch_enable_virtualization_cpu()) { pr_info("kvm: enabling virtualization on CPU%d failed\n", raw_smp_processor_id()); return -EIO; @@ -5587,7 +5587,7 @@ static void hardware_disable_nolock(void *junk) if (!__this_cpu_read(hardware_enabled)) return; - kvm_arch_hardware_disable(); + kvm_arch_disable_virtualization_cpu(); __this_cpu_write(hardware_enabled, false); } -- Gitee From 670a820c2476633155f72bc3dec782e2cba31767 Mon Sep 17 00:00:00 2001 From: Chao Du Date: Tue, 2 Apr 2024 06:26:26 +0000 Subject: [PATCH 09/26] RISC-V: KVM: Implement kvm_arch_vcpu_ioctl_set_guest_debug() ANBZ: #33223 commit edcbe90f128922830228b9a5656c944a5d7ac306 upstream. kvm_vm_ioctl_check_extension(): Return 1 if KVM_CAP_SET_GUEST_DEBUG is been checked. kvm_arch_vcpu_ioctl_set_guest_debug(): Update the guest_debug flags from userspace accordingly. Route the breakpoint exceptions to HS mode if the VCPU is being debugged by userspace, by clearing the corresponding bit in hedeleg. Initialize the hedeleg configuration in kvm_riscv_vcpu_setup_config(). Write the actual CSR in kvm_arch_vcpu_load(). 
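To make the trap routing concrete (a sketch of what the diff below adds): bits
set in HEDELEG let the corresponding guest exceptions be handled in VS-mode
without a VM exit, so clearing the breakpoint bit forces ebreak to trap to
HS-mode, where KVM can forward it to userspace:

  cfg->hedeleg = KVM_HEDELEG_DEFAULT;            /* ebreak handled in guest */
  if (vcpu->guest_debug)
          cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);  /* ebreak traps to KVM */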
Signed-off-by: Chao Du Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240402062628.5425-2-duchao@eswincomputing.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/include/asm/kvm_host.h | 12 ++++++++++++ arch/riscv/kvm/main.c | 18 ++---------------- arch/riscv/kvm/vcpu.c | 16 ++++++++++++++-- arch/riscv/kvm/vm.c | 1 + 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index bfbf9e74ad75..e2a0d544f1f8 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -43,6 +43,17 @@ KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6) +#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \ + BIT(EXC_BREAKPOINT) | \ + BIT(EXC_SYSCALL) | \ + BIT(EXC_INST_PAGE_FAULT) | \ + BIT(EXC_LOAD_PAGE_FAULT) | \ + BIT(EXC_STORE_PAGE_FAULT)) + +#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \ + BIT(IRQ_VS_TIMER) | \ + BIT(IRQ_VS_EXT)) + enum kvm_riscv_hfence_type { KVM_RISCV_HFENCE_UNKNOWN = 0, KVM_RISCV_HFENCE_GVMA_VMID_GPA, @@ -178,6 +189,7 @@ struct kvm_vcpu_csr { struct kvm_vcpu_config { u64 henvcfg; u64 hstateen0; + unsigned long hedeleg; }; struct kvm_vcpu_smstateen_csr { diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 228f74c16781..f3427f6de608 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_arch_enable_virtualization_cpu(void) { - unsigned long hideleg, hedeleg; - - hedeleg = 0; - hedeleg |= (1UL << EXC_INST_MISALIGNED); - hedeleg |= (1UL << EXC_BREAKPOINT); - hedeleg |= (1UL << EXC_SYSCALL); - hedeleg |= (1UL << EXC_INST_PAGE_FAULT); - hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT); - hedeleg |= (1UL << EXC_STORE_PAGE_FAULT); - csr_write(CSR_HEDELEG, hedeleg); - - hideleg = 0; - hideleg |= (1UL << IRQ_VS_SOFT); - hideleg |= (1UL << IRQ_VS_TIMER); - hideleg |= (1UL << IRQ_VS_EXT); - csr_write(CSR_HIDELEG, hideleg); + csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); + csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); /* VS should access only the time counter directly. Everything else should trap */ csr_write(CSR_HCOUNTEREN, 0x02); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 8d71ab61131d..83a1f258d52d 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -529,8 +529,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - /* TODO; To be implemented later. 
*/ - return -EINVAL; + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT); + } else { + vcpu->guest_debug = 0; + vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT); + } + + return 0; } static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) @@ -563,6 +570,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, SMSTATEEN)) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } + + cfg->hedeleg = KVM_HEDELEG_DEFAULT; + if (vcpu->guest_debug) + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -577,6 +588,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_VSEPC, csr->vsepc); csr_write(CSR_VSCAUSE, csr->vscause); csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HEDELEG, cfg->hedeleg); csr_write(CSR_HVIP, csr->hvip); csr_write(CSR_VSATP, csr->vsatp); csr_write(CSR_HENVCFG, cfg->henvcfg); diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index d86024055ff6..636765cd53f0 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -187,6 +187,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: -- Gitee From 7680e4802f2c7c20ef89775d4958e2a3548a6119 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jan 2024 03:12:59 -0500 Subject: [PATCH 10/26] KVM: define __KVM_HAVE_GUEST_DEBUG unconditionally ANBZ: #33223 commit 6bda055d625860736f7ea5b4eda816f276899d8b upstream. Since all architectures (for historical reasons) have to define struct kvm_guest_debug_arch, and since userspace has to check KVM_CHECK_EXTENSION(KVM_CAP_SET_GUEST_DEBUG) anyway, there is no advantage in masking the capability #define itself. Remove the #define __KVM_HAVE_GUEST_DEBUG from architecture-specific headers. Signed-off-by: Paolo Bonzini Signed-off-by: Ruidong Tian --- arch/arm64/include/uapi/asm/kvm.h | 1 - arch/powerpc/include/uapi/asm/kvm.h | 1 - arch/s390/include/uapi/asm/kvm.h | 1 - arch/x86/include/uapi/asm/kvm.h | 1 - include/uapi/linux/kvm.h | 6 ++++-- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b56d0d62cda8..974a6a4cdc02 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -37,7 +37,6 @@ #include #include -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_VCPU_EVENTS diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 9f18fa090f1f..0572d5238b13 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -28,7 +28,6 @@ #define __KVM_HAVE_PPC_SMT #define __KVM_HAVE_IRQCHIP #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_GUEST_DEBUG /* Not always available, but if it is, this is the correct offset. 
*/ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index abe926d43cbe..b3ec6ddd6a13 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -12,7 +12,6 @@ #include #define __KVM_S390 -#define __KVM_HAVE_GUEST_DEBUG /* Device control API: s390-specific devices */ #define KVM_DEV_FLIC_GET_ALL_IRQS 1 diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a448d0964fc0..fbe3ae1f64d2 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -40,7 +40,6 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 03df59900b7d..bb3b4b681fd5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -85,6 +85,10 @@ struct kvm_debug_guest { /* *** End of deprecated interfaces *** */ +/* + * Backwards-compatible definitions. + */ +#define __KVM_HAVE_GUEST_DEBUG /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { @@ -996,9 +1000,7 @@ struct kvm_ppc_resize_hpt { /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 #define KVM_CAP_USER_NMI 22 -#ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 -#endif #ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif -- Gitee From 9cf83cfe8a03162738a07d763da1ae6a02e9d139 Mon Sep 17 00:00:00 2001 From: Chao Du Date: Tue, 2 Apr 2024 06:26:27 +0000 Subject: [PATCH 11/26] RISC-V: KVM: Handle breakpoint exits for VCPU ANBZ: #33223 commit 1df1fb521b9dcf6a2fa8f74f9f39d40e5a6bd233 upstream. Exit to userspace for breakpoint traps. Set the exit_reason as KVM_EXIT_DEBUG before exit. Signed-off-by: Chao Du Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20240402062628.5425-3-duchao@eswincomputing.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/vcpu_exit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index e992b2870cc0..6e0c18412795 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -235,6 +235,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run); break; + case EXC_BREAKPOINT: + run->exit_reason = KVM_EXIT_DEBUG; + ret = 0; + break; default: break; } -- Gitee From 6f159687f7f7e34158b755bc0152f624247cb190 Mon Sep 17 00:00:00 2001 From: Chao Du Date: Tue, 2 Apr 2024 06:26:28 +0000 Subject: [PATCH 12/26] RISC-V: KVM: selftests: Add ebreak test support ANBZ: #33223 commit f1c48c1ec73538a8e49695445a0fbc52156aac42 upstream. Initial support for RISC-V KVM ebreak test. Check the exit reason and the PC when guest debug is enabled. Also to make sure the guest could handle the ebreak exception without exiting to the VMM when guest debug is not enabled. Signed-off-by: Chao Du Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20240402062628.5425-4-duchao@eswincomputing.com Signed-off-by: Anup Patel [Ruidong: Conflicts: reason: Context-only conflict in tools/testing/selftests/kvm/Makefile due to surrounding lines differing from upstream; resolved by accepting upstream changes. 
] Signed-off-by: Ruidong Tian --- tools/testing/selftests/kvm/Makefile | 1 + .../testing/selftests/kvm/riscv/ebreak_test.c | 82 +++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 tools/testing/selftests/kvm/riscv/ebreak_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ce9479760ac8..197ba0aebf7a 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -191,6 +191,7 @@ TEST_GEN_PROGS_s390x += rseq_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS_s390x += kvm_binary_stats_test +TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += demand_paging_test diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c new file mode 100644 index 000000000000..823c132069b4 --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V KVM ebreak test. + * + * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd. + * + */ +#include "kvm_util.h" + +#define LABEL_ADDRESS(v) ((uint64_t)&(v)) + +extern unsigned char sw_bp_1, sw_bp_2; +static uint64_t sw_bp_addr; + +static void guest_code(void) +{ + asm volatile( + ".option push\n" + ".option norvc\n" + "sw_bp_1: ebreak\n" + "sw_bp_2: ebreak\n" + ".option pop\n" + ); + GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2)); + + GUEST_DONE(); +} + +static void guest_breakpoint_handler(struct ex_regs *regs) +{ + WRITE_ONCE(sw_bp_addr, regs->epc); + regs->epc += 4; +} + +int main(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint64_t pc; + struct kvm_guest_debug debug = { + .control = KVM_GUESTDBG_ENABLE, + }; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_vector_tables(vm); + vcpu_init_vector_tables(vcpu); + vm_install_exception_handler(vm, EXC_BREAKPOINT, + guest_breakpoint_handler); + + /* + * Enable the guest debug. + * ebreak should exit to the VMM with KVM_EXIT_DEBUG reason. + */ + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc); + TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1)); + + /* skip sw_bp_1 */ + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4); + + /* + * Disable all debug controls. + * Guest should handle the ebreak without exiting to the VMM. + */ + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); + + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + + return 0; +} -- Gitee From fddda74f9eeb45de0106224d1731bc60259702cd Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 8 Jul 2024 11:28:47 +0800 Subject: [PATCH 13/26] riscv: stacktrace: Add USER_STACKTRACE support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #33223 commit 1a7483318274d0ec60f160e604c2a1dbce27fc0a upstream. Currently, userstacktrace is unsupported for riscv. So use the perf_callchain_user() code as blueprint to implement the arch_stack_walk_user() which add userstacktrace support on riscv. Meanwhile, we can use arch_stack_walk_user() to simplify the implementation of perf_callchain_user(). 
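For reference, the user-stack walker relies on the standard RISC-V frame
record, where the frame pointer points just past a two-word {fp, ra} pair
(struct stackframe from arch/riscv/include/asm/stacktrace.h):

  struct stackframe {
          unsigned long fp;     /* caller's frame pointer */
          unsigned long ra;     /* return address */
  };

Each step loads the record at fp - sizeof(struct stackframe) with
__copy_from_user_inatomic(), reports ra through the consume_entry callback,
and continues from the saved fp until it is zero or not 8-byte aligned.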
An ftrace test case is shown below:

 # cd /sys/kernel/debug/tracing
 # echo 1 > options/userstacktrace
 # echo 1 > options/sym-userobj
 # echo 1 > events/sched/sched_process_fork/enable
 # cat trace
 ......
 bash-178 [000] ...1. 97.968395: sched_process_fork: comm=bash pid=178 child_comm=bash child_pid=231
 bash-178 [000] ...1. 97.970075:
 => /lib/libc.so.6[+0xb5090]

A simple perf test also works, as shown below:

 # perf record -e cpu-clock --call-graph fp top
 # perf report --call-graph

 .....
   66.54%  0.00%  top  [kernel.kallsyms]  [k] ret_from_exception
           |
           ---ret_from_exception
              |
              |--58.97%--do_trap_ecall_u
              |          |
              |          |--17.34%--__riscv_sys_read
              |          |          ksys_read
              |          |          |
              |          |           --16.88%--vfs_read
              |          |                     |
              |          |                     |--10.90%--seq_read

Signed-off-by: Jinjie Ruan
Tested-by: Jinjie Ruan
Cc: Björn Töpel
Link: https://lore.kernel.org/r/20240708032847.2998158-3-ruanjinjie@huawei.com
Signed-off-by: Palmer Dabbelt
Signed-off-by: Ruidong Tian
---
 arch/riscv/Kconfig                 |  1 +
 arch/riscv/kernel/perf_callchain.c | 46 ++----------------------------
 arch/riscv/kernel/stacktrace.c     | 43 ++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 2b952aaecaf6..59bdd20354a5 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -187,6 +187,7 @@ config RISCV
 	select THREAD_INFO_IN_TASK
 	select TRACE_IRQFLAGS_SUPPORT
 	select UACCESS_MEMCPY if !MMU
+	select USER_STACKTRACE_SUPPORT
 	select ZONE_DMA32 if 64BIT
 
 config CLANG_SUPPORTS_DYNAMIC_FTRACE
diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
index 2932791e9388..c7468af77c66 100644
--- a/arch/riscv/kernel/perf_callchain.c
+++ b/arch/riscv/kernel/perf_callchain.c
@@ -6,37 +6,9 @@
 
 #include 
 
-/*
- * Get the return address for a single stackframe and return a pointer to the
- * next frame tail.
- */ -static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, - unsigned long fp, unsigned long reg_ra) +static bool fill_callchain(void *entry, unsigned long pc) { - struct stackframe buftail; - unsigned long ra = 0; - unsigned long __user *user_frame_tail = - (unsigned long __user *)(fp - sizeof(struct stackframe)); - - /* Check accessibility of one struct frame_tail beyond */ - if (!access_ok(user_frame_tail, sizeof(buftail))) - return 0; - if (__copy_from_user_inatomic(&buftail, user_frame_tail, - sizeof(buftail))) - return 0; - - if (reg_ra != 0) - ra = reg_ra; - else - ra = buftail.ra; - - fp = buftail.fp; - if (ra != 0) - perf_callchain_store(entry, ra); - else - return 0; - - return fp; + return perf_callchain_store(entry, pc) == 0; } /* @@ -56,19 +28,7 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - unsigned long fp = 0; - - fp = regs->s0; - perf_callchain_store(entry, regs->epc); - - fp = user_backtrace(entry, fp, regs->ra); - while (fp && !(fp & 0x7) && entry->nr < entry->max_stack) - fp = user_backtrace(entry, fp, 0); -} - -static bool fill_callchain(void *entry, unsigned long pc) -{ - return perf_callchain_store(entry, pc) == 0; + arch_stack_walk_user(fill_callchain, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 10e311b2759d..72abddc892c4 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -162,3 +162,46 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void { walk_stackframe(task, regs, consume_entry, cookie); } + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long unwind_user_frame(stack_trace_consume_fn consume_entry, + void *cookie, unsigned long fp, + unsigned long reg_ra) +{ + struct stackframe buftail; + unsigned long ra = 0; + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic(&buftail, user_frame_tail, + sizeof(buftail))) + return 0; + + ra = reg_ra ? : buftail.ra; + + fp = buftail.fp; + if (!ra || !consume_entry(cookie, ra)) + return 0; + + return fp; +} + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + unsigned long fp = 0; + + fp = regs->s0; + if (!consume_entry(cookie, regs->epc)) + return; + + fp = unwind_user_frame(consume_entry, cookie, fp, regs->ra); + while (fp && !(fp & 0x7)) + fp = unwind_user_frame(consume_entry, cookie, fp, 0); +} -- Gitee From b57cc0d33b631b0361f291dc8568a221ab472346 Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Tue, 15 Oct 2024 10:58:24 +0800 Subject: [PATCH 14/26] riscv: perf: add guest vs host distinction ANBZ: #33223 commit 5bb5ccb3e8d8dba29941cd78d5c1bcd27b227b4a upstream. Introduce basic guest support in perf, enabling it to distinguish between PMU interrupts in the host or guest, and collect fundamental information. 
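A sketch of how the new hooks are meant to be consumed (mirroring the diff
below): perf_guest_state() returns a bitmask that is zero outside guest
context, so a sample can be attributed and its PC fetched from the registered
guest callbacks instead of from the trapped host registers:

  unsigned int state = perf_guest_state();

  if (state)      /* PMI arrived while a vCPU was loaded */
          misc |= (state & PERF_GUEST_USER) ? PERF_RECORD_MISC_GUEST_USER
                                            : PERF_RECORD_MISC_GUEST_KERNEL;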
Signed-off-by: Quan Zhou Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/a67d527dc1b11493fe11f7f53584772fdd983744.1728957131.git.zhouquan@iscas.ac.cn Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/include/asm/perf_event.h | 6 +++++ arch/riscv/kernel/perf_callchain.c | 38 +++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index 665bbc9b2f84..38926b4a902d 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -8,7 +8,11 @@ #ifndef _ASM_RISCV_PERF_EVENT_H #define _ASM_RISCV_PERF_EVENT_H +#ifdef CONFIG_PERF_EVENTS #include +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs #define perf_arch_fetch_caller_regs(regs, __ip) { \ @@ -17,4 +21,6 @@ (regs)->sp = current_stack_pointer; \ (regs)->status = SR_PP; \ } +#endif + #endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index c7468af77c66..c2c81a80f816 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -28,11 +28,49 @@ static bool fill_callchain(void *entry, unsigned long pc) void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + arch_stack_walk_user(fill_callchain, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + walk_stackframe(NULL, regs, fill_callchain, entry); } + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_state()) + return perf_guest_get_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + unsigned int guest_state = perf_guest_state(); + unsigned long misc = 0; + + if (guest_state) { + if (guest_state & PERF_GUEST_USER) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} -- Gitee From bb925468604444d6b781ef19709c71a30b0913f1 Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Tue, 15 Oct 2024 10:58:37 +0800 Subject: [PATCH 15/26] riscv: KVM: add basic support for host vs guest profiling ANBZ: #33223 commit eded6754f398b5b4950e8f593f75fee63a8b49ad upstream. For the information collected on the host side, we need to identify which data originates from the guest and record these events separately, this can be achieved by having KVM register perf callbacks. 
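For context, kvm_register_perf_callbacks() lives in generic code under
CONFIG_GUEST_PERF_EVENTS and keys everything off the running vCPU; a
simplified sketch of the state callback it installs:

  static unsigned int kvm_guest_state(void)
  {
          struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

          if (!kvm_arch_pmi_in_guest(vcpu))
                  return 0;       /* not in guest, use host pt_regs */

          return PERF_GUEST_ACTIVE |
                 (kvm_arch_vcpu_in_kernel(vcpu) ? 0 : PERF_GUEST_USER);
  }

This is why the riscv side only needs kvm_arch_pmi_in_guest() and
kvm_arch_vcpu_get_ip(), both added below.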
Signed-off-by: Quan Zhou Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/00342d535311eb0629b9ba4f1e457a48e2abee33.1728957131.git.zhouquan@iscas.ac.cn Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/include/asm/kvm_host.h | 10 ++++++++++ arch/riscv/kvm/Kconfig | 1 + arch/riscv/kvm/main.c | 12 ++++++++++-- arch/riscv/kvm/vcpu.c | 7 +++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index e2a0d544f1f8..a8179b946168 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -296,6 +296,16 @@ struct kvm_vcpu_arch { } sta; }; +/* + * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event, + * arrived in guest context. For riscv, any event that arrives while a vCPU is + * loaded is considered to be "in guest". + */ +static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) +{ + return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; +} + static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 148e52b516cf..116240ae3213 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -33,6 +33,7 @@ config KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select SCHED_INFO + select GUEST_PERF_EVENTS if PERF_EVENTS help Support hosting virtualized guest machines. diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index f3427f6de608..5682e338ae6d 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -51,6 +51,12 @@ void kvm_arch_disable_virtualization_cpu(void) csr_write(CSR_HIDELEG, 0); } +static void kvm_riscv_teardown(void) +{ + kvm_riscv_aia_exit(); + kvm_unregister_perf_callbacks(); +} + static int __init riscv_kvm_init(void) { int rc; @@ -105,9 +111,11 @@ static int __init riscv_kvm_init(void) kvm_info("AIA available with %d guest external interrupts\n", kvm_riscv_aia_nr_hgei); + kvm_register_perf_callbacks(NULL); + rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); if (rc) { - kvm_riscv_aia_exit(); + kvm_riscv_teardown(); return rc; } @@ -117,7 +125,7 @@ module_init(riscv_kvm_init); static void __exit riscv_kvm_exit(void) { - kvm_riscv_aia_exit(); + kvm_riscv_teardown(); kvm_exit(); } diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 83a1f258d52d..daa9b1180f65 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -233,6 +233,13 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false; } +#ifdef CONFIG_GUEST_PERF_EVENTS +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.guest_context.sepc; +} +#endif + vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) { return VM_FAULT_SIGBUS; -- Gitee From 2c17b551c7dfcc2ead84e822bc3e944dc82c5495 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 17 Mar 2025 00:41:44 -0700 Subject: [PATCH 16/26] RISC-V: KVM: Teardown riscv specific bits after kvm_exit ANBZ: #33223 commit 2d117e67f318303f6ab699a5511d1fac3f170545 upstream. During a module removal, kvm_exit invokes arch specific disable call which disables AIA. However, we invoke aia_exit before kvm_exit resulting in the following warning. KVM kernel module can't be inserted afterwards due to inconsistent state of IRQ. [25469.031389] percpu IRQ 31 still enabled on CPU0! 
[25469.031732] WARNING: CPU: 3 PID: 943 at kernel/irq/manage.c:2476 __free_percpu_irq+0xa2/0x150
[25469.031804] Modules linked in: kvm(-)
[25469.031848] CPU: 3 UID: 0 PID: 943 Comm: rmmod Not tainted 6.14.0-rc5-06947-g91c763118f47-dirty #2
[25469.031905] Hardware name: riscv-virtio,qemu (DT)
[25469.031928] epc : __free_percpu_irq+0xa2/0x150
[25469.031976] ra : __free_percpu_irq+0xa2/0x150
[25469.032197] epc : ffffffff8007db1e ra : ffffffff8007db1e sp : ff2000000088bd50
[25469.032241] gp : ffffffff8131cef8 tp : ff60000080b96400 t0 : ff2000000088baf8
[25469.032285] t1 : fffffffffffffffc t2 : 5249207570637265 s0 : ff2000000088bd90
[25469.032329] s1 : ff60000098b21080 a0 : 037d527a15eb4f00 a1 : 037d527a15eb4f00
[25469.032372] a2 : 0000000000000023 a3 : 0000000000000001 a4 : ffffffff8122dbf8
[25469.032410] a5 : 0000000000000fff a6 : 0000000000000000 a7 : ffffffff8122dc10
[25469.032448] s2 : ff60000080c22eb0 s3 : 0000000200000022 s4 : 000000000000001f
[25469.032488] s5 : ff60000080c22e00 s6 : ffffffff80c351c0 s7 : 0000000000000000
[25469.032582] s8 : 0000000000000003 s9 : 000055556b7fb490 s10: 00007ffff0e12fa0
[25469.032621] s11: 00007ffff0e13e9a t3 : ffffffff81354ac7 t4 : ffffffff81354ac7
[25469.032664] t5 : ffffffff81354ac8 t6 : ffffffff81354ac7
[25469.032698] status: 0000000200000100 badaddr: ffffffff8007db1e cause: 0000000000000003
[25469.032738] [] __free_percpu_irq+0xa2/0x150
[25469.032797] [] free_percpu_irq+0x30/0x5e
[25469.032856] [] kvm_riscv_aia_exit+0x40/0x42 [kvm]
[25469.033947] [] cleanup_module+0x10/0x32 [kvm]
[25469.035300] [] __riscv_sys_delete_module+0x18e/0x1fc
[25469.035374] [] syscall_handler+0x3a/0x46
[25469.035456] [] do_trap_ecall_u+0x72/0x134
[25469.035536] [] handle_exception+0x148/0x156

Invoke aia_exit and other arch-specific cleanup functions after kvm_exit
so that disable gets a chance to be called before exit.

Fixes: 54e43320c2ba ("RISC-V: KVM: Initial skeletal support for AIA")
Fixes: eded6754f398 ("riscv: KVM: add basic support for host vs guest profiling")
Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
Reviewed-by: Sean Christopherson
Link: https://lore.kernel.org/r/20250317-kvm_exit_fix-v1-1-aa5240c5dbd2@rivosinc.com
Signed-off-by: Anup Patel
Signed-off-by: Ruidong Tian
---
 arch/riscv/kvm/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 5682e338ae6d..506303dbe10e 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -125,8 +125,8 @@ module_init(riscv_kvm_init);
 
 static void __exit riscv_kvm_exit(void)
 {
-	kvm_riscv_teardown();
-
 	kvm_exit();
+
+	kvm_riscv_teardown();
 }
 module_exit(riscv_kvm_exit);
--
Gitee

From 9a20ea6d7daf940aec6e7d4b3a6617165c9d62d5 Mon Sep 17 00:00:00 2001
From: Anup Patel
Date: Mon, 21 Oct 2024 01:17:29 +0530
Subject: [PATCH 17/26] RISC-V: KVM: Add common nested acceleration support

ANBZ: #33223

commit d466c19cead5904d4d5d92adb2b213b411d3b849 upstream.

Add common nested acceleration support that will be shared by all parts
of KVM RISC-V. This nested acceleration support detects and enables SBI
NACL extension usage based on static keys, which ensures minimal impact
on the non-nested scenario.
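A sketch of the call-site pattern this enables (using the accessors defined
in this patch): each availability check is a static branch, so when NACL is
absent the check folds to a patchable direct jump and the non-nested path
pays nothing:

  if (kvm_riscv_nacl_sync_csr_available())
          nacl_csr_write(nacl_shmem(), CSR_HVIP, csr->hvip);  /* via shmem */
  else
          csr_write(CSR_HVIP, csr->hvip);                     /* real CSR */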
Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-9-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/include/asm/kvm_nacl.h | 239 ++++++++++++++++++++++++++++++ arch/riscv/kvm/Makefile | 1 + arch/riscv/kvm/main.c | 51 ++++++- arch/riscv/kvm/nacl.c | 152 +++++++++++++++++++ 4 files changed, 441 insertions(+), 2 deletions(-) create mode 100644 arch/riscv/include/asm/kvm_nacl.h create mode 100644 arch/riscv/kvm/nacl.c diff --git a/arch/riscv/include/asm/kvm_nacl.h b/arch/riscv/include/asm/kvm_nacl.h new file mode 100644 index 000000000000..8f3e3ebf5017 --- /dev/null +++ b/arch/riscv/include/asm/kvm_nacl.h @@ -0,0 +1,239 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Ventana Micro Systems Inc. + */ + +#ifndef __KVM_NACL_H +#define __KVM_NACL_H + +#include <linux/jump_label.h> +#include <linux/percpu.h> +#include <asm/byteorder.h> +#include <asm/csr.h> +#include <asm/sbi.h> + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_available); +#define kvm_riscv_nacl_available() \ + static_branch_unlikely(&kvm_riscv_nacl_available) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available); +#define kvm_riscv_nacl_sync_csr_available() \ + static_branch_unlikely(&kvm_riscv_nacl_sync_csr_available) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available); +#define kvm_riscv_nacl_sync_hfence_available() \ + static_branch_unlikely(&kvm_riscv_nacl_sync_hfence_available) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available); +#define kvm_riscv_nacl_sync_sret_available() \ + static_branch_unlikely(&kvm_riscv_nacl_sync_sret_available) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available); +#define kvm_riscv_nacl_autoswap_csr_available() \ + static_branch_unlikely(&kvm_riscv_nacl_autoswap_csr_available) + +struct kvm_riscv_nacl { + void *shmem; + phys_addr_t shmem_phys; +}; +DECLARE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl); + +void __kvm_riscv_nacl_hfence(void *shmem, + unsigned long control, + unsigned long page_num, + unsigned long page_count); + +int kvm_riscv_nacl_enable(void); + +void kvm_riscv_nacl_disable(void); + +void kvm_riscv_nacl_exit(void); + +int kvm_riscv_nacl_init(void); + +#ifdef CONFIG_32BIT +#define lelong_to_cpu(__x) le32_to_cpu(__x) +#define cpu_to_lelong(__x) cpu_to_le32(__x) +#else +#define lelong_to_cpu(__x) le64_to_cpu(__x) +#define cpu_to_lelong(__x) cpu_to_le64(__x) +#endif + +#define nacl_shmem() \ + this_cpu_ptr(&kvm_riscv_nacl)->shmem + +#define nacl_scratch_read_long(__shmem, __offset) \ +({ \ + unsigned long *__p = (__shmem) + \ + SBI_NACL_SHMEM_SCRATCH_OFFSET + \ + (__offset); \ + lelong_to_cpu(*__p); \ +}) + +#define nacl_scratch_write_long(__shmem, __offset, __val) \ +do { \ + unsigned long *__p = (__shmem) + \ + SBI_NACL_SHMEM_SCRATCH_OFFSET + \ + (__offset); \ + *__p = cpu_to_lelong(__val); \ +} while (0) + +#define nacl_scratch_write_longs(__shmem, __offset, __array, __count) \ +do { \ + unsigned int __i; \ + unsigned long *__p = (__shmem) + \ + SBI_NACL_SHMEM_SCRATCH_OFFSET + \ + (__offset); \ + for (__i = 0; __i < (__count); __i++) \ + __p[__i] = cpu_to_lelong((__array)[__i]); \ +} while (0) + +#define nacl_sync_hfence(__e) \ + sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_HFENCE, \ + (__e), 0, 0, 0, 0, 0) + +#define nacl_hfence_mkconfig(__type, __order, __vmid, __asid) \ +({ \ + unsigned long __c = SBI_NACL_SHMEM_HFENCE_CONFIG_PEND; \ + __c |= ((__type) & SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_MASK) \ + << SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT; \ + __c |= (((__order) -
SBI_NACL_SHMEM_HFENCE_ORDER_BASE) & \ + SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_MASK) \ + << SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_SHIFT; \ + __c |= ((__vmid) & SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_MASK) \ + << SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_SHIFT; \ + __c |= ((__asid) & SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_MASK); \ + __c; \ +}) + +#define nacl_hfence_mkpnum(__order, __addr) \ + ((__addr) >> (__order)) + +#define nacl_hfence_mkpcount(__order, __size) \ + ((__size) >> (__order)) + +#define nacl_hfence_gvma(__shmem, __gpa, __gpsz, __order) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA, \ + __order, 0, 0), \ + nacl_hfence_mkpnum(__order, __gpa), \ + nacl_hfence_mkpcount(__order, __gpsz)) + +#define nacl_hfence_gvma_all(__shmem) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_ALL, \ + 0, 0, 0), 0, 0) + +#define nacl_hfence_gvma_vmid(__shmem, __vmid, __gpa, __gpsz, __order) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID, \ + __order, __vmid, 0), \ + nacl_hfence_mkpnum(__order, __gpa), \ + nacl_hfence_mkpcount(__order, __gpsz)) + +#define nacl_hfence_gvma_vmid_all(__shmem, __vmid) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID_ALL, \ + 0, __vmid, 0), 0, 0) + +#define nacl_hfence_vvma(__shmem, __vmid, __gva, __gvsz, __order) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA, \ + __order, __vmid, 0), \ + nacl_hfence_mkpnum(__order, __gva), \ + nacl_hfence_mkpcount(__order, __gvsz)) + +#define nacl_hfence_vvma_all(__shmem, __vmid) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ALL, \ + 0, __vmid, 0), 0, 0) + +#define nacl_hfence_vvma_asid(__shmem, __vmid, __asid, __gva, __gvsz, __order)\ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID, \ + __order, __vmid, __asid), \ + nacl_hfence_mkpnum(__order, __gva), \ + nacl_hfence_mkpcount(__order, __gvsz)) + +#define nacl_hfence_vvma_asid_all(__shmem, __vmid, __asid) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID_ALL, \ + 0, __vmid, __asid), 0, 0) + +#define nacl_csr_read(__shmem, __csr) \ +({ \ + unsigned long *__a = (__shmem) + SBI_NACL_SHMEM_CSR_OFFSET; \ + lelong_to_cpu(__a[SBI_NACL_SHMEM_CSR_INDEX(__csr)]); \ +}) + +#define nacl_csr_write(__shmem, __csr, __val) \ +do { \ + void *__s = (__shmem); \ + unsigned int __i = SBI_NACL_SHMEM_CSR_INDEX(__csr); \ + unsigned long *__a = (__s) + SBI_NACL_SHMEM_CSR_OFFSET; \ + u8 *__b = (__s) + SBI_NACL_SHMEM_DBITMAP_OFFSET; \ + __a[__i] = cpu_to_lelong(__val); \ + __b[__i >> 3] |= 1U << (__i & 0x7); \ +} while (0) + +#define nacl_csr_swap(__shmem, __csr, __val) \ +({ \ + void *__s = (__shmem); \ + unsigned int __i = SBI_NACL_SHMEM_CSR_INDEX(__csr); \ + unsigned long *__a = (__s) + SBI_NACL_SHMEM_CSR_OFFSET; \ + u8 *__b = (__s) + SBI_NACL_SHMEM_DBITMAP_OFFSET; \ + unsigned long __r = lelong_to_cpu(__a[__i]); \ + __a[__i] = cpu_to_lelong(__val); \ + __b[__i >> 3] |= 1U << (__i & 0x7); \ + __r; \ +}) + +#define nacl_sync_csr(__csr) \ + sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_CSR, \ + (__csr), 0, 0, 0, 0, 0) + +/* + * Each ncsr_xyz() macro defined below has its own static-branch so every + * use of an ncsr_xyz() macro emits a patchable direct jump.
This means that multiple + * back-to-back ncsr_xyz() macro uses will emit multiple patchable direct + * jumps, which is sub-optimal. + * + * Based on the above, it is recommended to avoid multiple back-to-back + * ncsr_xyz() macro uses. + */ + +#define ncsr_read(__csr) \ +({ \ + unsigned long __r; \ + if (kvm_riscv_nacl_available()) \ + __r = nacl_csr_read(nacl_shmem(), __csr); \ + else \ + __r = csr_read(__csr); \ + __r; \ +}) + +#define ncsr_write(__csr, __val) \ +do { \ + if (kvm_riscv_nacl_sync_csr_available()) \ + nacl_csr_write(nacl_shmem(), __csr, __val); \ + else \ + csr_write(__csr, __val); \ +} while (0) + +#define ncsr_swap(__csr, __val) \ +({ \ + unsigned long __r; \ + if (kvm_riscv_nacl_sync_csr_available()) \ + __r = nacl_csr_swap(nacl_shmem(), __csr, __val); \ + else \ + __r = csr_swap(__csr, __val); \ + __r; \ +}) + +#define nsync_csr(__csr) \ +do { \ + if (kvm_riscv_nacl_sync_csr_available()) \ + nacl_sync_csr(__csr); \ +} while (0) + +#endif diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 1423d47f993f..a37418189ddb 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -16,6 +16,7 @@ kvm-y += aia_device.o kvm-y += aia_imsic.o kvm-y += main.o kvm-y += mmu.o +kvm-y += nacl.o kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vcpu_exit.o diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 506303dbe10e..4b24705dc63a 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -10,8 +10,8 @@ #include #include #include -#include #include +#include #include long kvm_arch_dev_ioctl(struct file *filp, @@ -22,6 +22,12 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_arch_enable_virtualization_cpu(void) { + int rc; + + rc = kvm_riscv_nacl_enable(); + if (rc) + return rc; + csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); @@ -49,17 +55,21 @@ void kvm_arch_disable_virtualization_cpu(void) csr_write(CSR_HVIP, 0); csr_write(CSR_HEDELEG, 0); csr_write(CSR_HIDELEG, 0); + + kvm_riscv_nacl_disable(); } static void kvm_riscv_teardown(void) { kvm_riscv_aia_exit(); + kvm_riscv_nacl_exit(); kvm_unregister_perf_callbacks(); } static int __init riscv_kvm_init(void) { int rc; + char slist[64]; const char *str; if (!riscv_isa_extension_available(NULL, h)) { @@ -77,16 +87,53 @@ static int __init riscv_kvm_init(void) return -ENODEV; } + rc = kvm_riscv_nacl_init(); + if (rc && rc != -ENODEV) + return rc; + kvm_riscv_gstage_mode_detect(); kvm_riscv_gstage_vmid_detect(); rc = kvm_riscv_aia_init(); - if (rc && rc != -ENODEV) + if (rc && rc != -ENODEV) { + kvm_riscv_nacl_exit(); return rc; + } kvm_info("hypervisor extension available\n"); + if (kvm_riscv_nacl_available()) { + rc = 0; + slist[0] = '\0'; + if (kvm_riscv_nacl_sync_csr_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_csr"); + rc++; + } + if (kvm_riscv_nacl_sync_hfence_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_hfence"); + rc++; + } + if (kvm_riscv_nacl_sync_sret_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_sret"); + rc++; + } + if (kvm_riscv_nacl_autoswap_csr_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "autoswap_csr"); + rc++; + } + kvm_info("using SBI nested acceleration with %s\n", + (rc) ?
slist : "no features"); + } + switch (kvm_riscv_gstage_mode()) { case HGATP_MODE_SV32X4: str = "Sv32x4"; diff --git a/arch/riscv/kvm/nacl.c b/arch/riscv/kvm/nacl.c new file mode 100644 index 000000000000..08a95ad9ada2 --- /dev/null +++ b/arch/riscv/kvm/nacl.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Ventana Micro Systems Inc. + */ + +#include +#include +#include + +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available); +DEFINE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl); + +void __kvm_riscv_nacl_hfence(void *shmem, + unsigned long control, + unsigned long page_num, + unsigned long page_count) +{ + int i, ent = -1, try_count = 5; + unsigned long *entp; + +again: + for (i = 0; i < SBI_NACL_SHMEM_HFENCE_ENTRY_MAX; i++) { + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i); + if (lelong_to_cpu(*entp) & SBI_NACL_SHMEM_HFENCE_CONFIG_PEND) + continue; + + ent = i; + break; + } + + if (ent < 0) { + if (try_count) { + nacl_sync_hfence(-1UL); + goto again; + } else { + pr_warn("KVM: No free entry in NACL shared memory\n"); + return; + } + } + + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i); + *entp = cpu_to_lelong(control); + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PNUM(i); + *entp = cpu_to_lelong(page_num); + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PCOUNT(i); + *entp = cpu_to_lelong(page_count); +} + +int kvm_riscv_nacl_enable(void) +{ + int rc; + struct sbiret ret; + struct kvm_riscv_nacl *nacl; + + if (!kvm_riscv_nacl_available()) + return 0; + nacl = this_cpu_ptr(&kvm_riscv_nacl); + + ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM, + nacl->shmem_phys, 0, 0, 0, 0, 0); + rc = sbi_err_map_linux_errno(ret.error); + if (rc) + return rc; + + return 0; +} + +void kvm_riscv_nacl_disable(void) +{ + if (!kvm_riscv_nacl_available()) + return; + + sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM, + SBI_SHMEM_DISABLE, SBI_SHMEM_DISABLE, 0, 0, 0, 0); +} + +void kvm_riscv_nacl_exit(void) +{ + int cpu; + struct kvm_riscv_nacl *nacl; + + if (!kvm_riscv_nacl_available()) + return; + + /* Allocate per-CPU shared memory */ + for_each_possible_cpu(cpu) { + nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu); + if (!nacl->shmem) + continue; + + free_pages((unsigned long)nacl->shmem, + get_order(SBI_NACL_SHMEM_SIZE)); + nacl->shmem = NULL; + nacl->shmem_phys = 0; + } +} + +static long nacl_probe_feature(long feature_id) +{ + struct sbiret ret; + + if (!kvm_riscv_nacl_available()) + return 0; + + ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_PROBE_FEATURE, + feature_id, 0, 0, 0, 0, 0); + return ret.value; +} + +int kvm_riscv_nacl_init(void) +{ + int cpu; + struct page *shmem_page; + struct kvm_riscv_nacl *nacl; + + if (sbi_spec_version < sbi_mk_version(1, 0) || + sbi_probe_extension(SBI_EXT_NACL) <= 0) + return -ENODEV; + + /* Enable NACL support */ + static_branch_enable(&kvm_riscv_nacl_available); + + /* Probe NACL features */ + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_CSR)) + static_branch_enable(&kvm_riscv_nacl_sync_csr_available); + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_HFENCE)) + static_branch_enable(&kvm_riscv_nacl_sync_hfence_available); + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_SRET)) + static_branch_enable(&kvm_riscv_nacl_sync_sret_available); + if (nacl_probe_feature(SBI_NACL_FEAT_AUTOSWAP_CSR)) + 
static_branch_enable(&kvm_riscv_nacl_autoswap_csr_available); + + /* Allocate per-CPU shared memory */ + for_each_possible_cpu(cpu) { + nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu); + + shmem_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, + get_order(SBI_NACL_SHMEM_SIZE)); + if (!shmem_page) { + kvm_riscv_nacl_exit(); + return -ENOMEM; + } + nacl->shmem = page_to_virt(shmem_page); + nacl->shmem_phys = page_to_phys(shmem_page); + } + + return 0; +} -- Gitee From 305b78e4b33ab6f7dcf0c5e0a0605c47da55f441 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:30 +0530 Subject: [PATCH 18/26] RISC-V: KVM: Use nacl_csr_xyz() for accessing H-extension CSRs ANBZ: #33223 commit e28e6b69767b3aea73eda0fd3e7b4e1c15a7ebec upstream. When running under some other hypervisor, prefer nacl_csr_xyz() for accessing H-extension CSRs in the run-loop. This makes CSR access faster whenever SBI nested acceleration is available. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-10-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/mmu.c | 4 +- arch/riscv/kvm/vcpu.c | 103 +++++++++++++++++++++++++----------- arch/riscv/kvm/vcpu_timer.c | 28 +++++----- 3 files changed, 87 insertions(+), 48 deletions(-) diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 726b56e8e303..90ee9867c920 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include @@ -730,7 +730,7 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; - csr_write(CSR_HGATP, hgatp); + ncsr_write(CSR_HGATP, hgatp); if (!kvm_riscv_gstage_vmid_bits()) kvm_riscv_local_hfence_gvma_all(); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index daa9b1180f65..7e98a6f76c4b 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include #define CREATE_TRACE_POINTS @@ -375,10 +375,10 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; /* Read current HVIP and VSIE CSRs */ - csr->vsie = csr_read(CSR_VSIE); + csr->vsie = ncsr_read(CSR_VSIE); /* Sync-up HVIP.VSSIP bit changes does by Guest */ - hvip = csr_read(CSR_HVIP); + hvip = ncsr_read(CSR_HVIP); if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { if (hvip & (1UL << IRQ_VS_SOFT)) { if (!test_and_set_bit(IRQ_VS_SOFT, @@ -585,26 +585,49 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + void *nsh; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; - csr_write(CSR_VSSTATUS, csr->vsstatus); - csr_write(CSR_VSIE, csr->vsie); - csr_write(CSR_VSTVEC, csr->vstvec); - csr_write(CSR_VSSCRATCH, csr->vsscratch); - csr_write(CSR_VSEPC, csr->vsepc); - csr_write(CSR_VSCAUSE, csr->vscause); - csr_write(CSR_VSTVAL, csr->vstval); - csr_write(CSR_HEDELEG, cfg->hedeleg); - csr_write(CSR_HVIP, csr->hvip); - csr_write(CSR_VSATP, csr->vsatp); - csr_write(CSR_HENVCFG, cfg->henvcfg); - if (IS_ENABLED(CONFIG_32BIT)) - csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { - csr_write(CSR_HSTATEEN0, cfg->hstateen0); + if (kvm_riscv_nacl_sync_csr_available()) { + nsh = nacl_shmem(); + nacl_csr_write(nsh, 
CSR_VSSTATUS, csr->vsstatus); + nacl_csr_write(nsh, CSR_VSIE, csr->vsie); + nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec); + nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch); + nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc); + nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause); + nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval); + nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg); + nacl_csr_write(nsh, CSR_HVIP, csr->hvip); + nacl_csr_write(nsh, CSR_VSATP, csr->vsatp); + nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg); + if (IS_ENABLED(CONFIG_32BIT)) + nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0); + if (IS_ENABLED(CONFIG_32BIT)) + nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + } + } else { + csr_write(CSR_VSSTATUS, csr->vsstatus); + csr_write(CSR_VSIE, csr->vsie); + csr_write(CSR_VSTVEC, csr->vstvec); + csr_write(CSR_VSSCRATCH, csr->vsscratch); + csr_write(CSR_VSEPC, csr->vsepc); + csr_write(CSR_VSCAUSE, csr->vscause); + csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HEDELEG, cfg->hedeleg); + csr_write(CSR_HVIP, csr->hvip); + csr_write(CSR_VSATP, csr->vsatp); + csr_write(CSR_HENVCFG, cfg->henvcfg); if (IS_ENABLED(CONFIG_32BIT)) - csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + csr_write(CSR_HSTATEEN0, cfg->hstateen0); + if (IS_ENABLED(CONFIG_32BIT)) + csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + } } kvm_riscv_gstage_update_hgatp(vcpu); @@ -627,6 +650,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + void *nsh; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; vcpu->cpu = -1; @@ -642,15 +666,28 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.isa); kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context); - csr->vsstatus = csr_read(CSR_VSSTATUS); - csr->vsie = csr_read(CSR_VSIE); - csr->vstvec = csr_read(CSR_VSTVEC); - csr->vsscratch = csr_read(CSR_VSSCRATCH); - csr->vsepc = csr_read(CSR_VSEPC); - csr->vscause = csr_read(CSR_VSCAUSE); - csr->vstval = csr_read(CSR_VSTVAL); - csr->hvip = csr_read(CSR_HVIP); - csr->vsatp = csr_read(CSR_VSATP); + if (kvm_riscv_nacl_available()) { + nsh = nacl_shmem(); + csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS); + csr->vsie = nacl_csr_read(nsh, CSR_VSIE); + csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC); + csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH); + csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC); + csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE); + csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL); + csr->hvip = nacl_csr_read(nsh, CSR_HVIP); + csr->vsatp = nacl_csr_read(nsh, CSR_VSATP); + } else { + csr->vsstatus = csr_read(CSR_VSSTATUS); + csr->vsie = csr_read(CSR_VSIE); + csr->vstvec = csr_read(CSR_VSTVEC); + csr->vsscratch = csr_read(CSR_VSSCRATCH); + csr->vsepc = csr_read(CSR_VSEPC); + csr->vscause = csr_read(CSR_VSCAUSE); + csr->vstval = csr_read(CSR_VSTVAL); + csr->hvip = csr_read(CSR_HVIP); + csr->vsatp = csr_read(CSR_VSATP); + } } static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) @@ -705,7 +742,7 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - csr_write(CSR_HVIP, csr->hvip); + ncsr_write(CSR_HVIP, csr->hvip); kvm_riscv_vcpu_aia_update_hvip(vcpu); } @@ -752,7 +789,9 @@ static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) 
kvm_riscv_vcpu_swap_in_guest_state(vcpu); guest_state_enter_irqoff(); - hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); + hcntx->hstatus = ncsr_swap(CSR_HSTATUS, gcntx->hstatus); + + nsync_csr(-1UL); __kvm_riscv_switch_to(&vcpu->arch); @@ -888,8 +927,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trap.sepc = vcpu->arch.guest_context.sepc; trap.scause = csr_read(CSR_SCAUSE); trap.stval = csr_read(CSR_STVAL); - trap.htval = csr_read(CSR_HTVAL); - trap.htinst = csr_read(CSR_HTINST); + trap.htval = ncsr_read(CSR_HTVAL); + trap.htinst = ncsr_read(CSR_HTINST); /* Syncup interrupts state with HW */ kvm_riscv_vcpu_sync_interrupts(vcpu); diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 75486b25ac45..96e7a4e463f7 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -11,8 +11,8 @@ #include #include #include -#include #include +#include #include static u64 kvm_riscv_current_cycles(struct kvm_guest_timer *gt) @@ -72,12 +72,12 @@ static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) static int kvm_riscv_vcpu_update_vstimecmp(struct kvm_vcpu *vcpu, u64 ncycles) { #if defined(CONFIG_32BIT) - csr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF); - csr_write(CSR_VSTIMECMPH, ncycles >> 32); + ncsr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF); + ncsr_write(CSR_VSTIMECMPH, ncycles >> 32); #else - csr_write(CSR_VSTIMECMP, ncycles); + ncsr_write(CSR_VSTIMECMP, ncycles); #endif - return 0; + return 0; } static int kvm_riscv_vcpu_update_hrtimer(struct kvm_vcpu *vcpu, u64 ncycles) @@ -289,10 +289,10 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu) struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; #if defined(CONFIG_32BIT) - csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta)); - csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32)); + ncsr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta)); + ncsr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32)); #else - csr_write(CSR_HTIMEDELTA, gt->time_delta); + ncsr_write(CSR_HTIMEDELTA, gt->time_delta); #endif } @@ -306,10 +306,10 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) return; #if defined(CONFIG_32BIT) - csr_write(CSR_VSTIMECMP, (u32)t->next_cycles); - csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); + ncsr_write(CSR_VSTIMECMP, (u32)t->next_cycles); + ncsr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); #else - csr_write(CSR_VSTIMECMP, t->next_cycles); + ncsr_write(CSR_VSTIMECMP, t->next_cycles); #endif /* timer should be enabled for the remaining operations */ @@ -327,10 +327,10 @@ void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu) return; #if defined(CONFIG_32BIT) - t->next_cycles = csr_read(CSR_VSTIMECMP); - t->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32; + t->next_cycles = ncsr_read(CSR_VSTIMECMP); + t->next_cycles |= (u64)ncsr_read(CSR_VSTIMECMPH) << 32; #else - t->next_cycles = csr_read(CSR_VSTIMECMP); + t->next_cycles = ncsr_read(CSR_VSTIMECMP); #endif } -- Gitee From 2cdb9cea40cae2e78db47ac99dc3e23e242c7894 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:31 +0530 Subject: [PATCH 19/26] RISC-V: KVM: Use nacl_csr_xyz() for accessing AIA CSRs ANBZ: #33223 commit dab55604aec572cfa2bc6b51be288da1ac4c7366 upstream. When running under some other hypervisor, prefer nacl_csr_xyz() for accessing AIA CSRs in the run-loop. This makes CSR access faster whenever SBI nested acceleration is available. 
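As a reminder of the access pattern (an illustrative sketch, not code from this patch): a write through the NACL shared memory only stores the value and marks the CSR dirty in the bitmap; the host hypervisor applies it lazily on the next synchronize call or world-switch.

	void *nsh = nacl_shmem();

	/* Deferred: value lands in shmem, dirty bit set for this CSR.
	 * In real code this path runs only when
	 * kvm_riscv_nacl_sync_csr_available() is true.
	 */
	nacl_csr_write(nsh, CSR_HVIPRIO1, 0);

	/* Explicit flush of pending CSR updates; -1UL is used elsewhere
	 * in this series to request a sync of all CSRs.
	 */
	nsync_csr(-1UL);

Reads in the run-loop can therefore use ncsr_read() directly, while writes stay coherent as long as a sync (or an SRET-based world-switch) happens before the guest runs again.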
Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-11-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/aia.c | 97 ++++++++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 34 deletions(-) diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 8800fc95e662..19afd1f23537 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -16,6 +16,7 @@ #include #include #include +#include struct aia_hgei_control { raw_spinlock_t lock; @@ -88,7 +89,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; if (kvm_riscv_aia_available()) - csr->vsieh = csr_read(CSR_VSIEH); + csr->vsieh = ncsr_read(CSR_VSIEH); } #endif @@ -115,7 +116,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) hgei = aia_find_hgei(vcpu); if (hgei > 0) - return !!(csr_read(CSR_HGEIP) & BIT(hgei)); + return !!(ncsr_read(CSR_HGEIP) & BIT(hgei)); return false; } @@ -128,45 +129,73 @@ void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) return; #ifdef CONFIG_32BIT - csr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); + ncsr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); #endif - csr_write(CSR_HVICTL, aia_hvictl_value(!!(csr->hvip & BIT(IRQ_VS_EXT)))); + ncsr_write(CSR_HVICTL, aia_hvictl_value(!!(csr->hvip & BIT(IRQ_VS_EXT)))); } void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + void *nsh; if (!kvm_riscv_aia_available()) return; - csr_write(CSR_VSISELECT, csr->vsiselect); - csr_write(CSR_HVIPRIO1, csr->hviprio1); - csr_write(CSR_HVIPRIO2, csr->hviprio2); + if (kvm_riscv_nacl_sync_csr_available()) { + nsh = nacl_shmem(); + nacl_csr_write(nsh, CSR_VSISELECT, csr->vsiselect); + nacl_csr_write(nsh, CSR_HVIPRIO1, csr->hviprio1); + nacl_csr_write(nsh, CSR_HVIPRIO2, csr->hviprio2); +#ifdef CONFIG_32BIT + nacl_csr_write(nsh, CSR_VSIEH, csr->vsieh); + nacl_csr_write(nsh, CSR_HVIPH, csr->hviph); + nacl_csr_write(nsh, CSR_HVIPRIO1H, csr->hviprio1h); + nacl_csr_write(nsh, CSR_HVIPRIO2H, csr->hviprio2h); +#endif + } else { + csr_write(CSR_VSISELECT, csr->vsiselect); + csr_write(CSR_HVIPRIO1, csr->hviprio1); + csr_write(CSR_HVIPRIO2, csr->hviprio2); #ifdef CONFIG_32BIT - csr_write(CSR_VSIEH, csr->vsieh); - csr_write(CSR_HVIPH, csr->hviph); - csr_write(CSR_HVIPRIO1H, csr->hviprio1h); - csr_write(CSR_HVIPRIO2H, csr->hviprio2h); + csr_write(CSR_VSIEH, csr->vsieh); + csr_write(CSR_HVIPH, csr->hviph); + csr_write(CSR_HVIPRIO1H, csr->hviprio1h); + csr_write(CSR_HVIPRIO2H, csr->hviprio2h); #endif + } } void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + void *nsh; if (!kvm_riscv_aia_available()) return; - csr->vsiselect = csr_read(CSR_VSISELECT); - csr->hviprio1 = csr_read(CSR_HVIPRIO1); - csr->hviprio2 = csr_read(CSR_HVIPRIO2); + if (kvm_riscv_nacl_available()) { + nsh = nacl_shmem(); + csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT); + csr->hviprio1 = nacl_csr_read(nsh, CSR_HVIPRIO1); + csr->hviprio2 = nacl_csr_read(nsh, CSR_HVIPRIO2); #ifdef CONFIG_32BIT - csr->vsieh = csr_read(CSR_VSIEH); - csr->hviph = csr_read(CSR_HVIPH); - csr->hviprio1h = csr_read(CSR_HVIPRIO1H); - csr->hviprio2h = csr_read(CSR_HVIPRIO2H); + csr->vsieh = nacl_csr_read(nsh, CSR_VSIEH); + csr->hviph = nacl_csr_read(nsh, CSR_HVIPH); + csr->hviprio1h = nacl_csr_read(nsh, 
CSR_HVIPRIO1H); + csr->hviprio2h = nacl_csr_read(nsh, CSR_HVIPRIO2H); #endif + } else { + csr->vsiselect = csr_read(CSR_VSISELECT); + csr->hviprio1 = csr_read(CSR_HVIPRIO1); + csr->hviprio2 = csr_read(CSR_HVIPRIO2); +#ifdef CONFIG_32BIT + csr->vsieh = csr_read(CSR_VSIEH); + csr->hviph = csr_read(CSR_HVIPH); + csr->hviprio1h = csr_read(CSR_HVIPRIO1H); + csr->hviprio2h = csr_read(CSR_HVIPRIO2H); +#endif + } } int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, @@ -250,20 +279,20 @@ static u8 aia_get_iprio8(struct kvm_vcpu *vcpu, unsigned int irq) switch (bitpos / BITS_PER_LONG) { case 0: - hviprio = csr_read(CSR_HVIPRIO1); + hviprio = ncsr_read(CSR_HVIPRIO1); break; case 1: #ifndef CONFIG_32BIT - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; #else - hviprio = csr_read(CSR_HVIPRIO1H); + hviprio = ncsr_read(CSR_HVIPRIO1H); break; case 2: - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; case 3: - hviprio = csr_read(CSR_HVIPRIO2H); + hviprio = ncsr_read(CSR_HVIPRIO2H); break; #endif default: @@ -283,20 +312,20 @@ static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio) switch (bitpos / BITS_PER_LONG) { case 0: - hviprio = csr_read(CSR_HVIPRIO1); + hviprio = ncsr_read(CSR_HVIPRIO1); break; case 1: #ifndef CONFIG_32BIT - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; #else - hviprio = csr_read(CSR_HVIPRIO1H); + hviprio = ncsr_read(CSR_HVIPRIO1H); break; case 2: - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; case 3: - hviprio = csr_read(CSR_HVIPRIO2H); + hviprio = ncsr_read(CSR_HVIPRIO2H); break; #endif default: @@ -308,20 +337,20 @@ static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio) switch (bitpos / BITS_PER_LONG) { case 0: - csr_write(CSR_HVIPRIO1, hviprio); + ncsr_write(CSR_HVIPRIO1, hviprio); break; case 1: #ifndef CONFIG_32BIT - csr_write(CSR_HVIPRIO2, hviprio); + ncsr_write(CSR_HVIPRIO2, hviprio); break; #else - csr_write(CSR_HVIPRIO1H, hviprio); + ncsr_write(CSR_HVIPRIO1H, hviprio); break; case 2: - csr_write(CSR_HVIPRIO2, hviprio); + ncsr_write(CSR_HVIPRIO2, hviprio); break; case 3: - csr_write(CSR_HVIPRIO2H, hviprio); + ncsr_write(CSR_HVIPRIO2H, hviprio); break; #endif default: @@ -377,7 +406,7 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, return KVM_INSN_ILLEGAL_TRAP; /* First try to emulate in kernel space */ - isel = csr_read(CSR_VSISELECT) & ISELECT_MASK; + isel = ncsr_read(CSR_VSISELECT) & ISELECT_MASK; if (isel >= ISELECT_IPRIO0 && isel <= ISELECT_IPRIO15) return aia_rmw_iprio(vcpu, isel, val, new_val, wr_mask); else if (isel >= IMSIC_FIRST && isel <= IMSIC_LAST && -- Gitee From d603358fbb8a70d2c4219adb85f9388bab5ed4f8 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:32 +0530 Subject: [PATCH 20/26] RISC-V: KVM: Use SBI sync SRET call when available ANBZ: #33223 commit 68c72a6557b072bff79658b9c0fdb0e69148e32d upstream. Implement an optimized KVM world-switch using SBI sync SRET call when SBI nested acceleration extension is available. This improves KVM world-switch when KVM RISC-V is running as a Guest under some other hypervisor. 
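For context, the SBI calling convention used by the new __kvm_riscv_nacl_switch_to() puts the extension ID in a7 and the function ID in a6 before the ecall. A rough C-level sketch of just the call itself (illustrative only; the hypothetical example_nacl_sync_sret() omits the guest GPR staging that the real assembly routine must perform, so it cannot be used as-is):

	static void example_nacl_sync_sret(void)
	{
		register unsigned long a6 asm("a6") = SBI_EXT_NACL_SYNC_SRET;
		register unsigned long a7 asm("a7") = SBI_EXT_NACL;

		/* Trap to the host hypervisor, which issues SRET for us */
		asm volatile("ecall" : : "r"(a6), "r"(a7) : "memory");
	}

The host-side hypervisor restores guest state from the shared memory (including HSTATUS when the autoswap feature is available) and enters the guest in a single trap, instead of KVM taking a separate trap for every emulated CSR access.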
Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-12-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/include/asm/kvm_nacl.h | 6 ++++ arch/riscv/kvm/vcpu.c | 48 ++++++++++++++++++++++++++++--- arch/riscv/kvm/vcpu_switch.S | 29 +++++++++++++++++++ 3 files changed, 79 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/kvm_nacl.h b/arch/riscv/include/asm/kvm_nacl.h index 8f3e3ebf5017..4124d5e06a0f 100644 --- a/arch/riscv/include/asm/kvm_nacl.h +++ b/arch/riscv/include/asm/kvm_nacl.h @@ -12,6 +12,8 @@ #include #include +struct kvm_vcpu_arch; + DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_available); #define kvm_riscv_nacl_available() \ static_branch_unlikely(&kvm_riscv_nacl_available) @@ -43,6 +45,10 @@ void __kvm_riscv_nacl_hfence(void *shmem, unsigned long page_num, unsigned long page_count); +void __kvm_riscv_nacl_switch_to(struct kvm_vcpu_arch *vcpu_arch, + unsigned long sbi_ext_id, + unsigned long sbi_func_id); + int kvm_riscv_nacl_enable(void); void kvm_riscv_nacl_disable(void); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 7e98a6f76c4b..8ef4ee2f7627 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -783,19 +783,59 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v */ static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) { + void *nsh; struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context; struct kvm_cpu_context *hcntx = &vcpu->arch.host_context; kvm_riscv_vcpu_swap_in_guest_state(vcpu); guest_state_enter_irqoff(); - hcntx->hstatus = ncsr_swap(CSR_HSTATUS, gcntx->hstatus); + if (kvm_riscv_nacl_sync_sret_available()) { + nsh = nacl_shmem(); - nsync_csr(-1UL); + if (kvm_riscv_nacl_autoswap_csr_available()) { + hcntx->hstatus = + nacl_csr_read(nsh, CSR_HSTATUS); + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET + + SBI_NACL_SHMEM_AUTOSWAP_HSTATUS, + gcntx->hstatus); + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET, + SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS); + } else if (kvm_riscv_nacl_sync_csr_available()) { + hcntx->hstatus = nacl_csr_swap(nsh, + CSR_HSTATUS, gcntx->hstatus); + } else { + hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); + } - __kvm_riscv_switch_to(&vcpu->arch); + nacl_scratch_write_longs(nsh, + SBI_NACL_SHMEM_SRET_OFFSET + + SBI_NACL_SHMEM_SRET_X(1), + &gcntx->ra, + SBI_NACL_SHMEM_SRET_X_LAST); + + __kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL, + SBI_EXT_NACL_SYNC_SRET); + + if (kvm_riscv_nacl_autoswap_csr_available()) { + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET, + 0); + gcntx->hstatus = nacl_scratch_read_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET + + SBI_NACL_SHMEM_AUTOSWAP_HSTATUS); + } else { + gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + } + } else { + hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); - gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + __kvm_riscv_switch_to(&vcpu->arch); + + gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + } vcpu->arch.last_exit_cpu = vcpu->cpu; guest_state_exit_irqoff(); diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index 9f13e5ce6a18..47686bcb21e0 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -218,6 +218,35 @@ SYM_FUNC_START(__kvm_riscv_switch_to) ret SYM_FUNC_END(__kvm_riscv_switch_to) + /* + * Parameters: + * A0 <= Pointer to struct kvm_vcpu_arch + * A1 <= SBI extension ID + * A2 <= 
SBI function ID + */ +SYM_FUNC_START(__kvm_riscv_nacl_switch_to) + SAVE_HOST_GPRS + + SAVE_HOST_AND_RESTORE_GUEST_CSRS .Lkvm_nacl_switch_return + + /* Resume Guest using SBI nested acceleration */ + add a6, a2, zero + add a7, a1, zero + ecall + + /* Back to Host */ + .align 2 +.Lkvm_nacl_switch_return: + SAVE_GUEST_GPRS + + SAVE_GUEST_AND_RESTORE_HOST_CSRS + + RESTORE_HOST_GPRS + + /* Return to C code */ + ret +SYM_FUNC_END(__kvm_riscv_nacl_switch_to) + SYM_CODE_START(__kvm_riscv_unpriv_trap) /* * We assume that faulting unpriv load/store instruction is -- Gitee From 6799b95eaaad2920f9827f1dc139db30e01ce33e Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:33 +0530 Subject: [PATCH 21/26] RISC-V: KVM: Save trap CSRs in kvm_riscv_vcpu_enter_exit() ANBZ: #33223 commit 3e7d154ad89be46b41bb47a0a8a19ecf8e0ca3f3 upstream. Save trap CSRs in the kvm_riscv_vcpu_enter_exit() function instead of the kvm_arch_vcpu_ioctl_run() function so that HTVAL and HTINST CSRs are accessed in more optimized manner while running under some other hypervisor. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-13-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/vcpu.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 8ef4ee2f7627..58f2b50889de 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -781,12 +781,21 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v * This must be noinstr as instrumentation may make use of RCU, and this is not * safe during the EQS. */ -static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) +static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu, + struct kvm_cpu_trap *trap) { void *nsh; struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context; struct kvm_cpu_context *hcntx = &vcpu->arch.host_context; + /* + * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and + * HTINST) here because we do local_irq_enable() after this + * function in kvm_arch_vcpu_ioctl_run() which can result in + * an interrupt immediately after local_irq_enable() and can + * potentially change trap CSRs. + */ + kvm_riscv_vcpu_swap_in_guest_state(vcpu); guest_state_enter_irqoff(); @@ -829,14 +838,24 @@ static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) } else { gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); } + + trap->htval = nacl_csr_read(nsh, CSR_HTVAL); + trap->htinst = nacl_csr_read(nsh, CSR_HTINST); } else { hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); __kvm_riscv_switch_to(&vcpu->arch); gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + + trap->htval = csr_read(CSR_HTVAL); + trap->htinst = csr_read(CSR_HTINST); } + trap->sepc = gcntx->sepc; + trap->scause = csr_read(CSR_SCAUSE); + trap->stval = csr_read(CSR_STVAL); + vcpu->arch.last_exit_cpu = vcpu->cpu; guest_state_exit_irqoff(); kvm_riscv_vcpu_swap_in_host_state(vcpu); @@ -954,22 +973,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) guest_timing_enter_irqoff(); - kvm_riscv_vcpu_enter_exit(vcpu); + kvm_riscv_vcpu_enter_exit(vcpu, &trap); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; - /* - * Save SCAUSE, STVAL, HTVAL, and HTINST because we might - * get an interrupt between __kvm_riscv_switch_to() and - * local_irq_enable() which can potentially change CSRs. 
- */ - trap.sepc = vcpu->arch.guest_context.sepc; - trap.scause = csr_read(CSR_SCAUSE); - trap.stval = csr_read(CSR_STVAL); - trap.htval = ncsr_read(CSR_HTVAL); - trap.htinst = ncsr_read(CSR_HTINST); - /* Syncup interrupts state with HW */ kvm_riscv_vcpu_sync_interrupts(vcpu); -- Gitee From f99bb004e1295b609554eb87bf2f4ddf59f377ad Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 21 Oct 2024 01:17:34 +0530 Subject: [PATCH 22/26] RISC-V: KVM: Use NACL HFENCEs for KVM request based HFENCEs ANBZ: #33223 commit 5bdecd891e505a9f98a50998aa60a60568f58c3c upstream. When running under some other hypervisor, use SBI NACL based HFENCEs for TLB shoot-down via KVM requests. This makes HFENCEs faster whenever SBI nested acceleration is available. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20241020194734.58686-14-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/tlb.c | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 23c0e82b5103..2f91ea5f8493 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -14,6 +14,7 @@ #include #include #include +#include #define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) @@ -186,18 +187,24 @@ void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu) void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu) { - struct kvm_vmid *vmid; + struct kvm_vmid *v = &vcpu->kvm->arch.vmid; + unsigned long vmid = READ_ONCE(v->vmid); - vmid = &vcpu->kvm->arch.vmid; - kvm_riscv_local_hfence_gvma_vmid_all(READ_ONCE(vmid->vmid)); + if (kvm_riscv_nacl_available()) + nacl_hfence_gvma_vmid_all(nacl_shmem(), vmid); + else + kvm_riscv_local_hfence_gvma_vmid_all(vmid); } void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu) { - struct kvm_vmid *vmid; + struct kvm_vmid *v = &vcpu->kvm->arch.vmid; + unsigned long vmid = READ_ONCE(v->vmid); - vmid = &vcpu->kvm->arch.vmid; - kvm_riscv_local_hfence_vvma_all(READ_ONCE(vmid->vmid)); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_all(nacl_shmem(), vmid); + else + kvm_riscv_local_hfence_vvma_all(vmid); } static bool vcpu_hfence_dequeue(struct kvm_vcpu *vcpu, @@ -251,6 +258,7 @@ static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu, void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) { + unsigned long vmid; struct kvm_riscv_hfence d = { 0 }; struct kvm_vmid *v = &vcpu->kvm->arch.vmid; @@ -259,26 +267,41 @@ void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) case KVM_RISCV_HFENCE_UNKNOWN: break; case KVM_RISCV_HFENCE_GVMA_VMID_GPA: - kvm_riscv_local_hfence_gvma_vmid_gpa( - READ_ONCE(v->vmid), - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_gvma_vmid(nacl_shmem(), vmid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_gvma_vmid_gpa(vmid, d.addr, + d.size, d.order); break; case KVM_RISCV_HFENCE_VVMA_ASID_GVA: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); - kvm_riscv_local_hfence_vvma_asid_gva( - READ_ONCE(v->vmid), d.asid, - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_asid(nacl_shmem(), vmid, d.asid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_vvma_asid_gva(vmid, d.asid, d.addr, + d.size, d.order); break; case KVM_RISCV_HFENCE_VVMA_ASID_ALL: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); - 
kvm_riscv_local_hfence_vvma_asid_all( - READ_ONCE(v->vmid), d.asid); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_asid_all(nacl_shmem(), vmid, d.asid); + else + kvm_riscv_local_hfence_vvma_asid_all(vmid, d.asid); break; case KVM_RISCV_HFENCE_VVMA_GVA: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD); - kvm_riscv_local_hfence_vvma_gva( - READ_ONCE(v->vmid), - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma(nacl_shmem(), vmid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_vvma_gva(vmid, d.addr, + d.size, d.order); break; default: break; -- Gitee From 7ce6a8b5d7a70ba0ac3c6918747f8b32f26285e5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 7 Jul 2025 09:23:43 +0530 Subject: [PATCH 23/26] RISC-V: KVM: Disable vstimecmp before exiting to user-space ANBZ: #33223 commit 57f576e860d3a3582edb5064db9f0c95558ca5f4 upstream. If the VS-timer expires when no VCPU is running on a host CPU, then WFI executed by that host CPU will be an effective NOP, resulting in no power savings. This is as per the RISC-V Privileged specification, which says: "WFI is also required to resume execution for locally enabled interrupts pending at any privilege level, regardless of the global interrupt enable at each privilege level." To address the above issue, the vstimecmp CSR must be set to -1UL when the VCPU is scheduled out or exits to user space. Reviewed-by: Atish Patra Tested-by: Atish Patra Tested-by: Heinrich Schuchardt Fixes: 8f5cb44b1bae ("RISC-V: KVM: Support sstc extension") Fixes: cea8896bd936 ("RISC-V: KVM: Fix kvm_riscv_vcpu_timer_pending() for Sstc") Reported-by: Heinrich Schuchardt Closes: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2112578 Signed-off-by: Anup Patel Reviewed-by: Nutty Liu Link: https://lore.kernel.org/r/20250707035345.17494-2-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/vcpu_timer.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 96e7a4e463f7..fdf4d415b434 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -344,8 +344,24 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) /* * The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync() * upon every VM exit so no need to save here. + * + * If the VS-timer expires when no VCPU is running on a host CPU then + * WFI executed by that host CPU will be an effective NOP, resulting + * in no power savings. This is because, as per the RISC-V Privileged + * specification: "WFI is also required to resume execution for + * locally enabled interrupts pending at any privilege level, + * regardless of the global interrupt enable at each privilege + * level." + * + * To address the above issue, the vstimecmp CSR must be set to -1UL + * over here when the VCPU is scheduled out or exits to user space. */ + csr_write(CSR_VSTIMECMP, -1UL); +#if defined(CONFIG_32BIT) + csr_write(CSR_VSTIMECMPH, -1UL); +#endif + /* timer should be enabled for the remaining operations */ if (unlikely(!t->init_done)) return; -- Gitee From 7a1bc34741463c45eb19e602ebf51ab016741531 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 7 Jul 2025 09:23:44 +0530 Subject: [PATCH 24/26] RISC-V: KVM: Move HGEI[E|P] CSR access to IMSIC virtualization ANBZ: #33223 commit 4cec89db80ba81fa4524c6449c0494b8ae08eeb0 upstream.
Currently, the common AIA functions kvm_riscv_vcpu_aia_has_interrupts() and kvm_riscv_aia_wakeon_hgei() look up the HGEI line using an array of VCPU pointers before accessing the HGEI[E|P] CSRs, which is slow and prone to race conditions because there is a separate per-hart lock for the VCPU pointer array and a separate per-VCPU rwlock for the IMSIC VS-file (including the HGEI line) used by the VCPU. Due to these race conditions, it is observed on a QEMU RISC-V host that guest VCPUs sleep in WFI and never wake up even with an interrupt pending in the IMSIC VS-file, because the VCPUs were waiting for the HGEI wakeup on the wrong host CPU. The IMSIC virtualization already keeps track of the HGEI line and the associated IMSIC VS-file used by each VCPU, so move the HGEI[E|P] CSR access to the IMSIC virtualization so that the costly HGEI line lookup can be avoided and the likelihood of race conditions when updating the HGEI[E|P] CSRs is also reduced. Reviewed-by: Atish Patra Tested-by: Atish Patra Tested-by: Heinrich Schuchardt Fixes: 3385339296d1 ("RISC-V: KVM: Use IMSIC guest files when available") Signed-off-by: Anup Patel Reviewed-by: Nutty Liu Link: https://lore.kernel.org/r/20250707035345.17494-3-apatel@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/include/asm/kvm_aia.h | 4 ++- arch/riscv/include/asm/kvm_host.h | 3 ++ arch/riscv/kvm/aia.c | 51 +++++-------------------------- arch/riscv/kvm/aia_imsic.c | 45 +++++++++++++++++++++++++++ arch/riscv/kvm/vcpu.c | 5 files changed, 58 insertions(+), 55 deletions(-) diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h index 3b643b9efc07..5acce285e56e 100644 --- a/arch/riscv/include/asm/kvm_aia.h +++ b/arch/riscv/include/asm/kvm_aia.h @@ -87,6 +87,9 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); extern struct kvm_device_ops kvm_riscv_aia_device_ops; +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu); +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu); @@ -161,7 +164,6 @@ void kvm_riscv_aia_destroy_vm(struct kvm *kvm); int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, void __iomem **hgei_va, phys_addr_t *hgei_pa); void kvm_riscv_aia_free_hgei(int cpu, int hgei); -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable); void kvm_riscv_aia_enable(void); void kvm_riscv_aia_disable(void); diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index a8179b946168..76e3b77fc8c8 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -311,6 +311,9 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} #define KVM_ARCH_WANT_MMU_NOTIFIER +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 19afd1f23537..dad318185660 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -30,28 +30,6 @@ unsigned int kvm_riscv_aia_nr_hgei; unsigned int kvm_riscv_aia_max_ids; DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available); -static int aia_find_hgei(struct kvm_vcpu *owner) -{ - int i, hgei; - unsigned long flags; - struct aia_hgei_control *hgctrl =
get_cpu_ptr(&aia_hgei); - - raw_spin_lock_irqsave(&hgctrl->lock, flags); - - hgei = -1; - for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) { - if (hgctrl->owners[i] == owner) { - hgei = i; - break; - } - } - - raw_spin_unlock_irqrestore(&hgctrl->lock, flags); - - put_cpu_ptr(&aia_hgei); - return hgei; -} - static inline unsigned long aia_hvictl_value(bool ext_irq_pending) { unsigned long hvictl; @@ -95,7 +73,6 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) { - int hgei; unsigned long seip; if (!kvm_riscv_aia_available()) @@ -114,11 +91,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip) return false; - hgei = aia_find_hgei(vcpu); - if (hgei > 0) - return !!(ncsr_read(CSR_HGEIP) & BIT(hgei)); - - return false; + return kvm_riscv_vcpu_aia_imsic_has_interrupt(vcpu); } void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) @@ -164,6 +137,9 @@ void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_HVIPRIO2H, csr->hviprio2h); #endif } + + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_load(vcpu, cpu); } void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) @@ -174,6 +150,9 @@ void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) if (!kvm_riscv_aia_available()) return; + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_put(vcpu); + if (kvm_riscv_nacl_available()) { nsh = nacl_shmem(); csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT); @@ -472,22 +451,6 @@ void kvm_riscv_aia_free_hgei(int cpu, int hgei) raw_spin_unlock_irqrestore(&hgctrl->lock, flags); } -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable) -{ - int hgei; - - if (!kvm_riscv_aia_available()) - return; - - hgei = aia_find_hgei(owner); - if (hgei > 0) { - if (enable) - csr_set(CSR_HGEIE, BIT(hgei)); - else - csr_clear(CSR_HGEIE, BIT(hgei)); - } -} - static irqreturn_t hgei_interrupt(int irq, void *dev_id) { int i; diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 29ef9c2133a9..2ff865943ebb 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -676,6 +676,48 @@ static void imsic_swfile_update(struct kvm_vcpu *vcpu, imsic_swfile_extirq_update(vcpu); } +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + bool ret = false; + + /* + * The IMSIC SW-file directly injects interrupt via hvip so + * only check for interrupt when IMSIC VS-file is being used. + */ + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); + + return ret; +} + +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu) +{ + /* + * No need to explicitly clear HGEIE CSR bits because the + * hgei interrupt handler (aka hgei_interrupt()) will always + * clear it for us. 
+ */ +} + +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + + if (!kvm_vcpu_is_blocking(vcpu)) + return; + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + csr_set(CSR_HGEIE, BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); +} + void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -781,6 +823,9 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) * producers to the new IMSIC VS-file. */ + /* Ensure HGEIE CSR bit is zero before using the new IMSIC VS-file */ + csr_clear(CSR_HGEIE, BIT(new_vsfile_hgei)); + /* Zero-out new IMSIC VS-file */ imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 58f2b50889de..680b44de0ea2 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -207,16 +207,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_riscv_vcpu_timer_pending(vcpu); } -void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, true); -} - -void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, false); -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && -- Gitee From 90cceba82c668a2b93b1770fbc3748b158ed67a3 Mon Sep 17 00:00:00 2001 From: Fangyu Yu Date: Thu, 16 Oct 2025 09:26:59 +0800 Subject: [PATCH 25/26] RISC-V: KVM: Read HGEIP CSR on the correct cpu ANBZ: #33223 commit 873f10cf8e4d59605bc38fa1051dea8ee56fe3be upstream. When executing kvm_riscv_vcpu_aia_has_interrupts(), the vCPU may have migrated while the IMSIC VS-file has not been updated yet. Currently, the HGEIP CSR would have to be read from imsic->vsfile_cpu (the pCPU before migration) via on_each_cpu_mask(), but this triggers an IPI call, and repeated IPIs within a short period of time are expensive on many-core systems. Simply letting the vCPU execute and update the correct IMSIC VS-file via kvm_riscv_vcpu_aia_imsic_update() is a simpler solution. Fixes: 4cec89db80ba ("RISC-V: KVM: Move HGEI[E|P] CSR access to IMSIC virtualization") Signed-off-by: Fangyu Yu Reviewed-by: Guo Ren Reviewed-by: Anup Patel Tested-by: Anup Patel Link: https://lore.kernel.org/r/20251016012659.82998-1-fangyu.yu@linux.alibaba.com Signed-off-by: Anup Patel Signed-off-by: Ruidong Tian --- arch/riscv/kvm/aia_imsic.c | 16 ++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 2ff865943ebb..279ffec2712e 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -688,8 +688,20 @@ bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) */ read_lock_irqsave(&imsic->vsfile_lock, flags); - if (imsic->vsfile_cpu > -1) - ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + if (imsic->vsfile_cpu > -1) { + /* + * This function is typically called from kvm_vcpu_block() via + * kvm_arch_vcpu_runnable() upon a WFI trap. kvm_vcpu_block() + * can be preempted and the blocking VCPU might resume on a + * different CPU. This means it is possible that the current CPU + * does not match imsic->vsfile_cpu, hence this function must + * check imsic->vsfile_cpu before accessing the HGEIP CSR.
+ */ + if (imsic->vsfile_cpu != vcpu->cpu) + ret = true; + else + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + } read_unlock_irqrestore(&imsic->vsfile_lock, flags); return ret; -- Gitee From 69738836bebeb9ca73ee8f9be2b4680b702520e2 Mon Sep 17 00:00:00 2001 From: Ruidong Tian Date: Thu, 16 Apr 2026 01:26:58 +0800 Subject: [PATCH 26/26] anolis: configs: update config file ANBZ: #33223 Signed-off-by: Ruidong Tian --- anolis/configs/L1-RECOMMEND/riscv/CONFIG_GUEST_PERF_EVENTS | 1 - anolis/configs/L2-OPTIONAL/riscv/CONFIG_USER_STACKTRACE_SUPPORT | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 anolis/configs/L1-RECOMMEND/riscv/CONFIG_GUEST_PERF_EVENTS create mode 100644 anolis/configs/L2-OPTIONAL/riscv/CONFIG_USER_STACKTRACE_SUPPORT diff --git a/anolis/configs/L1-RECOMMEND/riscv/CONFIG_GUEST_PERF_EVENTS b/anolis/configs/L1-RECOMMEND/riscv/CONFIG_GUEST_PERF_EVENTS deleted file mode 100644 index d7392e9f6211..000000000000 --- a/anolis/configs/L1-RECOMMEND/riscv/CONFIG_GUEST_PERF_EVENTS +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_GUEST_PERF_EVENTS is not set diff --git a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_USER_STACKTRACE_SUPPORT b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_USER_STACKTRACE_SUPPORT new file mode 100644 index 000000000000..079153a892dc --- /dev/null +++ b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_USER_STACKTRACE_SUPPORT @@ -0,0 +1 @@ +CONFIG_USER_STACKTRACE_SUPPORT=y -- Gitee