From 902dd5b166728d615020b49a95a6c3c7b1cc4f0a Mon Sep 17 00:00:00 2001
From: heqiong
Date: Mon, 21 Jul 2025 16:47:42 +0800
Subject: [PATCH 1/2] KVM: arm64: clean only the first vcpu's dcache on
 Phytium CPUs

Phytium CPUs maintain cache coherency across cores, so flushing the
first vcpu's dcache is sufficient. Reducing the number of dcache
flushes improves the efficiency of SMP multi-core startup, especially
for VMs with many cores and hugepages.

Signed-off-by: He Qiong
Signed-off-by: Jiakun Shuai
---
 arch/arm64/include/asm/cputype.h |  1 +
 virt/kvm/arm/mmu.c               | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 414aa8a92896..17db34582afd 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -61,6 +61,7 @@
 #define ARM_CPU_IMP_FUJITSU		0x46
 #define ARM_CPU_IMP_HISI		0x48
 #define ARM_CPU_IMP_AMPERE		0xC0
+#define ARM_CPU_IMP_PHYTIUM		0x70
 
 #define ARM_CPU_PART_AEM_V8		0xD0F
 #define ARM_CPU_PART_FOUNDATION		0xD00
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 6e20da9bf4f5..3b04786aa439 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -2529,9 +2529,21 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
 	 * If switching it off, need to clean the caches.
 	 * Clean + invalidate does the trick always.
 	 */
-	if (now_enabled != was_enabled)
-		stage2_flush_vm(vcpu->kvm);
+	if (now_enabled != was_enabled) {
+		/*
+		 * Phytium CPUs keep their caches coherent, so flushing
+		 * the dcache on a single vcpu, rather than on every
+		 * vcpu in the VM, is sufficient. This reduces the
+		 * number of dcache flushes and improves SMP multi-core
+		 * startup, especially for large VMs with hugepages.
+		 */
+		if (read_cpuid_implementor() == ARM_CPU_IMP_PHYTIUM) {
+			if (vcpu->vcpu_id == 0)
+				stage2_flush_vm(vcpu->kvm);
+		} else
+			stage2_flush_vm(vcpu->kvm);
+	}
 
 	/* Caches are now on, stop trapping VM ops (until a S/W op) */
 	if (now_enabled)
 		*vcpu_hcr(vcpu) &= ~HCR_TVM;
-- 
Gitee
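A note on the implementer check above: read_cpuid_implementor() returns
MIDR_EL1 bits [31:24], and 0x70 is the implementer code used by Phytium
parts. Below is a minimal user-space sketch of the same decode-and-gate
logic. The MIDR value and the midr_implementor() helper are hypothetical
stand-ins for the in-kernel read_cpuid_id() and MIDR_IMPLEMENTOR(); this
is a rough model of the patch's control flow, not the patch's code.

#include <stdint.h>
#include <stdio.h>

#define MIDR_IMPLEMENTOR_SHIFT	24
#define MIDR_IMPLEMENTOR_MASK	(0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define ARM_CPU_IMP_PHYTIUM	0x70

/* Extract MIDR_EL1[31:24], as MIDR_IMPLEMENTOR() does in cputype.h. */
static uint32_t midr_implementor(uint32_t midr)
{
	return (midr & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT;
}

int main(void)
{
	uint32_t midr = 0x70662224;	/* made-up Phytium-style MIDR */
	int vcpu_id = 0;		/* pretend we are the first vcpu */

	if (midr_implementor(midr) == ARM_CPU_IMP_PHYTIUM) {
		if (vcpu_id == 0)
			printf("Phytium: flush dcache on vcpu 0 only\n");
		else
			printf("Phytium: skip the flush on this vcpu\n");
	} else {
		printf("other implementer: flush dcache on every vcpu\n");
	}
	return 0;
}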
From 05427209ad8f807b8cd6c10c9625c96b46d9608e Mon Sep 17 00:00:00 2001
From: heqiong
Date: Mon, 21 Jul 2025 17:33:35 +0800
Subject: [PATCH 2/2] KVM: arm64: Move guest CMOs to the fault handlers

This patch is backported from the upstream kernel, commit
25aa28691bb960a76f0cffd8862144a29487f6ff.

We currently uniformly perform cache maintenance operations (CMOs) of
D-cache and I-cache in user_mem_abort before calling the fault
handlers. If we get concurrent guest faults (e.g. translation faults,
permission faults) or some really unnecessary guest faults caused by
break-before-make (BBM), CMOs for the first vcpu are necessary while
the later ones are not.

By moving CMOs to the fault handlers, we can easily identify the
conditions under which they are really needed and avoid the unnecessary
ones. Since performing CMOs is time consuming, especially when flushing
a block range, this solution reduces the load on KVM and improves the
efficiency of the stage-2 page table code.

Two specific scenarios gain much benefit:
1) During normal VM startup, this solution improves the efficiency of
handling the guest page faults incurred by vCPUs when initially
populating stage-2 page tables.
2) After live migration, the heavy workload is resumed on the
destination VM while all the stage-2 page tables still need to be
rebuilt, so this solution eases the performance drop during the
resuming stage.

Reviewed-by: Fuad Tabba
Signed-off-by: Yanan Wang
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210617105824.31752-5-wangyanan55@huawei.com
Signed-off-by: He Qiong
Signed-off-by: Jiakun Shuai
---
 virt/kvm/arm/mmu.c | 53 +++++++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 3b04786aa439..abb5f76454b5 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -141,6 +141,28 @@ static bool kvm_is_device_pfn(unsigned long pfn)
 	return !pfn_valid(pfn);
 }
 
+#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
+static bool stage2_pte_cacheable(u64 pte)
+{
+	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+	return memattr == PAGE_S2_MEMATTR(NORMAL);
+}
+
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)
+static bool stage2_pte_executable(u64 pte)
+{
+	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
+static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+	__clean_dcache_guest_page(pfn, size);
+}
+
+static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+	__invalidate_icache_guest_page(pfn, size);
+}
+
 /**
  * stage2_dissolve_pmd() - clear and flush huge PMD entry
  * @kvm:	pointer to kvm structure.
@@ -1173,6 +1195,13 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 		pmd_clear(pmd);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	} else {
+		/* Flush data cache before installation of the new PMD */
+		if (stage2_pte_cacheable(pmd_val(*new_pmd)))
+			kvm_flush_dcache_pmd(*new_pmd);
+
+		if (stage2_pte_executable(pmd_val(*new_pmd)))
+			invalidate_icache_guest_page(pmd_pfn(*new_pmd), S2_PMD_SIZE);
+
 		get_page(virt_to_page(pmd));
 	}
 
@@ -1359,6 +1388,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 		kvm_set_pte(pte, __pte(0));
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	} else {
+		/* Flush data cache before installation of the new PTE */
+		if (stage2_pte_cacheable(pte_val(*new_pte)))
+			kvm_flush_dcache_pte(*new_pte);
+
+		if (stage2_pte_executable(pte_val(*new_pte)))
+			invalidate_icache_guest_page(pte_pfn(*new_pte), PAGE_SIZE);
+
 		get_page(virt_to_page(pte));
 	}
 
@@ -1650,16 +1686,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
-static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-	__clean_dcache_guest_page(pfn, size);
-}
-
-static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-	__invalidate_icache_guest_page(pfn, size);
-}
-
 static void kvm_send_hwpoison_signal(unsigned long address,
 				     struct vm_area_struct *vma)
 {
@@ -1855,12 +1881,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (writable)
 		kvm_set_pfn_dirty(pfn);
 
-	if (fault_status != FSC_PERM && !is_iomap(flags))
-		clean_dcache_guest_page(pfn, vma_pagesize);
-
-	if (exec_fault)
-		invalidate_icache_guest_page(pfn, vma_pagesize);
-
 	/*
 	 * If we took an execution fault we have made the
 	 * icache/dcache coherent above and should now let the s2
@@ -2165,7 +2185,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	 * We've moved a page around, probably through CoW, so let's treat it
 	 * just like a translation fault and clean the cache to the PoC.
 	 */
-	clean_dcache_guest_page(pfn, PAGE_SIZE);
 	stage2_pte = kvm_pfn_pte(pfn, PAGE_S2);
 	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 
-- 
Gitee
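As a closing illustration: the two predicates this patch introduces are
plain bitfield tests on a stage-2 descriptor, so they can be exercised
in isolation. In the standalone sketch below, S2_MEMATTR_NORMAL is
assumed to be the Normal Write-Back MemAttr[3:0] encoding 0b1111 that
PAGE_S2_MEMATTR(NORMAL) yields when FWB is not in use, and the
descriptor values are made up; this is a model of the checks, not the
kernel code itself.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stage-2 descriptor attribute fields, mirroring the patch above. */
#define S2_MEMATTR_SHIFT	2
#define S2_MEMATTR_MASK		(0xfULL << S2_MEMATTR_SHIFT)	/* GENMASK(5, 2) */
#define S2_MEMATTR_NORMAL	(0xfULL << S2_MEMATTR_SHIFT)	/* assumed Normal WB */
#define S2_XN			(1ULL << 54)			/* BIT(54): execute-never */

static bool stage2_pte_cacheable(uint64_t pte)
{
	return (pte & S2_MEMATTR_MASK) == S2_MEMATTR_NORMAL;
}

static bool stage2_pte_executable(uint64_t pte)
{
	return !(pte & S2_XN);
}

int main(void)
{
	uint64_t pte = S2_MEMATTR_NORMAL;	/* cacheable and executable */

	printf("cacheable=%d executable=%d\n",
	       stage2_pte_cacheable(pte), stage2_pte_executable(pte));

	pte |= S2_XN;				/* mark the page execute-never */
	printf("cacheable=%d executable=%d\n",
	       stage2_pte_cacheable(pte), stage2_pte_executable(pte));
	return 0;
}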