From 902dd5b166728d615020b49a95a6c3c7b1cc4f0a Mon Sep 17 00:00:00 2001
From: heqiong
Date: Mon, 21 Jul 2025 16:47:42 +0800
Subject: [PATCH 1/2] KVM: arm64: clean only the first vcpu's dcache on
 Phytium CPUs

Phytium CPUs maintain cache coherency across cores, so flushing the
first vcpu's dcache is sufficient. Reducing the number of dcache
flushes improves the efficiency of SMP multi-core startup, especially
for VMs with many cores and hugepages.

Signed-off-by: He Qiong
Signed-off-by: Jiakun Shuai
---
 arch/arm64/include/asm/cputype.h |  1 +
 virt/kvm/arm/mmu.c               | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 414aa8a92896..17db34582afd 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -61,6 +61,7 @@
 #define ARM_CPU_IMP_FUJITSU		0x46
 #define ARM_CPU_IMP_HISI		0x48
 #define ARM_CPU_IMP_AMPERE		0xC0
+#define ARM_CPU_IMP_PHYTIUM		0x70
 
 #define ARM_CPU_PART_AEM_V8		0xD0F
 #define ARM_CPU_PART_FOUNDATION		0xD00
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 6e20da9bf4f5..3b04786aa439 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -2529,9 +2529,21 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
 	 * If switching it off, need to clean the caches.
 	 * Clean + invalidate does the trick always.
 	 */
-	if (now_enabled != was_enabled)
-		stage2_flush_vm(vcpu->kvm);
+	if (now_enabled != was_enabled) {
+		/*
+		 * Phytium CPUs keep their caches coherent, so flushing
+		 * the dcache on a single vcpu, rather than on every
+		 * vcpu in the VM, is sufficient. This reduces the
+		 * number of dcache flushes and improves SMP multi-core
+		 * startup, especially for large VMs with hugepages.
+		 */
+		if (read_cpuid_implementor() == ARM_CPU_IMP_PHYTIUM) {
+			if (vcpu->vcpu_id == 0)
+				stage2_flush_vm(vcpu->kvm);
+		} else
+			stage2_flush_vm(vcpu->kvm);
+	}
 
 	/* Caches are now on, stop trapping VM ops (until a S/W op) */
 	if (now_enabled)
 		*vcpu_hcr(vcpu) &= ~HCR_TVM;
-- 
Gitee
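A note on the implementer check above: read_cpuid_implementor() returns
MIDR_EL1 bits [31:24], and 0x70 is the implementer code used by Phytium
parts. Below is a minimal user-space sketch of the same decode-and-gate
logic. The MIDR value and the midr_implementor() helper are hypothetical
stand-ins for the in-kernel read_cpuid_id() and MIDR_IMPLEMENTOR(); this
is a rough model of the patch's control flow, not the patch's code.

#include <stdint.h>
#include <stdio.h>

#define MIDR_IMPLEMENTOR_SHIFT	24
#define MIDR_IMPLEMENTOR_MASK	(0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define ARM_CPU_IMP_PHYTIUM	0x70

/* Extract MIDR_EL1[31:24], as MIDR_IMPLEMENTOR() does in cputype.h. */
static uint32_t midr_implementor(uint32_t midr)
{
	return (midr & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT;
}

int main(void)
{
	uint32_t midr = 0x70662224;	/* made-up Phytium-style MIDR */
	int vcpu_id = 0;		/* pretend we are the first vcpu */

	if (midr_implementor(midr) == ARM_CPU_IMP_PHYTIUM) {
		if (vcpu_id == 0)
			printf("Phytium: flush dcache on vcpu 0 only\n");
		else
			printf("Phytium: skip the flush on this vcpu\n");
	} else {
		printf("other implementer: flush dcache on every vcpu\n");
	}
	return 0;
}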
From 05427209ad8f807b8cd6c10c9625c96b46d9608e Mon Sep 17 00:00:00 2001
From: heqiong
Date: Mon, 21 Jul 2025 17:33:35 +0800
Subject: [PATCH 2/2] KVM: arm64: Move guest CMOs to the fault handlers

This patch is backported from the upstream kernel, commit
25aa28691bb960a76f0cffd8862144a29487f6ff.

We currently uniformly perform cache maintenance operations (CMOs) of
D-cache and I-cache in user_mem_abort before calling the fault
handlers. If we get concurrent guest faults (e.g. translation faults,
permission faults) or some really unnecessary guest faults caused by
break-before-make (BBM), CMOs for the first vcpu are necessary while
the later ones are not.

By moving CMOs to the fault handlers, we can easily identify the
conditions under which they are really needed and avoid the unnecessary
ones. Since performing CMOs is time consuming, especially when flushing
a block range, this solution reduces the load on KVM and improves the
efficiency of the stage-2 page table code.

Two specific scenarios gain much benefit:
1) During normal VM startup, this solution improves the efficiency of
handling the guest page faults incurred by vCPUs when initially
populating stage-2 page tables.
2) After live migration, the heavy workload is resumed on the
destination VM while all the stage-2 page tables still need to be
rebuilt, so this solution eases the performance drop during the
resuming stage.

Reviewed-by: Fuad Tabba
Signed-off-by: Yanan Wang
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210617105824.31752-5-wangyanan55@huawei.com
Signed-off-by: He Qiong
Signed-off-by: Jiakun Shuai
---
 virt/kvm/arm/mmu.c | 53 +++++++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 3b04786aa439..abb5f76454b5 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -141,6 +141,28 @@ static bool kvm_is_device_pfn(unsigned long pfn)
 	return !pfn_valid(pfn);
 }
 
+#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
+static bool stage2_pte_cacheable(u64 pte)
+{
+	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+	return memattr == PAGE_S2_MEMATTR(NORMAL);
+}
+
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)
+static bool stage2_pte_executable(u64 pte)
+{
+	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
+static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+	__clean_dcache_guest_page(pfn, size);
+}
+
+static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+	__invalidate_icache_guest_page(pfn, size);
+}
+
 /**
  * stage2_dissolve_pmd() - clear and flush huge PMD entry
  * @kvm:	pointer to kvm structure.
@@ -1173,6 +1195,13 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 		pmd_clear(pmd);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	} else {
+		/* Flush data cache before installation of the new PMD */
+		if (stage2_pte_cacheable(pmd_val(*new_pmd)))
+			kvm_flush_dcache_pmd(*new_pmd);
+
+		if (stage2_pte_executable(pmd_val(*new_pmd)))
+			invalidate_icache_guest_page(pmd_pfn(*new_pmd), S2_PMD_SIZE);
+
 		get_page(virt_to_page(pmd));
 	}
 
@@ -1359,6 +1388,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 		kvm_set_pte(pte, __pte(0));
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	} else {
+		/* Flush data cache before installation of the new PTE */
+		if (stage2_pte_cacheable(pte_val(*new_pte)))
+			kvm_flush_dcache_pte(*new_pte);
+
+		if (stage2_pte_executable(pte_val(*new_pte)))
+			invalidate_icache_guest_page(pte_pfn(*new_pte), PAGE_SIZE);
+
 		get_page(virt_to_page(pte));
 	}
 
@@ -1650,16 +1686,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
-static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-	__clean_dcache_guest_page(pfn, size);
-}
-
-static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-	__invalidate_icache_guest_page(pfn, size);
-}
-
 static void kvm_send_hwpoison_signal(unsigned long address,
 				     struct vm_area_struct *vma)
 {
@@ -1855,12 +1881,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (writable)
 		kvm_set_pfn_dirty(pfn);
 
-	if (fault_status != FSC_PERM && !is_iomap(flags))
-		clean_dcache_guest_page(pfn, vma_pagesize);
-
-	if (exec_fault)
-		invalidate_icache_guest_page(pfn, vma_pagesize);
-
 	/*
 	 * If we took an execution fault we have made the
 	 * icache/dcache coherent above and should now let the s2
@@ -2165,7 +2185,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	 * We've moved a page around, probably through CoW, so let's treat it
 	 * just like a translation fault and clean the cache to the PoC.
 	 */
-	clean_dcache_guest_page(pfn, PAGE_SIZE);
 	stage2_pte = kvm_pfn_pte(pfn, PAGE_S2);
 	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 
-- 
Gitee
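As a closing illustration: the two predicates this patch introduces are
plain bitfield tests on a stage-2 descriptor, so they can be exercised
in isolation. In the standalone sketch below, S2_MEMATTR_NORMAL is
assumed to be the Normal Write-Back MemAttr[3:0] encoding 0b1111 that
PAGE_S2_MEMATTR(NORMAL) yields when FWB is not in use, and the
descriptor values are made up; this is a model of the checks, not the
kernel code itself.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stage-2 descriptor attribute fields, mirroring the patch above. */
#define S2_MEMATTR_SHIFT	2
#define S2_MEMATTR_MASK		(0xfULL << S2_MEMATTR_SHIFT)	/* GENMASK(5, 2) */
#define S2_MEMATTR_NORMAL	(0xfULL << S2_MEMATTR_SHIFT)	/* assumed Normal WB */
#define S2_XN			(1ULL << 54)			/* BIT(54): execute-never */

static bool stage2_pte_cacheable(uint64_t pte)
{
	return (pte & S2_MEMATTR_MASK) == S2_MEMATTR_NORMAL;
}

static bool stage2_pte_executable(uint64_t pte)
{
	return !(pte & S2_XN);
}

int main(void)
{
	uint64_t pte = S2_MEMATTR_NORMAL;	/* cacheable and executable */

	printf("cacheable=%d executable=%d\n",
	       stage2_pte_cacheable(pte), stage2_pte_executable(pte));

	pte |= S2_XN;				/* mark the page execute-never */
	printf("cacheable=%d executable=%d\n",
	       stage2_pte_cacheable(pte), stage2_pte_executable(pte));
	return 0;
}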