From 639da762499740f0dc8e70e77521c069c3b3e177 Mon Sep 17 00:00:00 2001 From: Xie Xiaodong <624338359@qq.com> Date: Thu, 16 Apr 2026 11:38:03 +0800 Subject: [PATCH 1/5] KVM: arm64: Fix FEAT_TLBIRANGE bugs Fix the local FEAT_TLBIRANGE integration issues in the 5.4 arm64 KVM backport: - use *flush and clear it after memslot range flushes - restore the 5.4 single-name full TLB flush API hook - complete deferred stage-2 unmap range invalidation per walker chunk instead of deferring it until the full hand-written walk completes Upstream arm64 KVM chunks large stage-2 unmaps through stage2_apply_range(), which invokes kvm_pgtable_stage2_unmap() on each chunk. When deferred range invalidation is enabled, each chunk completes its TLBI range before stage2_apply_range() can drop and reacquire mmu_lock via cond_resched_rwlock_write(). Align the 5.4 behaviour with upstream by completing the deferred range TLBI for each walker chunk immediately after unmap_stage2_puds() returns. This keeps FEAT_TLBIRANGE enabled for stage-2 unmap while ensuring stale leaf translations do not survive beyond a chunk that may reschedule. 
Fixes: 1b2905d3582f ("KVM: arm64: Add support for FEAT_TLBIRANGE") Signed-off-by: Xie Xiaodong <624338359@qq.com> --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/Kconfig | 1 + include/linux/kvm_host.h | 6 +++--- virt/kvm/Kconfig | 3 +++ virt/kvm/arm/arm.c | 17 ++++++----------- virt/kvm/arm/mmu.c | 14 +++++++++----- virt/kvm/kvm_main.c | 11 +++++++---- 7 files changed, 30 insertions(+), 24 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 850ef4a6d389..cbca2fc7fd31 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -693,7 +693,7 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); void kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 1d0a3791c017..ca5401afc1fb 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -24,6 +24,7 @@ config KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9019db30b8a7..ac29d18293fb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -931,13 +931,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS -static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB +static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) { return -ENOTSUPP; } #else -int kvm_arch_flush_remote_tlbs(struct kvm *kvm); +int kvm_arch_flush_remote_tlb(struct kvm *kvm); #endif #ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE diff 
--git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 6ec39b52214c..aad9284c043a 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -36,6 +36,9 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT config KVM_VFIO bool +config HAVE_KVM_ARCH_TLB_FLUSH_ALL + bool + config HAVE_KVM_INVALID_WAKEUPS bool diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 254537f5ed00..34ef6ed11b20 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1274,11 +1274,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, * Steps 1-4 below provide general overview of dirty page logging. See * kvm_get_dirty_log_protect() function description for additional details. * - * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we - * always flush the TLB (step 4) even if previous step failed and the dirty - * bitmap may be corrupt. Regardless of previous outcome the KVM logging API - * does not preclude user space subsequent dirty log read. Flushing TLB ensures - * writes will be marked dirty for next log read. + * We call kvm_get_dirty_log_protect() to handle steps 1-4. The helper + * flushes the relevant memslot TLBs when needed, even if the subsequent + * copy_to_user() fails and the dirty bitmap may be corrupt. Regardless of + * previous outcome the KVM logging API does not preclude user space + * subsequent dirty log read. Flushing TLB ensures writes will be marked + * dirty for next log read. * * 1. Take a snapshot of the bit and clear it if needed. * 2. Write protect the corresponding page. 
@@ -1294,9 +1295,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) r = kvm_get_dirty_log_protect(kvm, log, &flush); - if (flush) - kvm_flush_remote_tlbs(kvm); - mutex_unlock(&kvm->slots_lock); return r; } @@ -1310,9 +1308,6 @@ int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *lo r = kvm_clear_dirty_log_protect(kvm, log, &flush); - if (flush) - kvm_flush_remote_tlbs(kvm); - mutex_unlock(&kvm->slots_lock); return r; } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index abb5f76454b5..0d293f9f1b39 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -49,12 +49,12 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) } /** - * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8 + * kvm_arch_flush_remote_tlb() - flush all VM TLB entries for v7/8 * @kvm: pointer to kvm structure. * * Interface to HYP function to flush all VM TLB entries */ -int kvm_arch_flush_remote_tlbs(struct kvm *kvm) +int kvm_arch_flush_remote_tlb(struct kvm *kvm) { kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); return 0; @@ -429,10 +429,14 @@ static void __unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size, next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) unmap_stage2_puds(kvm, pgd, addr, next); - + if (stage2_unmap_defer_tlb_flush()) - /* Perform the deferred TLB invalidations */ - kvm_tlb_flush_vmid_range(kvm, addr, size); + /* + * Mirror the upstream chunked unmap semantics by + * completing deferred range invalidation for this + * walker chunk before the next iteration can resched. + */ + kvm_tlb_flush_vmid_range(kvm, addr, next - addr); /* * If the range is too large, release the kvm->mmu_lock diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ac74b31efb81..ceb70dc7e7f0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -341,7 +341,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) * kvm_make_all_cpus_request() reads vcpu->mode. 
We reuse that * barrier here. */ - if (!kvm_arch_flush_remote_tlbs(kvm) + if (!kvm_arch_flush_remote_tlb(kvm) || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); @@ -1343,8 +1343,10 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, } spin_unlock(&kvm->mmu_lock); } - if (flush) + if (*flush) { kvm_flush_remote_tlbs_memslot(kvm, memslot); + *flush = false; + } if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) return -EFAULT; return 0; @@ -1420,9 +1422,10 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, } } spin_unlock(&kvm->mmu_lock); - - if(flush) + if (*flush) { kvm_flush_remote_tlbs_memslot(kvm, memslot); + *flush = false; + } return 0; } -- Gitee From 88be31467ce78faed50f777fb8066942d11c00c9 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 27 Nov 2023 11:17:26 +0000 Subject: [PATCH 2/5] arm64/mm: Modify range-based tlbi to decrement scale commit e2768b798a197318736f00c506633cb78ff77012 upstream In preparation for adding support for LPA2 to the tlb invalidation routines, modify the algorithm used by range-based tlbi to start at the highest 'scale' and decrement instead of starting at the lowest 'scale' and incrementing. This new approach makes it possible to maintain 64K alignment as we work through the range, until the last op (at scale=0). This is required when LPA2 is enabled. (This part will be added in a subsequent commit). This change is separated into its own patch because it will also impact non-LPA2 systems, and I want to make it easy to bisect in case it leads to performance regression (see below for benchmarks that suggest this should not be a problem). The original commit (d1d3aa98 "arm64: tlb: Use the TLBI RANGE feature in arm64") stated this as the reason for _incrementing_ scale: However, in most scenarios, the pages = 1 when flush_tlb_range() is called. Start from scale = 3 or other proper value (such as scale =ilog2(pages)), will incur extra overhead. 
So increase 'scale' from 0 to maximum. But pages=1 is already special cased by the non-range invalidation path, which will take care of it the first time through the loop (both in the original commit and in my change), so I don't think switching to decrement scale should have any extra performance impact after all. Indeed benchmarking kernel compilation, a TLBI-heavy workload, suggests that this new approach actually _improves_ performance slightly (using a virtual machine on Apple M2): Table shows time to execute kernel compilation workload with 8 jobs, relative to baseline without this patch (more negative number is bigger speedup). Repeated 9 times across 3 system reboots: | counter | mean | stdev | |:----------|-----------:|----------:| | real-time | -0.6% | 0.0% | | kern-time | -1.6% | 0.5% | | user-time | -0.4% | 0.1% | Reviewed-by: Oliver Upton Signed-off-by: Ryan Roberts Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231127111737.1897081-2-ryan.roberts@arm.com Signed-off-by: Xie Xiaodong <624338359@qq.com> --- arch/arm64/include/asm/tlbflush.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 11e9d8bd8b75..d1ea1732515f 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -298,14 +298,14 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * entries one by one at the granularity of 'stride'. If the TLB * range ops are supported, then: * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; + * 1. The minimum range granularity is decided by 'scale', so multiple range + * TLBI operations may be required. Start from scale = 3, flush the largest + * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the + * requested range, then decrement scale and continue until one or zero pages + * are left. * - * 2. 
For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. + * 2. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. * * Note that certain ranges can be represented by either num = 31 and * scale or num = 0 and scale + 1. The loop below favours the latter @@ -315,12 +315,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, asid, tlb_level, tlbi_user) \ do { \ int num = 0; \ - int scale = 0; \ + int scale = 3; \ unsigned long addr; \ \ while (pages > 0) { \ if (!system_supports_tlb_range() || \ - pages % 2 == 1) { \ + pages == 1) { \ addr = __TLBI_VADDR(start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ @@ -340,7 +340,7 @@ do { \ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ pages -= __TLBI_RANGE_PAGES(num, scale); \ } \ - scale++; \ + scale--; \ } \ } while (0) -- Gitee From abeb0a387d8b9b9054dc161ae917dfb65277eff2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 5 Apr 2024 13:58:50 +1000 Subject: [PATCH 3/5] arm64: tlb: Fix TLBI RANGE operand commit e3ba51ab24fddef79fc212f9840de54db8fd1685 upstream KVM/arm64 relies on TLBI RANGE feature to flush TLBs when the dirty pages are collected by VMM and the page table entries become write protected during live migration. Unfortunately, the operand passed to the TLBI RANGE instruction isn't correctly sorted out due to the commit 117940aa6e5f ("KVM: arm64: Define kvm_tlb_flush_vmid_range()"). It leads to crash on the destination VM after live migration because TLBs aren't flushed completely and some of the dirty pages are missed. For example, I have a VM where 8GB memory is assigned, starting from 0x40000000 (1GB). Note that the host has 4KB as the base page size. 
In the middle of migration, kvm_tlb_flush_vmid_range() is executed to flush TLBs. It passes MAX_TLBI_RANGE_PAGES as the argument to __kvm_tlb_flush_vmid_range() and __flush_s2_tlb_range_op(). SCALE#3 and NUM#31, corresponding to MAX_TLBI_RANGE_PAGES, isn't supported by __TLBI_RANGE_NUM(). In this specific case, -1 has been returned from __TLBI_RANGE_NUM() for SCALE#3/2/1/0 and rejected by the loop in the __flush_tlb_range_op() until the variable @scale underflows and becomes -9, 0xffff708000040000 is set as the operand. The operand is wrong since it's sorted out by __TLBI_VADDR_RANGE() according to invalid @scale and @num. Fix it by extending __TLBI_RANGE_NUM() to support the combination of SCALE#3 and NUM#31. With the changes, [-1 31] instead of [-1 30] can be returned from the macro, meaning the TLBs for 0x200000 pages in the above example can be flushed in one shot with SCALE#3 and NUM#31. The macro TLBI_RANGE_MASK is dropped since no one uses it any more. The comments are also adjusted accordingly. 
Fixes: 117940aa6e5f ("KVM: arm64: Define kvm_tlb_flush_vmid_range()") Cc: stable@kernel.org # v6.6+ Reported-by: Yihuang Yu Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan Reviewed-by: Catalin Marinas Reviewed-by: Ryan Roberts Reviewed-by: Anshuman Khandual Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240405035852.1532010-2-gshan@redhat.com Signed-off-by: Catalin Marinas Signed-off-by: Xie Xiaodong <624338359@qq.com> --- arch/arm64/include/asm/tlbflush.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index d1ea1732515f..e30854715311 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -157,12 +157,17 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only significant + * for a maximum of MAX_TLBI_RANGE_PAGES pages. If 'pages' is more than + * that, you must iterate over the overall range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + unsigned long __pages = min_t(unsigned long, (pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (__pages >> (5 * (scale) + 1)) - 1; \ + }) /* * TLB Invalidation @@ -306,10 +311,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * * 2. If there is 1 page remaining, flush it through non-range operations. Range * operations can only span an even number of pages. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. 
The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. */ #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user) \ -- Gitee From 92380ad2e4ff5cdf6a464a56f0671035fa8578ac Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 5 Apr 2024 13:58:51 +1000 Subject: [PATCH 4/5] arm64: tlb: Improve __TLBI_VADDR_RANGE() commit e07255d69702bc9131427fda8f9749355b10780f upstream The macro returns the operand of TLBI RANGE instruction. A mask needs to be applied to each individual field upon producing the operand, to avoid the adjacent fields can interfere with each other when invalid arguments have been provided. The code looks more tidy at least with a mask and FIELD_PREP(). Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan Reviewed-by: Ryan Roberts Reviewed-by: Catalin Marinas Reviewed-by: Anshuman Khandual Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240405035852.1532010-3-gshan@redhat.com Signed-off-by: Will Deacon Signed-off-by: Xie Xiaodong <624338359@qq.com> --- arch/arm64/include/asm/tlbflush.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index e30854715311..b4e4267d4312 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -139,16 +139,24 @@ static inline unsigned long get_trans_granule(void) * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) * */ +#define TLBIR_ASID_MASK GENMASK_ULL(63, 48) +#define TLBIR_TG_MASK GENMASK_ULL(47, 46) +#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44) +#define TLBIR_NUM_MASK GENMASK_ULL(43, 39) +#define TLBIR_TTL_MASK GENMASK_ULL(38, 37) +#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0) + #define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= (unsigned long)(ttl) << 37; \ - __ta |= (unsigned 
long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ + ({ \ + unsigned long __ta = 0; \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta |= FIELD_PREP(TLBIR_BADDR_MASK, (addr) >> PAGE_SHIFT); \ + __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \ + __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \ + __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \ + __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \ + __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ -- Gitee From 48f9b5ef8d43ce7ae866df6d6106a1c7a962ca20 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 5 Apr 2024 13:58:52 +1000 Subject: [PATCH 5/5] arm64: tlb: Allow range operation for MAX_TLBI_RANGE_PAGES commit 73301e464a72a0d007d0d4e0f4d3dab5c58125bf upstream MAX_TLBI_RANGE_PAGES pages is covered by SCALE#3 and NUM#31 and it's supported now. Allow TLBI RANGE operation when the number of pages is equal to MAX_TLBI_RANGE_PAGES in __flush_tlb_range_nosync(). Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Reviewed-by: Ryan Roberts Reviewed-by: Catalin Marinas Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240405035852.1532010-4-gshan@redhat.com Signed-off-by: Will Deacon Signed-off-by: Xie Xiaodong <624338359@qq.com> --- arch/arm64/include/asm/tlbflush.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index b4e4267d4312..5c5baa7cef53 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -371,11 +371,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, * When not uses TLB range ops, we can handle up to * (MAX_TLBI_OPS - 1) pages; * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. + * MAX_TLBI_RANGE_PAGES pages. 
*/ if ((!system_supports_tlb_range() && (end - start) >= (MAX_TLBI_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { + pages > MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; } -- Gitee