diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 850ef4a6d3894f5445643ea7a4bf0b570c7b69ec..cbca2fc7fd318b224fe32557ceb8ddbe94cb62b7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -693,7 +693,7 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
 void kvm_set_ipa_limit(void);
 
 #define __KVM_HAVE_ARCH_VM_ALLOC
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 11e9d8bd8b75e68500aa5fb6d115fcada6cc630c..5c5baa7cef5362bcd0ee783c50e7022240832883 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -139,16 +139,24 @@ static inline unsigned long get_trans_granule(void)
  * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
  *
  */
+#define TLBIR_ASID_MASK		GENMASK_ULL(63, 48)	/* 'asid' operand */
+#define TLBIR_TG_MASK		GENMASK_ULL(47, 46)	/* get_trans_granule() */
+#define TLBIR_SCALE_MASK	GENMASK_ULL(45, 44)	/* 'scale' operand */
+#define TLBIR_NUM_MASK		GENMASK_ULL(43, 39)	/* 'num' operand */
+#define TLBIR_TTL_MASK		GENMASK_ULL(38, 37)	/* 'ttl', or 0 if not 1..3 */
+#define TLBIR_BADDR_MASK	GENMASK_ULL(36,  0)	/* (addr) >> PAGE_SHIFT */
+
 #define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
-	({							\
-		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
-		__ta &= GENMASK_ULL(36, 0);			\
-		__ta |= (unsigned long)(ttl) << 37;		\
-		__ta |= (unsigned long)(num) << 39;		\
-		__ta |= (unsigned long)(scale) << 44;		\
-		__ta |= get_trans_granule() << 46;		\
-		__ta |= (unsigned long)(asid) << 48;		\
-		__ta;						\
+	({								\
+		unsigned long __ttl = (ttl);	/* evaluate 'ttl' once */ \
+		unsigned long __ta = FIELD_PREP(TLBIR_BADDR_MASK, (addr) >> PAGE_SHIFT); \
+		__ttl = (__ttl >= 1 && __ttl <= 3) ? __ttl : 0;		\
+		__ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl);		\
+		__ta |= FIELD_PREP(TLBIR_NUM_MASK, num);		\
+		__ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale);		\
+		__ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule());	\
+		__ta |= FIELD_PREP(TLBIR_ASID_MASK, asid);		\
+		__ta;							\
 	})
 
 /* These macros are used by the TLBI RANGE feature. */
@@ -157,12 +165,17 @@ static inline unsigned long get_trans_granule(void)
 #define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)
 
 /*
- * Generate 'num' values from -1 to 30 with -1 rejected by the
- * __flush_tlb_range() loop below.
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only significant
+ * for a maximum of MAX_TLBI_RANGE_PAGES pages. If 'pages' is more than
+ * that, you must iterate over the overall range.
  */
-#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
-#define __TLBI_RANGE_NUM(pages, scale)	\
-	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+#define __TLBI_RANGE_NUM(pages, scale)					\
+	({								\
+		/* Clamp as unsigned long first; only then narrow to int */ \
+		int __pages = min_t(unsigned long, (pages), __TLBI_RANGE_PAGES(31, (scale))); \
+		(__pages >> (5 * (scale) + 1)) - 1;	/* signed: may be -1 */ \
+	})
 
 /*
  *	TLB Invalidation
@@ -298,29 +311,25 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  * entries one by one at the granularity of 'stride'. If the TLB
  * range ops are supported, then:
  *
- * 1. If 'pages' is odd, flush the first page through non-range
- *    operations;
- *
- * 2. For remaining pages: the minimum range granularity is decided
- *    by 'scale', so multiple range TLBI operations may be required.
- *    Start from scale = 0, flush the corresponding number of pages
- *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
- *    until no pages left.
+ * 1. The minimum range granularity is decided by 'scale', so multiple range
+ *    TLBI operations may be required. Start from scale = 3, flush the largest
+ *    possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
+ *    requested range, then decrement scale and continue until one or zero pages
+ *    are left.
  *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ * 2. If there is 1 page remaining, flush it through non-range operations. Range
+ *    operations can only span an even number of pages.
  */
 #define __flush_tlb_range_op(op, start, pages, stride,			\
 				asid, tlb_level, tlbi_user)		\
 do {									\
 	int num = 0;							\
-	int scale = 0;							\
+	int scale = 3;							\
 	unsigned long addr;						\
 									\
 	while (pages > 0) {						\
 		if (!system_supports_tlb_range() ||			\
-		    pages % 2 == 1) {					\
+		    pages == 1) {					\
 			addr = __TLBI_VADDR(start, asid);		\
 			__tlbi_level(op, addr, tlb_level);		\
 			if (tlbi_user)					\
@@ -340,7 +349,7 @@ do {									\
 			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
 			pages -= __TLBI_RANGE_PAGES(num, scale);	\
 		}							\
-		scale++;						\
+		scale--;						\
 	}								\
 } while (0)
 
@@ -362,11 +371,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	 * When not uses TLB range ops, we can handle up to
 	 * (MAX_TLBI_OPS - 1) pages;
 	 * When uses TLB range ops, we can handle up to
-	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
+	 * MAX_TLBI_RANGE_PAGES pages.
 	 */
 	if ((!system_supports_tlb_range() &&
 	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
-	    pages >= MAX_TLBI_RANGE_PAGES) {
+	    pages > MAX_TLBI_RANGE_PAGES) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
 	}
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 1d0a3791c01768da4bbd6bc494e5e5d78ef8da18..ca5401afc1fb326b06b9bfc22f64e16da28fbe41 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -24,6 +24,7 @@ config KVM
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9019db30b8a762857429e0c92ebcb070b7351044..ac29d18293fb8e15f1e9307f784100809b07983d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -931,13 +931,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
-static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 {
 	return -ENOTSUPP;
 }
 #else
-int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
+int kvm_arch_flush_remote_tlb(struct kvm *kvm);
 #endif
 
 #ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 6ec39b52214c7533ce9c6a650a9808804e1b3016..aad9284c043a029481072a8d802400902fd730fb 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -36,6 +36,9 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
 config KVM_VFIO
        bool
 
+config HAVE_KVM_ARCH_TLB_FLUSH_ALL
+       bool
+
 config HAVE_KVM_INVALID_WAKEUPS
        bool
 
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 254537f5ed00ebb07e5b22c56b1f845af96ca086..34ef6ed11b2025f2253229331ef59456687c71ab 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1274,11 +1274,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
  * Steps 1-4 below provide general overview of dirty page logging. See
  * kvm_get_dirty_log_protect() function description for additional details.
  *
- * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
- * always flush the TLB (step 4) even if previous step failed  and the dirty
- * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
- * does not preclude user space subsequent dirty log read. Flushing TLB ensures
- * writes will be marked dirty for next log read.
+ * We call kvm_get_dirty_log_protect() to handle steps 1-4. The helper
+ * flushes the TLBs for the affected memslot when needed, and does so before
+ * copy_to_user(), so the flush happens even if that copy fails and the
+ * bitmap returned to user space is incomplete. The KVM logging API does not
+ * prevent user space from reading the dirty log again after such a failure;
+ * flushing the TLB ensures later writes are marked dirty for the next read.
  *
  *   1. Take a snapshot of the bit and clear it if needed.
  *   2. Write protect the corresponding page.
@@ -1294,9 +1295,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 
 	r = kvm_get_dirty_log_protect(kvm, log, &flush);
 
-	if (flush)
-		kvm_flush_remote_tlbs(kvm);
-
 	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
@@ -1310,9 +1308,6 @@ int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *lo
 
 	r = kvm_clear_dirty_log_protect(kvm, log, &flush);
 
-	if (flush)
-		kvm_flush_remote_tlbs(kvm);
-
 	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index abb5f76454b5baeb1529748a7b31c7602c0d7511..0d293f9f1b393493fd7b1b4108f3a769732daf55 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -49,12 +49,12 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 }
 
 /**
- * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlb() - flush all VM TLB entries for v7/8
  * @kvm:	pointer to kvm structure.
  *
  * Interface to HYP function to flush all VM TLB entries
  */
-int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 {
 	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
 	return 0;
@@ -429,10 +429,14 @@ static void __unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size,
 		next = stage2_pgd_addr_end(kvm, addr, end);
 		if (!stage2_pgd_none(kvm, *pgd))
 			unmap_stage2_puds(kvm, pgd, addr, next);
-		
+
 		if (stage2_unmap_defer_tlb_flush())
-			/* Perform the deferred TLB invalidations */
-			kvm_tlb_flush_vmid_range(kvm, addr, size);
+			/*
+			 * Perform the deferred TLB invalidation for only the
+			 * chunk just unmapped, [addr, next), so that it
+			 * completes before mmu_lock may be released below.
+			 */
+			kvm_tlb_flush_vmid_range(kvm, addr, next - addr);
 
 		/*
 		 * If the range is too large, release the kvm->mmu_lock
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ac74b31efb811f23135cf8941b84d22e8b5c5609..ceb70dc7e7f0d4ef34566c80e9029b136adfcfa3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -341,7 +341,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
 	 * barrier here.
 	 */
-	if (!kvm_arch_flush_remote_tlbs(kvm)
+	if (!kvm_arch_flush_remote_tlb(kvm)
 	    || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
@@ -1343,8 +1343,10 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
 		}
 		spin_unlock(&kvm->mmu_lock);
 	}
-	if (flush)
+	if (*flush) {
 		kvm_flush_remote_tlbs_memslot(kvm, memslot);
+		*flush = false;
+	}
 	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
 		return -EFAULT;
 	return 0;
@@ -1420,9 +1422,10 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
 		}
 	}
 	spin_unlock(&kvm->mmu_lock);
-	
-	if(flush)
+	if (*flush) {
 		kvm_flush_remote_tlbs_memslot(kvm, memslot);
+		*flush = false;
+	}
 
 	return 0;
 }