diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 850ef4a6d3894f5445643ea7a4bf0b570c7b69ec..cbca2fc7fd318b224fe32557ceb8ddbe94cb62b7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -693,7 +693,7 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); void kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 11e9d8bd8b75e68500aa5fb6d115fcada6cc630c..5c5baa7cef5362bcd0ee783c50e7022240832883 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -139,16 +139,24 @@ static inline unsigned long get_trans_granule(void) * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) * */ +#define TLBIR_ASID_MASK GENMASK_ULL(63, 48) +#define TLBIR_TG_MASK GENMASK_ULL(47, 46) +#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44) +#define TLBIR_NUM_MASK GENMASK_ULL(43, 39) +#define TLBIR_TTL_MASK GENMASK_ULL(38, 37) +#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0) + #define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= (unsigned long)(ttl) << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ + ({ \ + unsigned long __ta = 0; \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? 
ttl : 0; \ + __ta |= FIELD_PREP(TLBIR_BADDR_MASK, (addr) >> PAGE_SHIFT); \ + __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \ + __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \ + __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \ + __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \ + __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -157,12 +165,17 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only significant + * for a maximum of MAX_TLBI_RANGE_PAGES pages. If 'pages' is more than + * that, you must iterate over the overall range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + unsigned long __pages = min_t(unsigned long, (pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (int)(__pages >> (5 * (scale) + 1)) - 1; /* signed: -1..31 */ \ + }) /* * TLB Invalidation @@ -298,29 +311,25 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * entries one by one at the granularity of 'stride'. If the TLB * range ops are supported, then: * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; - * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. + * 1. The minimum range granularity is decided by 'scale', so multiple range + * TLBI operations may be required.
Start from scale = 3, flush the largest + * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the + * requested range, then decrement scale and continue until one or zero pages + * are left. * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. + * 2. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. */ #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user) \ do { \ int num = 0; \ - int scale = 0; \ + int scale = 3; \ unsigned long addr; \ \ while (pages > 0) { \ if (!system_supports_tlb_range() || \ - pages % 2 == 1) { \ + pages == 1) { \ addr = __TLBI_VADDR(start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ @@ -340,7 +349,7 @@ do { \ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ pages -= __TLBI_RANGE_PAGES(num, scale); \ } \ - scale++; \ + scale--; \ } \ } while (0) @@ -362,11 +371,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, * When not uses TLB range ops, we can handle up to * (MAX_TLBI_OPS - 1) pages; * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. + * MAX_TLBI_RANGE_PAGES pages. 
*/ if ((!system_supports_tlb_range() && (end - start) >= (MAX_TLBI_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { + pages > MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; } diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 1d0a3791c01768da4bbd6bc494e5e5d78ef8da18..ca5401afc1fb326b06b9bfc22f64e16da28fbe41 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -24,6 +24,7 @@ config KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9019db30b8a762857429e0c92ebcb070b7351044..ac29d18293fb8e15f1e9307f784100809b07983d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -931,13 +931,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS -static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB +static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) { return -ENOTSUPP; } #else -int kvm_arch_flush_remote_tlbs(struct kvm *kvm); +int kvm_arch_flush_remote_tlb(struct kvm *kvm); #endif #ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 6ec39b52214c7533ce9c6a650a9808804e1b3016..aad9284c043a029481072a8d802400902fd730fb 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -36,6 +36,9 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT config KVM_VFIO bool +config HAVE_KVM_ARCH_TLB_FLUSH_ALL + bool + config HAVE_KVM_INVALID_WAKEUPS bool diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 254537f5ed00ebb07e5b22c56b1f845af96ca086..34ef6ed11b2025f2253229331ef59456687c71ab 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1274,11 +1274,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, * Steps 1-4 below provide general overview of dirty page 
logging. See * kvm_get_dirty_log_protect() function description for additional details. * - * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we - * always flush the TLB (step 4) even if previous step failed and the dirty - * bitmap may be corrupt. Regardless of previous outcome the KVM logging API - * does not preclude user space subsequent dirty log read. Flushing TLB ensures - * writes will be marked dirty for next log read. + * We call kvm_get_dirty_log_protect() to handle steps 1-4. The helper + * flushes the relevant memslot TLBs when needed, even if the subsequent + * copy_to_user() fails and the dirty bitmap may be corrupt. Regardless of + * previous outcome the KVM logging API does not preclude user space + * subsequent dirty log read. Flushing TLB ensures writes will be marked + * dirty for next log read. * * 1. Take a snapshot of the bit and clear it if needed. * 2. Write protect the corresponding page. @@ -1294,9 +1295,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) r = kvm_get_dirty_log_protect(kvm, log, &flush); - if (flush) - kvm_flush_remote_tlbs(kvm); - mutex_unlock(&kvm->slots_lock); return r; } @@ -1310,9 +1308,6 @@ int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *lo r = kvm_clear_dirty_log_protect(kvm, log, &flush); - if (flush) - kvm_flush_remote_tlbs(kvm); - mutex_unlock(&kvm->slots_lock); return r; } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index abb5f76454b5baeb1529748a7b31c7602c0d7511..0d293f9f1b393493fd7b1b4108f3a769732daf55 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -49,12 +49,12 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) } /** - * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8 + * kvm_arch_flush_remote_tlb() - flush all VM TLB entries for v7/8 * @kvm: pointer to kvm structure. 
* * Interface to HYP function to flush all VM TLB entries */ -int kvm_arch_flush_remote_tlbs(struct kvm *kvm) +int kvm_arch_flush_remote_tlb(struct kvm *kvm) { kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); return 0; @@ -429,10 +429,14 @@ static void __unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size, next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) unmap_stage2_puds(kvm, pgd, addr, next); - + if (stage2_unmap_defer_tlb_flush()) - /* Perform the deferred TLB invalidations */ - kvm_tlb_flush_vmid_range(kvm, addr, size); + /* + * Mirror the upstream chunked unmap semantics by + * completing deferred range invalidation for this + * walker chunk before the next iteration can resched. + */ + kvm_tlb_flush_vmid_range(kvm, addr, next - addr); /* * If the range is too large, release the kvm->mmu_lock diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ac74b31efb811f23135cf8941b84d22e8b5c5609..ceb70dc7e7f0d4ef34566c80e9029b136adfcfa3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -341,7 +341,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that * barrier here. */ - if (!kvm_arch_flush_remote_tlbs(kvm) + if (!kvm_arch_flush_remote_tlb(kvm) || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); @@ -1343,8 +1343,10 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, } spin_unlock(&kvm->mmu_lock); } - if (flush) + if (*flush) { kvm_flush_remote_tlbs_memslot(kvm, memslot); + *flush = false; + } if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) return -EFAULT; return 0; @@ -1420,9 +1422,10 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, } } spin_unlock(&kvm->mmu_lock); - - if(flush) + if (*flush) { kvm_flush_remote_tlbs_memslot(kvm, memslot); + *flush = false; + } return 0; }