[SRU][O:linux-intel][PATCH v2 3/3] UBUNTU: SAUCE: KVM: x86: Make cpu_dirty_log_size a per-VM value

Fri Mar 7 12:33:17 UTC 2025

BugLink: https://bugs.launchpad.net/bugs/2101083

Make cpu_dirty_log_size (CPU's dirty log buffer size) a per-VM value and
set the per-VM cpu_dirty_log_size only for normal VMs when PML is enabled.
Do not set it for TDs.

Until now, cpu_dirty_log_size was a system-wide value that is used for
all VMs and is set to the PML buffer size when PML was enabled in VMX.
However, PML is not currently supported for TDs, though PML remains
available for normal VMs as long as the feature is supported by hardware
and enabled in VMX.

Making cpu_dirty_log_size a per-VM value allows it to be ther PML buffer
size for normal VMs and 0 for TDs. This allows functions like
kvm_arch_sync_dirty_log() and kvm_mmu_update_cpu_dirty_logging() to
determine if PML is supported, in order to kick off vCPUs or request them
to update CPU dirty logging status (turn on/off PML in VMCS).

This fixes an issue first reported in [1], where QEMU attaches an
emulated VGA device to a TD; note that KVM_MEM_LOG_DIRTY_PAGES
still works if the corresponding has no flag KVM_MEM_GUEST_MEMFD.
KVM then invokes kvm_mmu_update_cpu_dirty_logging() and from there
vmx_update_cpu_dirty_logging(), which incorrectly accesses a kvm_vmx
struct for a TDX VM.

Reported-by: ANAND NARSHINHA PATIL <Anand.N.Patil at ibm.com>
Reported-by: Pedro Principeza <pedro.principeza at canonical.com>
Reported-by: Farrah Chen <farrah.chen at intel.com>
Closes: https://github.com/canonical/tdx/issues/202
Link: https://github.com/canonical/tdx/issues/202 [1]
Suggested-by: Kai Huang <kai.huang at intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao at intel.com>
Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
Link: https://lore.kernel.org/all/20250226195529.2314580-28-pbonzini@redhat.com/
Signed-off-by: Thibault Ferrante <thibault.ferrante at canonical.com>
---
 arch/x86/include/asm/kvm_host.h | 11 ++++++-----
 arch/x86/kvm/mmu/mmu.c          |  4 ++--
 arch/x86/kvm/mmu/mmu_internal.h |  2 +-
 arch/x86/kvm/vmx/main.c         |  1 -
 arch/x86/kvm/vmx/vmx.c          |  5 ++---
 arch/x86/kvm/x86.c              |  6 +++---
 6 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fddbb417463f..c686acb30e10 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1583,6 +1583,12 @@ struct kvm_arch {
 	struct kvm_mmu_memory_cache split_desc_cache;
 
 	gfn_t gfn_shared_mask;
+	/*
+	 * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer.  A Zero
+	 * value indicates CPU dirty logging is unsupported or disabled in
+	 * current VM.
+	 */
+	int cpu_dirty_log_size;
 };
 
 struct kvm_vm_stat {
@@ -1835,11 +1841,6 @@ struct kvm_x86_ops {
 			       struct x86_exception *exception);
 	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
-	/*
-	 * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer.  A zero
-	 * value indicates CPU dirty logging is unsupported or disabled.
-	 */
-	int cpu_dirty_log_size;
 	void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
 
 	const struct kvm_x86_nested_ops *nested_ops;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6263739ba9d3..a420f2264038 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1420,7 +1420,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 	}
 
 	/* Now handle 4K PTEs.  */
-	if (kvm_x86_ops.cpu_dirty_log_size)
+	if (kvm->arch.cpu_dirty_log_size)
 		kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
 	else
 		kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
@@ -1428,7 +1428,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 
 int kvm_cpu_dirty_log_size(struct kvm *kvm)
 {
-	return kvm_x86_ops.cpu_dirty_log_size;
+	return kvm->arch.cpu_dirty_log_size;
 }
 
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 47288806f32a..d0044405c210 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -236,7 +236,7 @@ static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm *kvm,
 	 * being enabled is mandatory as the bits used to denote WP-only SPTEs
 	 * are reserved for PAE paging (32-bit KVM).
 	 */
-	return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode;
+	return kvm->arch.cpu_dirty_log_size && sp->role.guest_mode;
 }
 
 static inline gfn_t gfn_round_for_level(gfn_t gfn, int level)
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index c700b7a9d63b..0651eda3b191 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -1126,7 +1126,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.check_intercept = vt_check_intercept,
 	.handle_exit_irqoff = vt_handle_exit_irqoff,
 
-	.cpu_dirty_log_size = PML_ENTITY_NUM,
 	.update_cpu_dirty_logging = vt_update_cpu_dirty_logging,
 
 	.nested_ops = &vmx_nested_ops,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a43dc3c2a733..e19f5ae7266c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7506,6 +7506,8 @@ int vmx_vm_init(struct kvm *kvm)
 			break;
 		}
 	}
+	if (enable_pml)
+		kvm->arch.cpu_dirty_log_size = PML_ENTITY_NUM;
 	return 0;
 }
 
@@ -8366,9 +8368,6 @@ __init int vmx_hardware_setup(void)
 	if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
 		enable_pml = 0;
 
-	if (!enable_pml)
-		vt_x86_ops.cpu_dirty_log_size = 0;
-
 	if (!cpu_has_vmx_preemption_timer())
 		enable_preemption_timer = false;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3cc447db6c11..128e7982f078 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6561,7 +6561,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 	struct kvm_vcpu *vcpu;
 	unsigned long i;
 
-	if (!kvm_x86_ops.cpu_dirty_log_size)
+	if (!kvm->arch.cpu_dirty_log_size)
 		return;
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
@@ -13080,7 +13080,7 @@ static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
 {
 	int nr_slots;
 
-	if (!kvm_x86_ops.cpu_dirty_log_size)
+	if (!kvm->arch.cpu_dirty_log_size)
 		return;
 
 	nr_slots = atomic_read(&kvm->nr_memslots_dirty_logging);
@@ -13156,7 +13156,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 		if (READ_ONCE(eager_page_split))
 			kvm_mmu_slot_try_split_huge_pages(kvm, new, PG_LEVEL_4K);
 
-		if (kvm_x86_ops.cpu_dirty_log_size) {
+		if (kvm->arch.cpu_dirty_log_size) {
 			kvm_mmu_slot_leaf_clear_dirty(kvm, new);
 			kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
 		} else {
-- 
2.45.2