[SRU][N:linux-intel][PATCH] UBUNTU: SAUCE: KVM: x86: Make cpu_dirty_log_size a per-VM value

Thibault Ferrante thibault.ferrante at canonical.com
Fri Mar 7 12:27:36 UTC 2025


BugLink: https://bugs.launchpad.net/bugs/2101083

Make cpu_dirty_log_size (CPU's dirty log buffer size) a per-VM value and
set the per-VM cpu_dirty_log_size only for normal VMs when PML is enabled.
Do not set it for TDs.

Until now, cpu_dirty_log_size was a system-wide value, used for all VMs
and set to the PML buffer size whenever PML was enabled in VMX.
However, PML is not currently supported for TDs, though PML remains
available for normal VMs as long as the feature is supported by hardware
and enabled in VMX.

Making cpu_dirty_log_size a per-VM value allows it to be the PML buffer
size for normal VMs and 0 for TDs. This lets functions like
kvm_arch_sync_dirty_log() and kvm_mmu_update_cpu_dirty_logging()
determine whether PML is in use for the current VM before kicking its
vCPUs or requesting them to update their CPU dirty logging status
(i.e. turning PML on/off in the VMCS).
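
For illustration (not part of the diff): with the per-VM field in
place, the common helper in arch/x86/kvm/x86.c that decides whether
vCPUs need to update their dirty logging state looks roughly like this
(abridged sketch):

  static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
  {
  	int nr_slots;

  	/* Zero for TDs and for VMs where PML is unsupported or disabled. */
  	if (!kvm->arch.cpu_dirty_log_size)
  		return;

  	nr_slots = atomic_read(&kvm->nr_memslots_dirty_logging);
  	if ((enable && nr_slots == 1) || !nr_slots)
  		kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
  }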

This fixes an issue first reported in [1], where QEMU attaches an
emulated VGA device to a TD; note that KVM_MEM_LOG_DIRTY_PAGES still
works as long as the corresponding memslot has no KVM_MEM_GUEST_MEMFD
flag.
KVM then invokes kvm_mmu_update_cpu_dirty_logging() and from there
vmx_update_cpu_dirty_logging(), which incorrectly accesses a kvm_vmx
struct for a TDX VM.
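
To make the failure mode concrete, here is a simplified sketch of
vmx_update_cpu_dirty_logging() (nested-guest handling omitted; not part
of this diff):

  void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
  {
  	/*
  	 * to_vmx() is a container_of()-style cast that is only valid
  	 * when the vCPU is embedded in a struct vcpu_vmx; a TDX vCPU
  	 * is embedded in a different container struct, so the VMCS
  	 * control updates below poke at the wrong memory for a TD.
  	 */
  	struct vcpu_vmx *vmx = to_vmx(vcpu);

  	if (atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
  		secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
  	else
  		secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
  }

With the per-VM value, kvm->arch.cpu_dirty_log_size is 0 for TDs, so
this path is never reached for them.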

Reported-by: ANAND NARSHINHA PATIL <Anand.N.Patil at ibm.com>
Reported-by: Pedro Principeza <pedro.principeza at canonical.com>
Reported-by: Farrah Chen <farrah.chen at intel.com>
Closes: https://github.com/canonical/tdx/issues/202
Link: https://github.com/canonical/tdx/issues/202 [1]
Suggested-by: Kai Huang <kai.huang at intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao at intel.com>
Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
Link: https://lore.kernel.org/all/20250226195529.2314580-28-pbonzini@redhat.com/
Signed-off-by: Thibault Ferrante <thibault.ferrante at canonical.com>
---
 arch/x86/include/asm/kvm_host.h | 11 ++++++-----
 arch/x86/kvm/mmu/mmu.c          |  4 ++--
 arch/x86/kvm/mmu/mmu_internal.h |  2 +-
 arch/x86/kvm/vmx/main.c         |  1 -
 arch/x86/kvm/vmx/vmx.c          |  5 ++---
 arch/x86/kvm/x86.c              |  6 +++---
 6 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fc16c79df562..e0019f4d2980 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1550,6 +1550,12 @@ struct kvm_arch {
 	struct kvm_mmu_memory_cache split_desc_cache;
 
 	gfn_t gfn_shared_mask;
+	/*
+	 * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero
+	 * value indicates CPU dirty logging is unsupported or disabled in
+	 * current VM.
+	 */
+	int cpu_dirty_log_size;
 };
 
 struct kvm_vm_stat {
@@ -1806,11 +1812,6 @@ struct kvm_x86_ops {
 
 	void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
 
-	/*
-	 * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer.  A zero
-	 * value indicates CPU dirty logging is unsupported or disabled.
-	 */
-	int cpu_dirty_log_size;
 	void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
 
 	const struct kvm_x86_nested_ops *nested_ops;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ea538bbf62a9..dd7b42118772 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1408,7 +1408,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 	}
 
 	/* Now handle 4K PTEs.  */
-	if (kvm_x86_ops.cpu_dirty_log_size)
+	if (kvm->arch.cpu_dirty_log_size)
 		kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
 	else
 		kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
@@ -1416,7 +1416,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 
 int kvm_cpu_dirty_log_size(struct kvm *kvm)
 {
-	return kvm_x86_ops.cpu_dirty_log_size;
+	return kvm->arch.cpu_dirty_log_size;
 }
 
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index b46abb0b3c80..12b720ce6c0c 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -236,7 +236,7 @@ static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm *kvm,
 	 * being enabled is mandatory as the bits used to denote WP-only SPTEs
 	 * are reserved for PAE paging (32-bit KVM).
 	 */
-	return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode;
+	return kvm->arch.cpu_dirty_log_size && sp->role.guest_mode;
 }
 
 static inline gfn_t gfn_round_for_level(gfn_t gfn, int level)
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index a10b2a1afc16..cf99cf7fb286 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -1153,7 +1153,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 
 	.sched_in = vt_sched_in,
 
-	.cpu_dirty_log_size = PML_ENTITY_NUM,
 	.update_cpu_dirty_logging = vt_update_cpu_dirty_logging,
 
 	.nested_ops = &vmx_nested_ops,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 72171334c9a1..31eeaef5b189 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7509,6 +7509,8 @@ int vmx_vm_init(struct kvm *kvm)
 			break;
 		}
 	}
+	if (enable_pml)
+		kvm->arch.cpu_dirty_log_size = PML_ENTITY_NUM;
 	return 0;
 }
 
@@ -8391,9 +8393,6 @@ __init int vmx_hardware_setup(void)
 	if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
 		enable_pml = 0;
 
-	if (!enable_pml)
-		vt_x86_ops.cpu_dirty_log_size = 0;
-
 	if (!cpu_has_vmx_preemption_timer())
 		enable_preemption_timer = false;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4b183190d84c..ffc56f033393 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6499,7 +6499,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 	struct kvm_vcpu *vcpu;
 	unsigned long i;
 
-	if (!kvm_x86_ops.cpu_dirty_log_size)
+	if (!kvm->arch.cpu_dirty_log_size)
 		return;
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
@@ -13019,7 +13019,7 @@ static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
 {
 	int nr_slots;
 
-	if (!kvm_x86_ops.cpu_dirty_log_size)
+	if (!kvm->arch.cpu_dirty_log_size)
 		return;
 
 	nr_slots = atomic_read(&kvm->nr_memslots_dirty_logging);
@@ -13095,7 +13095,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 		if (READ_ONCE(eager_page_split))
 			kvm_mmu_slot_try_split_huge_pages(kvm, new, PG_LEVEL_4K);
 
-		if (kvm_x86_ops.cpu_dirty_log_size) {
+		if (kvm->arch.cpu_dirty_log_size) {
 			kvm_mmu_slot_leaf_clear_dirty(kvm, new);
 			kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
 		} else {
-- 
2.45.2



