[SRU][Kinetic][PATCH 1/1] UBUNTU: SAUCE: (no-up) Add mdev_set_iommu_device() kABI.

Tarun Gupta targupta at nvidia.com
Tue Sep 6 18:08:01 UTC 2022


With below commit present from 5.16+ upstream kernel onwards, support
mdev_set_iommu_device() kABI has been removed from kernel due to lack of
in-tree vendor drivers using the kABI.

fda49d97f2c4 ("vfio: remove the unused mdev iommu hook")

This patch partially reverts the above commit so that
mdev_set_iommu_device() kABI is still supported with HWE kernels
for Ubuntu 22.04.

This kABI is used by SRIOV based Nvidia vGPU to pi all guest sysmem
during vGPU VM boot. With this patch, SRIOV based Nvidia vGPU will
continue to work with upstream kernels.

BugLink : https://bugs.launchpad.net/bugs/1988806

Signed-off-by: Tarun Gupta <targupta at nvidia.com>
---
 drivers/vfio/vfio_iommu_type1.c | 114 +++++++++++++++++++++++++++++---
 include/linux/mdev.h            |  20 ++++++
 2 files changed, 125 insertions(+), 9 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..db528e827db9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -36,6 +36,7 @@
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <linux/workqueue.h>
+#include <linux/mdev.h>
 #include <linux/notifier.h>
 #include <linux/dma-iommu.h>
 #include <linux/irqdomain.h>
@@ -113,6 +114,7 @@ struct vfio_batch {
 struct vfio_iommu_group {
 	struct iommu_group	*iommu_group;
 	struct list_head	next;
+	bool			mdev_group;
 	bool			pinned_page_dirty_scope;
 };
 
@@ -1930,6 +1932,81 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
 	return ret;
 }
 
+static int vfio_mdev_attach_domain(struct device *dev, void *data)
+{
+	struct mdev_device *mdev = to_mdev_device(dev);
+	struct iommu_domain *domain = data;
+	struct device *iommu_device;
+
+	iommu_device = mdev_get_iommu_device(mdev);
+	if (iommu_device)
+		return iommu_attach_device(domain, iommu_device);
+
+	return -EINVAL;
+}
+
+static int vfio_mdev_detach_domain(struct device *dev, void *data)
+{
+	struct mdev_device *mdev = to_mdev_device(dev);
+	struct iommu_domain *domain = data;
+	struct device *iommu_device;
+
+	iommu_device = mdev_get_iommu_device(mdev);
+	if (iommu_device)
+		iommu_detach_device(domain, iommu_device);
+
+	return 0;
+}
+
+static int vfio_iommu_attach_group(struct vfio_domain *domain,
+				   struct vfio_iommu_group *group)
+{
+	if (group->mdev_group)
+		return iommu_group_for_each_dev(group->iommu_group,
+						domain->domain,
+						vfio_mdev_attach_domain);
+	else
+		return iommu_attach_group(domain->domain, group->iommu_group);
+}
+
+static void vfio_iommu_detach_group(struct vfio_domain *domain,
+				    struct vfio_iommu_group *group)
+{
+	if (group->mdev_group)
+		iommu_group_for_each_dev(group->iommu_group, domain->domain,
+					 vfio_mdev_detach_domain);
+	else
+		iommu_detach_group(domain->domain, group->iommu_group);
+}
+
+static bool vfio_bus_is_mdev(struct bus_type *bus)
+{
+	struct bus_type *mdev_bus;
+	bool ret = false;
+
+	mdev_bus = symbol_get(mdev_bus_type);
+	if (mdev_bus) {
+		ret = (bus == mdev_bus);
+		symbol_put(mdev_bus_type);
+	}
+
+	return ret;
+}
+
+static int vfio_mdev_iommu_device(struct device *dev, void *data)
+{
+	struct mdev_device *mdev = to_mdev_device(dev);
+	struct device **old = data, *new;
+
+	new = mdev_get_iommu_device(mdev);
+	if (!new || (*old && *old != new))
+		return -EINVAL;
+
+	*old = new;
+
+	return 0;
+}
+
 /*
  * This is a helper function to insert an address range to iova list.
  * The list is initially created with a single entry corresponding to
@@ -2180,6 +2257,25 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	group->iommu_group = iommu_group;
 
 	if (type == VFIO_EMULATED_IOMMU) {
+		/* Ubuntu-only - BEGIN */
+		ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
+
+		if (!ret && vfio_bus_is_mdev(bus)) {
+			struct device *iommu_device = NULL;
+
+			group->mdev_group = true;
+
+			/* Determine the isolation type */
+			ret = iommu_group_for_each_dev(iommu_group,
+						       &iommu_device,
+						       vfio_mdev_iommu_device);
+			if (!ret && iommu_device) {
+				bus = iommu_device->bus;
+				goto mdev_iommu_device;
+			}
+		}
+		/* Ubuntu-only - END */
+
 		list_add(&group->next, &iommu->emulated_iommu_groups);
 		/*
 		 * An emulated IOMMU group cannot dirty memory directly, it can
@@ -2197,6 +2293,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	if (ret)
 		goto out_free_group;
 
+mdev_iommu_device:
+
 	ret = -ENOMEM;
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!domain)
@@ -2213,7 +2311,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 			goto out_domain;
 	}
 
-	ret = iommu_attach_group(domain->domain, group->iommu_group);
+	ret = vfio_iommu_attach_group(domain, group);
 	if (ret)
 		goto out_domain;
 
@@ -2288,17 +2386,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		if (d->domain->ops == domain->domain->ops &&
 		    d->enforce_cache_coherency ==
 			    domain->enforce_cache_coherency) {
-			iommu_detach_group(domain->domain, group->iommu_group);
-			if (!iommu_attach_group(d->domain,
-						group->iommu_group)) {
+			vfio_iommu_detach_group(domain, group);
+			if (!vfio_iommu_attach_group(d, group)) {
 				list_add(&group->next, &d->group_list);
 				iommu_domain_free(domain->domain);
 				kfree(domain);
 				goto done;
 			}
 
-			ret = iommu_attach_group(domain->domain,
-						 group->iommu_group);
+			ret = vfio_iommu_attach_group(domain, group);
 			if (ret)
 				goto out_domain;
 		}
@@ -2335,7 +2431,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	return 0;
 
 out_detach:
-	iommu_detach_group(domain->domain, group->iommu_group);
+	vfio_iommu_detach_group(domain, group);
 out_domain:
 	iommu_domain_free(domain->domain);
 	vfio_iommu_iova_free(&iova_copy);
@@ -2496,7 +2592,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 		if (!group)
 			continue;
 
-		iommu_detach_group(domain->domain, group->iommu_group);
+		vfio_iommu_detach_group(domain, group);
 		update_dirty_scope = !group->pinned_page_dirty_scope;
 		list_del(&group->next);
 		kfree(group);
@@ -2585,7 +2681,7 @@ static void vfio_release_domain(struct vfio_domain *domain)
 
 	list_for_each_entry_safe(group, group_tmp,
 				 &domain->group_list, next) {
-		iommu_detach_group(domain->domain, group->iommu_group);
+		vfio_iommu_detach_group(domain, group);
 		list_del(&group->next);
 		kfree(group);
 	}
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 47ad3b104d9e..86a46e10726f 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -17,6 +17,7 @@ struct mdev_device {
 	guid_t uuid;
 	struct list_head next;
 	struct mdev_type *type;
+	struct device *iommu_device;
 	bool active;
 };
 
@@ -25,6 +26,25 @@ static inline struct mdev_device *to_mdev_device(struct device *dev)
 	return container_of(dev, struct mdev_device, dev);
 }
 
+/*
+ * Called by the parent device driver to set the device which represents
+ * this mdev in iommu protection scope. By default, the iommu device is
+ * NULL, that indicates using vendor defined isolation.
+ *
+ * @dev: the mediated device that iommu will isolate.
+ * @iommu_device: a pci device which represents the iommu for @dev.
+ */
+static inline void mdev_set_iommu_device(struct mdev_device *mdev,
+					struct device *iommu_device)
+{
+	mdev->iommu_device = iommu_device;
+}
+
+static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev)
+{
+	return mdev->iommu_device;
+}
+
 unsigned int mdev_get_type_group_id(struct mdev_device *mdev);
 unsigned int mtype_get_type_group_id(struct mdev_type *mtype);
 struct device *mtype_get_parent_dev(struct mdev_type *mtype);
-- 
2.31.1




More information about the kernel-team mailing list