APPLIED: [SRU Bionic 1/1] drm/i915: Flush TLBs before releasing backing store

Stefan Bader stefan.bader at canonical.com
Thu Jan 27 13:57:56 UTC 2022


On 26.01.22 17:58, Thadeu Lima de Souza Cascardo wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> We need to flush TLBs before releasing backing store otherwise userspace
> is able to encounter stale entries if a) it is not declaring GPU access to
> certain buffers and b) this GPU execution then races with the backing
> store release getting triggered asynchronously.
> 
> Approach taken is to mark any buffer objects which were ever bound to the
> GPU and triggering a serialized TLB flush when their backing store is
> released.
> 
> Alternatively the flushing could be done on VMA unbind, at which point we
> would be able to ascertain whether there is potential parallel GPU
> execution (which could race), but choice essentially boils down to paying
> the cost of TLB flushes maybe needlessly at VMA unbind time (when the
> backing store is not known to be definitely going away, so flushing not
> always required for safety), versus potentially needlessly at backing
> store release time since at that point we cannot tell whether there is a
> parallel GPU execution happening.
> 
> Therefore simplicity of implementation has been chosen for now, with scope
> to benchmark and refine later as required.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reported-by: Sushma Venkatesh Reddy <sushma.venkatesh.reddy at intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Cc: Jon Bloomfield <jon.bloomfield at intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Jani Nikula <jani.nikula at intel.com>
> Cc: stable at vger.kernel.org
> (backported from commit 7938d61591d33394a21bdd7797a245b65428f44c)
> [cascardo: conflict cleanup because of extra i915_gemfs_init]
> CVE-2022-0330
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo at canonical.com>
> ---

Applied to bionic:linux/master-next. Thanks.

-Stefan

>   drivers/gpu/drm/i915/i915_drv.h        |  2 +
>   drivers/gpu/drm/i915/i915_gem.c        | 84 +++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_object.h |  1 +
>   drivers/gpu/drm/i915/i915_reg.h        |  6 ++
>   drivers/gpu/drm/i915/i915_vma.c        |  4 ++
>   5 files changed, 96 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a7b32ee8549e..0efcb09c3306 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2245,6 +2245,8 @@ struct drm_i915_private {
>   
>   	struct intel_uncore uncore;
>   
> +	struct mutex tlb_invalidate_lock;
> +
>   	struct i915_virtual_gpu vgpu;
>   
>   	struct intel_gvt *gvt;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8356adc9a67a..32974cf96f73 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2284,6 +2284,76 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
>   	rcu_read_unlock();
>   }
>   
> +struct reg_and_bit {
> +	i915_reg_t reg;
> +	u32 bit;
> +};
> +
> +static struct reg_and_bit
> +get_reg_and_bit(const struct intel_engine_cs *engine,
> +		const i915_reg_t *regs, const unsigned int num)
> +{
> +	const unsigned int class = engine->class;
> +	struct reg_and_bit rb = { .bit = 1 };
> +
> +	if (WARN_ON_ONCE(class >= num || !regs[class].reg))
> +		return rb;
> +
> +	rb.reg = regs[class];
> +	if (class == VIDEO_DECODE_CLASS)
> +		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
> +
> +	return rb;
> +}
> +
> +static void invalidate_tlbs(struct drm_i915_private *dev_priv)
> +{
> +	static const i915_reg_t gen8_regs[] = {
> +		[RENDER_CLASS]                  = GEN8_RTCR,
> +		[VIDEO_DECODE_CLASS]            = GEN8_M1TCR, /* , GEN8_M2TCR */
> +		[VIDEO_ENHANCEMENT_CLASS]       = GEN8_VTCR,
> +		[COPY_ENGINE_CLASS]             = GEN8_BTCR,
> +	};
> +	const unsigned int num = ARRAY_SIZE(gen8_regs);
> +	const i915_reg_t *regs = gen8_regs;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	if (INTEL_GEN(dev_priv) < 8)
> +		return;
> +
> +	assert_rpm_wakelock_held(dev_priv);
> +
> +	mutex_lock(&dev_priv->tlb_invalidate_lock);
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	for_each_engine(engine, dev_priv, id) {
> +		/*
> +		 * HW architecture suggest typical invalidation time at 40us,
> +		 * with pessimistic cases up to 100us and a recommendation to
> +		 * cap at 1ms. We go a bit higher just in case.
> +		 */
> +		const unsigned int timeout_us = 100;
> +		const unsigned int timeout_ms = 4;
> +		struct reg_and_bit rb;
> +
> +		rb = get_reg_and_bit(engine, regs, num);
> +		if (!i915_mmio_reg_offset(rb.reg))
> +			continue;
> +
> +		I915_WRITE_FW(rb.reg, rb.bit);
> +		if (__intel_wait_for_register_fw(dev_priv,
> +						 rb.reg, rb.bit, 0,
> +						 timeout_us, timeout_ms,
> +						 NULL))
> +			DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n",
> +					      engine->name, timeout_ms);
> +	}
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +	mutex_unlock(&dev_priv->tlb_invalidate_lock);
> +}
> +
>   void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>   				 enum i915_mm_subclass subclass)
>   {
> @@ -2326,8 +2396,18 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>   
>   	__i915_gem_object_reset_page_iter(obj);
>   
> -	if (!IS_ERR(pages))
> +	if (!IS_ERR(pages)) {
> +		if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
> +			struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +
> +			if (intel_runtime_pm_get_if_in_use(i915)) {
> +				invalidate_tlbs(i915);
> +				intel_runtime_pm_put(i915);
> +			}
> +		}
> +
>   		obj->ops->put_pages(obj, pages);
> +	}
>   
>   	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
>   
> @@ -5288,6 +5368,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>   
>   	spin_lock_init(&dev_priv->fb_tracking.lock);
>   
> +	mutex_init(&dev_priv->tlb_invalidate_lock);
> +
>   	err = i915_gemfs_init(dev_priv);
>   	if (err)
>   		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 6b1de7942349..e3c0a897ca19 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -135,6 +135,7 @@ struct drm_i915_gem_object {
>   	 * activity?
>   	 */
>   #define I915_BO_ACTIVE_REF 0
> +#define I915_BO_WAS_BOUND_BIT    1
>   
>   	/*
>   	 * Is the object to be mapped as read-only to the GPU
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 814b4c66c6e3..ae1f77e3a824 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2384,6 +2384,12 @@ enum i915_power_well_id {
>   #define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1<<28)
>   #define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT	(1<<24)
>   
> +#define GEN8_RTCR	_MMIO(0x4260)
> +#define GEN8_M1TCR	_MMIO(0x4264)
> +#define GEN8_M2TCR	_MMIO(0x4268)
> +#define GEN8_BTCR	_MMIO(0x426c)
> +#define GEN8_VTCR	_MMIO(0x4270)
> +
>   #if 0
>   #define PRB0_TAIL	_MMIO(0x2030)
>   #define PRB0_HEAD	_MMIO(0x2034)
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 7c4ab4263d33..19475637ba2d 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -283,6 +283,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>   		return ret;
>   
>   	vma->flags |= bind_flags;
> +
> +	if (vma->obj)
> +		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
> +
>   	return 0;
>   }
>   

-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20220127/7b63e59c/attachment.sig>


More information about the kernel-team mailing list