ACK:PATCH][kinetic/azure] net: mana: Fix race on per-CQ variable napi work_done

Philip Cox philip.cox at canonical.com
Thu Jan 19 16:15:09 UTC 2023


-- 
Acked-by: Philip Cox <philip.cox at canonical.com>


On Thu, 2023-01-19 at 08:34 -0700, Tim Gardner wrote:
> From: Haiyang Zhang <haiyangz at microsoft.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/2003353
> 
> After calling napi_complete_done(), the NAPIF_STATE_SCHED bit may be
> cleared, and another CPU can start napi thread and access per-CQ
> variable,
> cq->work_done. If the other thread (for example, from busy_poll) sets
> it to a value >= budget, this thread will continue to run when it
> should
> stop, and cause memory corruption and panic.
> 
> To fix this issue, save the per-CQ work_done variable in a local
> variable
> before napi_complete_done(), so it won't be corrupted by a possible
> concurrent thread after napi_complete_done().
> 
> Also, add a flag bit to advertise to the NIC firmware: the NAPI
> work_done
> variable race is fixed, so the driver is able to reliably support
> features
> like busy_poll.
> 
> Cc: stable at vger.kernel.org
> Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
> Signed-off-by: Haiyang Zhang <haiyangz at microsoft.com>
> Link:
> https://lore.kernel.org/r/1670010190-28595-1-git-send-email-haiyangz@microsoft.com
> Signed-off-by: Paolo Abeni <pabeni at redhat.com>
> (cherry picked from commit 18010ff776fa42340efc428b3ea6d19b3e7c7b21
> linux-next)
> Signed-off-by: Tim Gardner <tim.gardner at canonical.com>
> ---
>  drivers/net/ethernet/microsoft/mana/gdma.h    |  9 ++++++++-
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 16 +++++++++++-----
>  2 files changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h
> b/drivers/net/ethernet/microsoft/mana/gdma.h
> index 4a6efe6ada08..65c24ee49efd 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma.h
> +++ b/drivers/net/ethernet/microsoft/mana/gdma.h
> @@ -498,7 +498,14 @@ enum {
>  
>  #define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0)
>  
> -#define GDMA_DRV_CAP_FLAGS1
> GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT
> +/* Advertise to the NIC firmware: the NAPI work_done variable race
> is fixed,
> + * so the driver is able to reliably support features like
> busy_poll.
> + */
> +#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
> +
> +#define GDMA_DRV_CAP_FLAGS1 \
> +       (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
> +        GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX)
>  
>  #define GDMA_DRV_CAP_FLAGS2 0
>  
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 9259a74eca40..27a0f3af8aab 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1303,10 +1303,11 @@ static void mana_poll_rx_cq(struct mana_cq
> *cq)
>                 xdp_do_flush();
>  }
>  
> -static void mana_cq_handler(void *context, struct gdma_queue
> *gdma_queue)
> +static int mana_cq_handler(void *context, struct gdma_queue
> *gdma_queue)
>  {
>         struct mana_cq *cq = context;
>         u8 arm_bit;
> +       int w;
>  
>         WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
>  
> @@ -1315,26 +1316,31 @@ static void mana_cq_handler(void *context,
> struct gdma_queue *gdma_queue)
>         else
>                 mana_poll_tx_cq(cq);
>  
> -       if (cq->work_done < cq->budget &&
> -           napi_complete_done(&cq->napi, cq->work_done)) {
> +       w = cq->work_done;
> +
> +       if (w < cq->budget &&
> +           napi_complete_done(&cq->napi, w)) {
>                 arm_bit = SET_ARM_BIT;
>         } else {
>                 arm_bit = 0;
>         }
>  
>         mana_gd_ring_cq(gdma_queue, arm_bit);
> +
> +       return w;
>  }
>  
>  static int mana_poll(struct napi_struct *napi, int budget)
>  {
>         struct mana_cq *cq = container_of(napi, struct mana_cq,
> napi);
> +       int w;
>  
>         cq->work_done = 0;
>         cq->budget = budget;
>  
> -       mana_cq_handler(cq, cq->gdma_cq);
> +       w = mana_cq_handler(cq, cq->gdma_cq);
>  
> -       return min(cq->work_done, budget);
> +       return min(w, budget);
>  }
>  
>  static void mana_schedule_napi(void *context, struct gdma_queue
> *gdma_queue)
> -- 
> 2.34.1
> 
> 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20230119/973b845d/attachment-0001.html>


More information about the kernel-team mailing list