ACK: [SRU][J:linux-azure][PATCH 1/1] hv_netvsc: Fix panic during namespace deletion with VF

John Cabaj john.cabaj at canonical.com
Mon Aug 18 19:28:48 UTC 2025


On 8/17/25 11:21 PM, Vinicius Peixoto wrote:
> From: Haiyang Zhang <haiyangz at microsoft.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/2120803
> 
> The existing code moves the VF NIC to a new namespace when NETDEV_REGISTER is
> received on the netvsc NIC. During deletion of the namespace,
> default_device_exit_batch() >> default_device_exit_net() is called. When the
> netvsc NIC is moved back and registered to the default namespace, it
> automatically brings the VF NIC back to the default namespace. This causes
> the default_device_exit_net() >> for_each_netdev_safe loop to miss the end of
> the list and hit a NULL pointer dereference:
> 
> [  231.449420] mana 7870:00:00.0 enP30832s1: Moved VF to namespace with: eth0
> [  231.449656] BUG: kernel NULL pointer dereference, address: 0000000000000010
> [  231.450246] #PF: supervisor read access in kernel mode
> [  231.450579] #PF: error_code(0x0000) - not-present page
> [  231.450916] PGD 17b8a8067 P4D 0
> [  231.451163] Oops: Oops: 0000 [#1] SMP NOPTI
> [  231.451450] CPU: 82 UID: 0 PID: 1394 Comm: kworker/u768:1 Not tainted 6.16.0-rc4+ #3 VOLUNTARY
> [  231.452042] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 11/21/2024
> [  231.452692] Workqueue: netns cleanup_net
> [  231.452947] RIP: 0010:default_device_exit_batch+0x16c/0x3f0
> [  231.453326] Code: c0 0c f5 b3 e8 d5 db fe ff 48 85 c0 74 15 48 c7 c2 f8 fd ca b2 be 10 00 00 00 48 8d 7d c0 e8 7b 77 25 00 49 8b 86 28 01 00 00 <48> 8b 50 10 4c 8b 2a 4c 8d 62 f0 49 83 ed 10 4c 39 e0 0f 84 d6 00
> [  231.454294] RSP: 0018:ff75fc7c9bf9fd00 EFLAGS: 00010246
> [  231.454610] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 61c8864680b583eb
> [  231.455094] RDX: ff1fa9f71462d800 RSI: ff75fc7c9bf9fd38 RDI: 0000000030766564
> [  231.455686] RBP: ff75fc7c9bf9fd78 R08: 0000000000000000 R09: 0000000000000000
> [  231.456126] R10: 0000000000000001 R11: 0000000000000004 R12: ff1fa9f70088e340
> [  231.456621] R13: ff1fa9f70088e340 R14: ffffffffb3f50c20 R15: ff1fa9f7103e6340
> [  231.457161] FS:  0000000000000000(0000) GS:ff1faa6783a08000(0000) knlGS:0000000000000000
> [  231.457707] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  231.458031] CR2: 0000000000000010 CR3: 0000000179ab2006 CR4: 0000000000b73ef0
> [  231.458434] Call Trace:
> [  231.458600]  <TASK>
> [  231.458777]  ops_undo_list+0x100/0x220
> [  231.459015]  cleanup_net+0x1b8/0x300
> [  231.459285]  process_one_work+0x184/0x340
> 
> To fix it, move the namespace change to a workqueue, and take rtnl_lock to
> avoid changing the netdev list while default_device_exit_net() is using it.
> 
> Cc: stable at vger.kernel.org
> Fixes: 4c262801ea60 ("hv_netvsc: Fix VF namespace also in synthetic NIC NETDEV_REGISTER event")
> Signed-off-by: Haiyang Zhang <haiyangz at microsoft.com>
> Link: https://patch.msgid.link/1754511711-11188-1-git-send-email-haiyangz@linux.microsoft.com
> Signed-off-by: Jakub Kicinski <kuba at kernel.org>
> (cherry picked from commit 33caa208dba6fa639e8a92fd0c8320b652e5550c)
> Signed-off-by: Vinicius Peixoto <vinicius.peixoto at canonical.com>
> ---
>   drivers/net/hyperv/hyperv_net.h |  3 +++
>   drivers/net/hyperv/netvsc_drv.c | 29 ++++++++++++++++++++++++++++-
>   2 files changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index 9e618343f4ae..1b7d4a2e5460 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -1046,6 +1046,7 @@ struct net_device_context {
>   	struct net_device __rcu *vf_netdev;
>   	struct netvsc_vf_pcpu_stats __percpu *vf_stats;
>   	struct delayed_work vf_takeover;
> +	struct delayed_work vfns_work;
>   
>   	/* 1: allocated, serial number is valid. 0: not allocated */
>   	u32 vf_alloc;
> @@ -1060,6 +1061,8 @@ struct net_device_context {
>   	struct netvsc_device_info *saved_netvsc_dev_info;
>   };
>   
> +void netvsc_vfns_work(struct work_struct *w);
> +
>   /* Azure hosts don't support non-TCP port numbers in hashing for fragmented
>    * packets. We can use ethtool to change UDP hash level when necessary.
>    */
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index beb432eb39af..be64bdf3baa8 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -2539,6 +2539,7 @@ static int netvsc_probe(struct hv_device *dev,
>   	spin_lock_init(&net_device_ctx->lock);
>   	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
>   	INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
> +	INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work);
>   
>   	net_device_ctx->vf_stats
>   		= netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
> @@ -2678,6 +2679,8 @@ static int netvsc_remove(struct hv_device *dev)
>   	cancel_delayed_work_sync(&ndev_ctx->dwork);
>   
>   	rtnl_lock();
> +	cancel_delayed_work_sync(&ndev_ctx->vfns_work);
> +
>   	nvdev = rtnl_dereference(ndev_ctx->nvdev);
>   	if (nvdev) {
>   		cancel_work_sync(&nvdev->subchan_work);
> @@ -2720,6 +2723,7 @@ static int netvsc_suspend(struct hv_device *dev)
>   	cancel_delayed_work_sync(&ndev_ctx->dwork);
>   
>   	rtnl_lock();
> +	cancel_delayed_work_sync(&ndev_ctx->vfns_work);
>   
>   	nvdev = rtnl_dereference(ndev_ctx->nvdev);
>   	if (nvdev == NULL) {
> @@ -2813,6 +2817,27 @@ static void netvsc_event_set_vf_ns(struct net_device *ndev)
>   	}
>   }
>   
> +void netvsc_vfns_work(struct work_struct *w)
> +{
> +	struct net_device_context *ndev_ctx =
> +		container_of(w, struct net_device_context, vfns_work.work);
> +	struct net_device *ndev;
> +
> +	if (!rtnl_trylock()) {
> +		schedule_delayed_work(&ndev_ctx->vfns_work, 1);
> +		return;
> +	}
> +
> +	ndev = hv_get_drvdata(ndev_ctx->device_ctx);
> +	if (!ndev)
> +		goto out;
> +
> +	netvsc_event_set_vf_ns(ndev);
> +
> +out:
> +	rtnl_unlock();
> +}
> +
>   /*
>    * On Hyper-V, every VF interface is matched with a corresponding
>    * synthetic interface. The synthetic interface is presented first
> @@ -2823,10 +2848,12 @@ static int netvsc_netdev_event(struct notifier_block *this,
>   			       unsigned long event, void *ptr)
>   {
>   	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> +	struct net_device_context *ndev_ctx;
>   	int ret = 0;
>   
>   	if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) {
> -		netvsc_event_set_vf_ns(event_dev);
> +		ndev_ctx = netdev_priv(event_dev);
> +		schedule_delayed_work(&ndev_ctx->vfns_work, 0);
>   		return NOTIFY_DONE;
>   	}
>   

Acked-by: John Cabaj <john.cabaj at canonical.com>




More information about the kernel-team mailing list