[SRU][Q:linux-azure][PATCH 2/3] net: mana: Add standard counter rx_missed_errors
John Cabaj
john.cabaj at canonical.com
Fri Mar 27 21:42:41 UTC 2026
From: Erni Sri Satya Vennela <ernis at linux.microsoft.com>
BugLink: https://bugs.launchpad.net/bugs/2146601
Report standard counter stats->rx_missed_errors
using hc_rx_discards_no_wqe from the hardware.
Add a global workqueue to periodically run
mana_query_gf_stats every 2 seconds to get the latest
info in eth_stats and define a driver capability flag
to notify hardware of the periodic queries.
To avoid repeated failures and log flooding, the work is not
rescheduled if mana_query_gf_stats() fails with an HWC timeout
error; in that case the stats are reset to 0. Other errors are
transient and do not need a VF reset for recovery, so polling continues.
Signed-off-by: Erni Sri Satya Vennela <ernis at linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz at microsoft.com>
Link: https://patch.msgid.link/1763120599-6331-3-git-send-email-ernis@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba at kernel.org>
(backported from commit be4f1d67ec56f23f37714ac73c01094e63c7ff28)
[john-cabaj: context changes due to inclusion of 3b194343c250:
"net: mana: Implement ndo_tx_timeout and serialize queue resets
per port."]
Signed-off-by: John Cabaj <john.cabaj at canonical.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 36 +++++++++++++++++--
.../ethernet/microsoft/mana/mana_ethtool.c | 2 --
include/net/mana/gdma.h | 4 +++
include/net/mana/mana.h | 6 +++-
4 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index f0f6e0544663..568613c1dbb4 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -567,6 +567,11 @@ static void mana_get_stats64(struct net_device *ndev,
netdev_stats_to_stats64(st, &ndev->stats);
+ if (apc->ac->hwc_timeout_occurred)
+ netdev_warn_once(ndev, "HWC timeout occurred\n");
+
+ st->rx_missed_errors = apc->ac->hc_stats.hc_rx_discards_no_wqe;
+
for (q = 0; q < num_queues; q++) {
rx_stats = &apc->rxqs[q]->stats;
@@ -2870,7 +2875,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
return 0;
}
-void mana_query_gf_stats(struct mana_context *ac)
+int mana_query_gf_stats(struct mana_context *ac)
{
struct gdma_context *gc = ac->gdma_dev->gdma_context;
struct mana_query_gf_stat_resp resp = {};
@@ -2913,14 +2918,14 @@ void mana_query_gf_stats(struct mana_context *ac)
sizeof(resp));
if (err) {
dev_err(dev, "Failed to query GF stats: %d\n", err);
- return;
+ return err;
}
err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_GF_STAT,
sizeof(resp));
if (err || resp.hdr.status) {
dev_err(dev, "Failed to query GF stats: %d, 0x%x\n", err,
resp.hdr.status);
- return;
+ return err;
}
ac->hc_stats.hc_rx_discards_no_wqe = resp.rx_discards_nowqe;
@@ -2955,6 +2960,8 @@ void mana_query_gf_stats(struct mana_context *ac)
ac->hc_stats.hc_tx_mcast_pkts = resp.hc_tx_mcast_pkts;
ac->hc_stats.hc_tx_mcast_bytes = resp.hc_tx_mcast_bytes;
ac->hc_stats.hc_tx_err_gdma = resp.tx_err_gdma;
+
+ return 0;
}
void mana_query_phy_stats(struct mana_port_context *apc)
@@ -3495,6 +3502,24 @@ int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type even
return 0;
}
+#define MANA_GF_STATS_PERIOD (2 * HZ)
+
+/*
+ * Periodic worker that refreshes the hardware counters by calling
+ * mana_query_gf_stats() and then re-arms itself MANA_GF_STATS_PERIOD later.
+ * On -ETIMEDOUT it records the HWC timeout, zeroes ac->hc_stats and does
+ * not re-arm; any other result keeps the polling going.
+ */
+static void mana_gf_stats_work_handler(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct mana_context *ac;
+
+	ac = container_of(dwork, struct mana_context, gf_stats_work);
+
+	if (mana_query_gf_stats(ac) != -ETIMEDOUT) {
+		/* Success or a non-timeout error: poll again next period. */
+		schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);
+		return;
+	}
+
+	/* HWC timeout detected - reset stats and stop rescheduling */
+	ac->hwc_timeout_occurred = true;
+	memset(&ac->hc_stats, 0, sizeof(ac->hc_stats));
+}
+
int mana_probe(struct gdma_dev *gd, bool resuming)
{
struct gdma_context *gc = gd->gdma_context;
@@ -3598,6 +3623,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
}
err = add_adev(gd, "eth");
+
+ INIT_DELAYED_WORK(&ac->gf_stats_work, mana_gf_stats_work_handler);
+ schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);
+
out:
if (err) {
mana_remove(gd, false);
@@ -3622,6 +3651,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
int i;
disable_work_sync(&ac->link_change_work);
+ cancel_delayed_work_sync(&ac->gf_stats_work);
/* adev currently doesn't support suspending, always remove it */
if (gd->adev)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index 3dfd96146424..99e811208683 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -213,8 +213,6 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
if (!apc->port_is_up)
return;
- /* we call mana function to update stats from GDMA */
- mana_query_gf_stats(apc->ac);
/* We call this mana function to get the phy stats from GDMA and includes
* aggregate tx/rx drop counters, Per-TC(Traffic Channel) tx/rx and pause
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index b61c5aae5ec7..04a13a6960f6 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -602,6 +602,9 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
+/* Driver can send HWC periodically to query stats */
+#define GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY BIT(21)
+
/* Driver can handle hardware recovery events during probe */
#define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22)
@@ -615,6 +618,7 @@ enum {
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
+ GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \
GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \
GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY)
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 9d3dce07d020..27f2ad47a1ed 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -482,6 +482,10 @@ struct mana_context {
struct workqueue_struct *per_port_queue_reset_wq;
+ /* Workqueue for querying hardware stats */
+ struct delayed_work gf_stats_work;
+ bool hwc_timeout_occurred;
+
struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
/* Link state change work */
@@ -584,7 +588,7 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
-void mana_query_gf_stats(struct mana_context *ac);
+int mana_query_gf_stats(struct mana_context *ac);
int mana_query_link_cfg(struct mana_port_context *apc);
int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
int enable_clamping);
--
2.43.0
More information about the kernel-team
mailing list