[SRU][P:linux-azure][PATCH 3/5] RDMA/mana_ib: Add port statistics support
John Cabaj
john.cabaj at canonical.com
Tue Aug 5 20:23:36 UTC 2025
From: Shiraz Saleem <shirazsaleem at microsoft.com>
BugLink: https://bugs.launchpad.net/bugs/2119485
Implement alloc_hw_port_stats and get_hw_stats APIs to support querying
MANA VF port level statistics from rdma stat tool.
Example output from rdma stat tool:
$rdma statistic show link mana_0/1 -p
link mana_0/1
requester_timeout 45
requester_oos_nak 0
requester_rnr_nak 0
responder_rnr_nak 0
responder_oos 0
responder_dup_request 0
requester_implicit_nak 0
requester_readresp_psn_mismatch 0
nak_inv_req 0
nak_access_error 0
nak_opp_error 0
nak_inv_read 0
responder_local_len_error 0
requestor_local_prot_error 0
responder_rem_access_error 0
responder_local_qp_error 0
responder_malformed_wqe 0
general_hw_error 6
requester_rnr_nak_retries_exceeded 0
requester_retries_exceeded 5
total_fatal_error 6
received_cnps 0
num_qps_congested 0
rate_inc_events 0
num_qps_recovered 0
current_rate 100000
Signed-off-by: Shiraz Saleem <shirazsaleem at microsoft.com>
Signed-off-by: Konstantin Taranov <kotaranov at microsoft.com>
Link: https://patch.msgid.link/1738751527-15517-1-git-send-email-kotaranov@linux.microsoft.com
Reviewed-by: Long Li <longli at microsoft.com>
Signed-off-by: Leon Romanovsky <leon at kernel.org>
(backported from commit 79bccd746132afe12b8bc3785e7b74b90440060d)
[john-cabaj: leaving out additional structs not introduced in patch]
Signed-off-by: John Cabaj <john.cabaj at canonical.com>
---
drivers/infiniband/hw/mana/Makefile | 2 +-
drivers/infiniband/hw/mana/counters.c | 105 ++++++++++++++++++++++++++
drivers/infiniband/hw/mana/counters.h | 44 +++++++++++
drivers/infiniband/hw/mana/device.c | 7 ++
drivers/infiniband/hw/mana/mana_ib.h | 37 +++++++++
5 files changed, 194 insertions(+), 1 deletion(-)
create mode 100644 drivers/infiniband/hw/mana/counters.c
create mode 100644 drivers/infiniband/hw/mana/counters.h
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 88655fe5e398..8e07696f053a 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o counters.o
diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c
new file mode 100644
index 000000000000..e533ce21013d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "counters.h"
+
+static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
+ [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
+ [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
+ [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
+ [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
+ [MANA_IB_RESPONDER_OOS].name = "responder_oos",
+ [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
+ [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
+ [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
+ [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
+ [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
+ [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
+ [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
+ [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
+ [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
+ [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
+ [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
+ [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
+ [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
+ [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
+ [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
+ [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
+ [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
+ [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
+ [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
+ [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
+ [MANA_IB_CURRENT_RATE].name = "current_rate",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
+ ARRAY_SIZE(mana_ib_port_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_vf_cntrs_resp resp = {};
+ struct mana_rnic_query_vf_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
+ stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
+ stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
+ stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
+ resp.requester_implicit_nak;
+ stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
+ resp.requester_readresp_psn_mismatch;
+ stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
+ stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
+ stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
+ stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
+ stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
+ resp.responder_local_len_err;
+ stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
+ resp.requestor_local_prot_err;
+ stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
+ resp.responder_rem_access_err;
+ stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
+ resp.responder_local_qp_err;
+ stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
+ resp.responder_malformed_wqe;
+ stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
+ resp.requester_rnr_nak_retries_exceeded;
+ stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
+ resp.requester_retries_exceeded;
+ stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
+
+ stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
+ stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
+ stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
+ stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
+ stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
+
+ return ARRAY_SIZE(mana_ib_port_stats_desc);
+}
diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h
new file mode 100644
index 000000000000..7ff92d27f6c3
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _COUNTERS_H_
+#define _COUNTERS_H_
+
+#include "mana_ib.h"
+
+enum mana_ib_port_counters {
+ MANA_IB_REQUESTER_TIMEOUT,
+ MANA_IB_REQUESTER_OOS_NAK,
+ MANA_IB_REQUESTER_RNR_NAK,
+ MANA_IB_RESPONDER_RNR_NAK,
+ MANA_IB_RESPONDER_OOS,
+ MANA_IB_RESPONDER_DUP_REQUEST,
+ MANA_IB_REQUESTER_IMPLICIT_NAK,
+ MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
+ MANA_IB_NAK_INV_REQ,
+ MANA_IB_NAK_ACCESS_ERR,
+ MANA_IB_NAK_OPP_ERR,
+ MANA_IB_NAK_INV_READ,
+ MANA_IB_RESPONDER_LOCAL_LEN_ERR,
+ MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
+ MANA_IB_RESPONDER_REM_ACCESS_ERR,
+ MANA_IB_RESPONDER_LOCAL_QP_ERR,
+ MANA_IB_RESPONDER_MALFORMED_WQE,
+ MANA_IB_GENERAL_HW_ERR,
+ MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
+ MANA_IB_REQUESTER_RETRIES_EXCEEDED,
+ MANA_IB_TOTAL_FATAL_ERR,
+ MANA_IB_RECEIVED_CNPS,
+ MANA_IB_NUM_QPS_CONGESTED,
+ MANA_IB_RATE_INC_EVENTS,
+ MANA_IB_NUM_QPS_RECOVERED,
+ MANA_IB_CURRENT_RATE,
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+#endif /* _COUNTERS_H_ */
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index b0b866b574a0..8d882d9b10e9 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -51,6 +51,11 @@ static const struct ib_device_ops mana_ib_dev_ops = {
ib_ind_table),
};
+static const struct ib_device_ops mana_ib_stats_ops = {
+ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
+ .get_hw_stats = mana_ib_get_hw_stats,
+};
+
static int mana_ib_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -155,6 +160,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
goto deregister_net_notifier;
}
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
ret = mana_ib_create_eqs(dev);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index bb9c6b1af24e..56bfd388589e 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -13,6 +13,7 @@
#include <rdma/uverbs_ioctl.h>
#include <net/mana/mana.h>
+#include "counters.h"
#define PAGE_SZ_BM \
(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
@@ -152,6 +153,7 @@ enum mana_ib_command_code {
MANA_IB_CREATE_RC_QP = 0x3000a,
MANA_IB_DESTROY_RC_QP = 0x3000b,
MANA_IB_SET_QP_STATE = 0x3000d,
+ MANA_IB_QUERY_VF_COUNTERS = 0x30022,
};
struct mana_ib_query_adapter_caps_req {
@@ -334,6 +336,41 @@ struct mana_rnic_set_qp_state_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+struct mana_rnic_query_vf_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_vf_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u64 requester_timeout;
+ u64 requester_oos_nak;
+ u64 requester_rnr_nak;
+ u64 responder_rnr_nak;
+ u64 responder_oos;
+ u64 responder_dup_request;
+ u64 requester_implicit_nak;
+ u64 requester_readresp_psn_mismatch;
+ u64 nak_inv_req;
+ u64 nak_access_err;
+ u64 nak_opp_err;
+ u64 nak_inv_read;
+ u64 responder_local_len_err;
+ u64 requestor_local_prot_err;
+ u64 responder_rem_access_err;
+ u64 responder_local_qp_err;
+ u64 responder_malformed_wqe;
+ u64 general_hw_err;
+ u64 requester_rnr_nak_retries_exceeded;
+ u64 requester_retries_exceeded;
+ u64 total_fatal_err;
+ u64 received_cnps;
+ u64 num_qps_congested;
+ u64 rate_inc_events;
+ u64 num_qps_recovered;
+ u64 current_rate;
+}; /* HW Data */
+
static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
{
return mdev->gdma_dev->gdma_context;
--
2.43.0
More information about the kernel-team
mailing list