[SRU][Q/P/N:linux-azure][PATCH 2/3] RDMA/mana_ib: Drain send wrs of GSI QP
John Cabaj
john.cabaj at canonical.com
Thu Oct 9 20:22:58 UTC 2025
From: Konstantin Taranov <kotaranov at microsoft.com>
BugLink: https://bugs.launchpad.net/bugs/2127201
Drain send WRs of the GSI QP on device removal.
In rare servicing scenarios, the hardware may delete the
state of the GSI QP, preventing it from generating CQEs
for pending send WRs. Since WRs submitted to the GSI QP
hold CM resources, the device cannot be removed until
those WRs are completed. This patch marks all pending
send WRs as failed, allowing the GSI QP to release the CM
resources and enabling safe device removal.
Signed-off-by: Konstantin Taranov <kotaranov at microsoft.com>
Link: https://patch.msgid.link/1753779618-23629-1-git-send-email-kotaranov@linux.microsoft.com
Signed-off-by: Leon Romanovsky <leon at kernel.org>
(cherry picked from commit 44d69d3cf2e8047c279cbb9708f05e2c43e33234)
Signed-off-by: John Cabaj <john.cabaj at canonical.com>
---
drivers/infiniband/hw/mana/cq.c | 26 ++++++++++++++++++++++++++
drivers/infiniband/hw/mana/device.c | 3 +++
drivers/infiniband/hw/mana/mana_ib.h | 3 +++
3 files changed, 32 insertions(+)
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 54c71e82ce18..c8a66432dc95 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -290,6 +290,32 @@ static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc
return wc_index;
}
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, MANA_GSI_QPN, false);
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ struct mana_ib_cq *cq;
+ unsigned long flags;
+
+ if (!qp)
+ return;
+
+ cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ while ((shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq))
+ != NULL) {
+ shadow_wqe->header.error_code = IB_WC_GENERAL_ERR;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+
+ mana_put_qp_ref(qp);
+}
+
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index b64a41e10df7..267f129dcafa 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -224,6 +224,9 @@ static void mana_ib_remove(struct auxiliary_device *adev)
{
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
+ if (mana_ib_is_rnic(dev))
+ mana_drain_gsi_sqs(dev);
+
ib_unregister_device(&dev->ib_dev);
dma_pool_destroy(dev->av_pool);
if (mana_ib_is_rnic(dev)) {
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 2207c30cb727..52a542a70ffb 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -43,6 +43,8 @@
*/
#define MANA_AV_BUFFER_SIZE 64
+#define MANA_GSI_QPN (1)
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -715,6 +717,7 @@ int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev);
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
--
2.43.0
More information about the kernel-team
mailing list