[SRU][J:linux-azure][PATCH] net: mana: Allow variable size indirection table
John Cabaj
john.cabaj at canonical.com
Tue Jun 24 19:42:56 UTC 2025
From: Shradha Gupta <shradhagupta at linux.microsoft.com>
BugLink: https://bugs.launchpad.net/bugs/2100902
Allow variable size indirection table allocation in MANA instead
of using a constant value MANA_INDIRECT_TABLE_SIZE.
The size is now derived from the MANA_QUERY_VPORT_CONFIG and the
indirection table is allocated dynamically.
Signed-off-by: Shradha Gupta <shradhagupta at linux.microsoft.com>
Link: https://lore.kernel.org/r/1718015319-9609-1-git-send-email-shradhagupta@linux.microsoft.com
Reviewed-by: Dexuan Cui <decui at microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz at microsoft.com>
Signed-off-by: Leon Romanovsky <leon at kernel.org>
(backported from commit 7fc45cb68696c7213c484ec81892bc8a986fde52)
[john-cabaj: upstream 29b8e13a8b4c: "RDMA/mana_ib: Prefer struct_size
over open coded arithmetic" and a68292eb4316: "net: mana: Avoid
open coded arithmetic" made changes to wrap some calculations
in a safer function, but are not easily backported. Additional changes from
fb6e30a72539: "net: ethtool: pass a pointer to parameters to
get/set_rxfh ethtool ops" that are not required. Leaving these out and
adjusting for other context changes]
Signed-off-by: John Cabaj <john.cabaj at canonical.com>
---
drivers/infiniband/hw/mana/qp.c | 10 +--
drivers/net/ethernet/microsoft/mana/mana_en.c | 85 ++++++++++++++++---
.../ethernet/microsoft/mana/mana_ethtool.c | 23 +++--
include/net/mana/gdma.h | 4 +-
include/net/mana/mana.h | 9 +-
5 files changed, 100 insertions(+), 31 deletions(-)
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 40a33fdde84c..b8d25e6fc370 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -25,7 +25,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
mdev = &gc->mana;
req_buf_size =
- sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE;
+ sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_DEF_SIZE;
req = kzalloc(req_buf_size, GFP_KERNEL);
if (!req)
return -ENOMEM;
@@ -43,17 +43,17 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
if (log_ind_tbl_size)
req->rss_enable = true;
- req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
+ req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE;
req->indir_tab_offset = sizeof(*req);
req->update_indir_tab = true;
req_indir_tab = (mana_handle_t *)(req + 1);
/* The ind table passed to the hardware must have
- * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
+ * MANA_INDIRECT_TABLE_DEF_SIZE entries. Adjust the verb
* ind_table to MANA_INDIRECT_TABLE_SIZE if required
*/
ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+ for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) {
req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
req_indir_tab[i]);
@@ -142,7 +142,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
}
ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
- if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
+ if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) {
ibdev_dbg(&mdev->ib_dev,
"Indirect table size %d exceeding limit\n",
ind_tbl_size);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 6267f98d5f90..6301e2199f2c 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -495,7 +495,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
struct sock *sk = skb->sk;
int txq;
- txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];
+ txq = apc->indir_table[hash & (apc->indir_table_sz - 1)];
if (txq != old_q && sk && sk_fullsock(sk) &&
rcu_access_pointer(sk->sk_dst_cache))
@@ -745,6 +745,13 @@ static void mana_cleanup_port_context(struct mana_port_context *apc)
apc->rxqs = NULL;
}
+static void mana_cleanup_indir_table(struct mana_port_context *apc)
+{
+ apc->indir_table_sz = 0;
+ kfree(apc->indir_table);
+ kfree(apc->rxobj_table);
+}
+
static int mana_init_port_context(struct mana_port_context *apc)
{
apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
@@ -988,7 +995,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
*max_sq = resp.max_num_sq;
*max_rq = resp.max_num_rq;
- *num_indir_entry = resp.num_indirection_ent;
+ if (resp.num_indirection_ent > 0 &&
+ resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE &&
+ is_power_of_2(resp.num_indirection_ent)) {
+ *num_indir_entry = resp.num_indirection_ent;
+ } else {
+ netdev_warn(apc->ndev,
+ "Setting indirection table size to default %d for vPort %d\n",
+ MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx);
+ *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE;
+ }
apc->port_handle = resp.vport;
ether_addr_copy(apc->mac_addr, resp.mac_addr);
@@ -1080,7 +1096,6 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
bool update_default_rxobj, bool update_key,
bool update_tab)
{
- u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
struct mana_cfg_rx_steer_req *req = NULL;
struct mana_cfg_rx_steer_resp resp = {};
struct net_device *ndev = apc->ndev;
@@ -1088,7 +1103,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
u32 req_buf_size;
int err;
- req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
+ req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * apc->indir_table_sz;
req = kzalloc(req_buf_size, GFP_KERNEL);
if (!req)
return -ENOMEM;
@@ -1097,7 +1112,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
sizeof(resp));
req->vport = apc->port_handle;
- req->num_indir_entries = num_entries;
+ req->num_indir_entries = apc->indir_table_sz;
req->indir_tab_offset = sizeof(*req);
req->rx_enable = rx;
req->rss_enable = apc->rss_state;
@@ -1136,7 +1151,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
}
netdev_info(ndev, "Configured steering vPort %llu entries %u\n",
- apc->port_handle, num_entries);
+ apc->port_handle, apc->indir_table_sz);
out:
kfree(req);
return err;
@@ -2454,11 +2469,33 @@ static int mana_create_vport(struct mana_port_context *apc,
return mana_create_txq(apc, net);
}
+static int mana_rss_table_alloc(struct mana_port_context *apc)
+{
+ if (!apc->indir_table_sz) {
+ netdev_err(apc->ndev,
+ "Indirection table size not set for vPort %d\n",
+ apc->port_idx);
+ return -EINVAL;
+ }
+
+ apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL);
+ if (!apc->indir_table)
+ return -ENOMEM;
+
+ apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL);
+ if (!apc->rxobj_table) {
+ kfree(apc->indir_table);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static void mana_rss_table_init(struct mana_port_context *apc)
{
int i;
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+ for (i = 0; i < apc->indir_table_sz; i++)
apc->indir_table[i] =
ethtool_rxfh_indir_default(i, apc->num_queues);
}
@@ -2471,7 +2508,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
int i;
if (update_tab) {
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+ for (i = 0; i < apc->indir_table_sz; i++) {
queue_idx = apc->indir_table[i];
apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
}
@@ -2532,7 +2569,6 @@ static int mana_init_port(struct net_device *ndev)
struct gdma_dev *gd = apc->ac->gdma_dev;
u32 max_txq, max_rxq, max_queues;
int port_idx = apc->port_idx;
- u32 num_indirect_entries;
struct gdma_context *gc;
char vport[32];
int err;
@@ -2544,7 +2580,7 @@ static int mana_init_port(struct net_device *ndev)
gc = gd->gdma_context;
err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
- &num_indirect_entries);
+ &apc->indir_table_sz);
if (err) {
netdev_err(ndev, "Failed to query info for vPort %d\n",
port_idx);
@@ -2796,6 +2832,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
if (err)
goto free_net;
+ err = mana_rss_table_alloc(apc);
+ if (err)
+ goto reset_apc;
+
netdev_lockdep_set_classes(ndev);
ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
@@ -2809,11 +2849,13 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
err = register_netdev(ndev);
if (err) {
netdev_err(ndev, "Unable to register netdev.\n");
- goto reset_apc;
+ goto free_indir;
}
return 0;
+free_indir:
+ mana_cleanup_indir_table(apc);
reset_apc:
kfree(apc->rxqs);
apc->rxqs = NULL;
@@ -2944,16 +2986,30 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (!resuming) {
for (i = 0; i < ac->num_ports; i++) {
err = mana_probe_port(ac, i, &ac->ports[i]);
- if (err)
+ /* we log the port for which the probe failed and stop
+ * probes for subsequent ports.
+ * Note that we keep running ports, for which the probes
+ * were successful, unless add_adev fails too
+ */
+ if (err) {
+ dev_err(dev, "Probe Failed for port %d\n", i);
break;
+ }
}
} else {
for (i = 0; i < ac->num_ports; i++) {
rtnl_lock();
err = mana_attach(ac->ports[i]);
rtnl_unlock();
- if (err)
+ /* we log the port for which the attach failed and stop
+ * attach for subsequent ports
+ * Note that we keep running ports, for which the attach
+ * were successful, unless add_adev fails too
+ */
+ if (err) {
+ dev_err(dev, "Attach Failed for port %d\n", i);
break;
+ }
}
}
@@ -2969,6 +3025,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
{
struct gdma_context *gc = gd->gdma_context;
struct mana_context *ac = gd->driver_data;
+ struct mana_port_context *apc;
struct device *dev = gc->dev;
struct net_device *ndev;
int err;
@@ -2980,6 +3037,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
for (i = 0; i < ac->num_ports; i++) {
ndev = ac->ports[i];
+ apc = netdev_priv(ndev);
if (!ndev) {
if (i == 0)
dev_err(dev, "No net device to remove\n");
@@ -3003,6 +3061,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
}
unregister_netdevice(ndev);
+ mana_cleanup_indir_table(apc);
rtnl_unlock();
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index cf6d704542f2..ccd338315458 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -205,7 +205,9 @@ static u32 mana_get_rxfh_key_size(struct net_device *ndev)
static u32 mana_rss_indir_size(struct net_device *ndev)
{
- return MANA_INDIRECT_TABLE_SIZE;
+ struct mana_port_context *apc = netdev_priv(ndev);
+
+ return apc->indir_table_sz;
}
static int mana_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key,
@@ -218,7 +220,7 @@ static int mana_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key,
*hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
if (indir) {
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+ for (i = 0; i < apc->indir_table_sz; i++)
indir[i] = apc->indir_table[i];
}
@@ -233,8 +235,8 @@ static int mana_set_rxfh(struct net_device *ndev, const u32 *indir,
{
struct mana_port_context *apc = netdev_priv(ndev);
bool update_hash = false, update_table = false;
- u32 save_table[MANA_INDIRECT_TABLE_SIZE];
u8 save_key[MANA_HASH_KEY_SIZE];
+ u32 *save_table;
int i, err;
if (!apc->port_is_up)
@@ -244,12 +246,14 @@ static int mana_set_rxfh(struct net_device *ndev, const u32 *indir,
return -EOPNOTSUPP;
if (indir) {
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
- if (indir[i] >= apc->num_queues)
- return -EINVAL;
+ for (i = 0; i < apc->indir_table_sz; i++)
+ if (indir[i] >= apc->num_queues) {
+ err = -EINVAL;
+ goto cleanup;
+ }
update_table = true;
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+ for (i = 0; i < apc->indir_table_sz; i++) {
save_table[i] = apc->indir_table[i];
apc->indir_table[i] = indir[i];
}
@@ -265,7 +269,7 @@ static int mana_set_rxfh(struct net_device *ndev, const u32 *indir,
if (err) { /* recover to original values */
if (update_table) {
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+ for (i = 0; i < apc->indir_table_sz; i++)
apc->indir_table[i] = save_table[i];
}
@@ -275,6 +279,9 @@ static int mana_set_rxfh(struct net_device *ndev, const u32 *indir,
mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table);
}
+cleanup:
+ kfree(save_table);
+
return err;
}
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 32a3901d4d5c..56e0cd53239d 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -550,11 +550,13 @@ enum {
*/
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
+#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
- GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
+ GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 19df032cd958..e8d0ee72aa40 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -28,8 +28,8 @@ enum TRI_STATE {
};
/* Number of entries for hardware indirection table must be in power of 2 */
-#define MANA_INDIRECT_TABLE_SIZE 64
-#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
+#define MANA_INDIRECT_TABLE_MAX_SIZE 512
+#define MANA_INDIRECT_TABLE_DEF_SIZE 64
/* The Toeplitz hash key's length in bytes: should be multiple of 8 */
#define MANA_HASH_KEY_SIZE 40
@@ -408,10 +408,11 @@ struct mana_port_context {
struct mana_tx_qp *tx_qp;
/* Indirection Table for RX & TX. The values are queue indexes */
- u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
+ u32 *indir_table;
+ u32 indir_table_sz;
/* Indirection table containing RxObject Handles */
- mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
+ mana_handle_t *rxobj_table;
/* Hash key used by the NIC */
u8 hashkey[MANA_HASH_KEY_SIZE];
--
2.43.0
More information about the kernel-team
mailing list