[SRU][N:linux-bluefield][PATCH 1/1] UBUNTU: SAUCE: Revert "netfilter: conntrack: rework offload nf_conn timeout extension logic"

Alessio Faina alessio.faina at canonical.com
Wed Apr 29 13:57:20 UTC 2026


BugLink: https://bugs.launchpad.net/bugs/2150645

This reverts commit def3e30c59ebb7f01df9d8526978479f2c9b7b7f.

Revert this commit because NVIDIA reported that it introduces a severe
performance regression.

Signed-off-by: Alessio Faina <alessio.faina at canonical.com>
---
 include/net/netfilter/nf_conntrack.h |  10 +++
 net/netfilter/nf_conntrack_core.c    |   6 ++
 net/netfilter/nf_flow_table_core.c   | 105 +--------------------------
 3 files changed, 18 insertions(+), 103 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index ca26274196b9..a85051121af8 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -323,6 +323,16 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
 
 #define	NF_CT_DAY	(86400 * HZ)
 
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
+{
+	if (nf_ct_expires(ct) < NF_CT_DAY / 2)
+		WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
+}
+
 struct kernel_param;
 
 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index fd0f6397da9d..299987f306c6 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1514,6 +1514,12 @@ static void gc_worker(struct work_struct *work)
 
 			tmp = nf_ct_tuplehash_to_ctrack(h);
 
+			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+				nf_ct_offload_timeout(tmp);
+				if (!nf_conntrack_max95)
+					continue;
+			}
+
 			if (expired_count > GC_SCAN_EXPIRED_MAX) {
 				rcu_read_unlock();
 
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 96f29f80d1bd..5c1ff07eaee0 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -294,7 +294,7 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 		return err;
 	}
 
-	nf_ct_refresh(flow->ct, NF_CT_DAY);
+	nf_ct_offload_timeout(flow->ct);
 
 	if (nf_flowtable_hw_offload(flow_table)) {
 		__set_bit(NF_FLOW_HW, &flow->flags);
@@ -414,116 +414,15 @@ static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
 	return flow_table->type->gc && flow_table->type->gc(flow);
 }
 
-/**
- * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry
- * @ct:		Flowtable offloaded tcp ct
- *
- * Return: number of seconds when ct entry should expire.
- */
-static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct)
-{
-	u8 state = READ_ONCE(ct->proto.tcp.state);
-
-	switch (state) {
-	case TCP_CONNTRACK_SYN_SENT:
-	case TCP_CONNTRACK_SYN_RECV:
-		return 0;
-	case TCP_CONNTRACK_ESTABLISHED:
-		return NF_CT_DAY;
-	case TCP_CONNTRACK_FIN_WAIT:
-	case TCP_CONNTRACK_CLOSE_WAIT:
-	case TCP_CONNTRACK_LAST_ACK:
-	case TCP_CONNTRACK_TIME_WAIT:
-		return 5 * 60 * HZ;
-	case TCP_CONNTRACK_CLOSE:
-		return 0;
-	}
-
-	return 0;
-}
-
-/**
- * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry
- * @ct:		Flowtable offloaded ct
- *
- * Datapath lookups in the conntrack table will evict nf_conn entries
- * if they have expired.
- *
- * Once nf_conn entries have been offloaded, nf_conntrack might not see any
- * packets anymore.  Thus ct->timeout is no longer refreshed and ct can
- * be evicted.
- *
- * To avoid the need for an additional check on the offload bit for every
- * packet processed via nf_conntrack_in(), set an arbitrary timeout large
- * enough not to ever expire, this save us a check for the IPS_OFFLOAD_BIT
- * from the packet path via nf_ct_is_expired().
- */
-static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
-{
-	static const u32 min_timeout = 5 * 60 * HZ;
-	u32 expires = nf_ct_expires(ct);
-
-	/* normal case: large enough timeout, nothing to do. */
-	if (likely(expires >= min_timeout))
-		return;
-
-	/* must check offload bit after this, we do not hold any locks.
-	 * flowtable and ct entries could have been removed on another CPU.
-	 */
-	if (!refcount_inc_not_zero(&ct->ct_general.use))
-		return;
-
-	/* load ct->status after refcount increase */
-	smp_acquire__after_ctrl_dep();
-
-	if (nf_ct_is_confirmed(ct) &&
-	    test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
-		u8 l4proto = nf_ct_protonum(ct);
-		u32 new_timeout = true;
-
-		switch (l4proto) {
-		case IPPROTO_UDP:
-			new_timeout = NF_CT_DAY;
-			break;
-		case IPPROTO_TCP:
-			new_timeout = nf_flow_table_tcp_timeout(ct);
-			break;
-		default:
-			WARN_ON_ONCE(1);
-			break;
-		}
-
-		/* Update to ct->timeout from nf_conntrack happens
-		 * without holding ct->lock.
-		 *
-		 * Use cmpxchg to ensure timeout extension doesn't
-		 * happen when we race with conntrack datapath.
-		 *
-		 * The inverse -- datapath updating ->timeout right
-		 * after this -- is fine, datapath is authoritative.
-		 */
-		if (new_timeout) {
-			new_timeout += nfct_time_stamp;
-			cmpxchg(&ct->timeout, expires, new_timeout);
-		}
-	}
-
-	nf_ct_put(ct);
-}
-
 static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
 				    struct flow_offload *flow, void *data)
 {
-	bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
 	if (nf_flow_has_expired(flow) ||
 	    nf_ct_is_dying(flow->ct) ||
 	    nf_flow_custom_gc(flow_table, flow))
 		flow_offload_teardown(flow);
-	else if (!teardown)
-		nf_flow_table_extend_ct_timeout(flow->ct);
 
-	if (teardown) {
+	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
 		if (test_bit(NF_FLOW_HW, &flow->flags)) {
 			if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
 				nf_flow_offload_del(flow_table, flow);
-- 
2.43.0




More information about the kernel-team mailing list