[SRU][N:linux-bluefield][PATCH 1/1] UBUNTU: SAUCE: Revert "netfilter: conntrack: rework offload nf_conn timeout extension logic"
Alessio Faina
alessio.faina at canonical.com
Wed Apr 29 13:57:20 UTC 2026
BugLink: https://bugs.launchpad.net/bugs/2150645
This reverts commit def3e30c59ebb7f01df9d8526978479f2c9b7b7f.
This commit needs to be reverted because Nvidia reported that it causes
a massive performance regression.
Signed-off-by: Alessio Faina <alessio.faina at canonical.com>
---
include/net/netfilter/nf_conntrack.h | 10 +++
net/netfilter/nf_conntrack_core.c | 6 ++
net/netfilter/nf_flow_table_core.c | 105 +--------------------------
3 files changed, 18 insertions(+), 103 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index ca26274196b9..a85051121af8 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -323,6 +323,16 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
#define NF_CT_DAY (86400 * HZ)
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
+{
+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
+}
+
struct kernel_param;
int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index fd0f6397da9d..299987f306c6 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1514,6 +1514,12 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+ nf_ct_offload_timeout(tmp);
+ if (!nf_conntrack_max95)
+ continue;
+ }
+
if (expired_count > GC_SCAN_EXPIRED_MAX) {
rcu_read_unlock();
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 96f29f80d1bd..5c1ff07eaee0 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -294,7 +294,7 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
return err;
}
- nf_ct_refresh(flow->ct, NF_CT_DAY);
+ nf_ct_offload_timeout(flow->ct);
if (nf_flowtable_hw_offload(flow_table)) {
__set_bit(NF_FLOW_HW, &flow->flags);
@@ -414,116 +414,15 @@ static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
return flow_table->type->gc && flow_table->type->gc(flow);
}
-/**
- * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry
- * @ct: Flowtable offloaded tcp ct
- *
- * Return: number of seconds when ct entry should expire.
- */
-static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct)
-{
- u8 state = READ_ONCE(ct->proto.tcp.state);
-
- switch (state) {
- case TCP_CONNTRACK_SYN_SENT:
- case TCP_CONNTRACK_SYN_RECV:
- return 0;
- case TCP_CONNTRACK_ESTABLISHED:
- return NF_CT_DAY;
- case TCP_CONNTRACK_FIN_WAIT:
- case TCP_CONNTRACK_CLOSE_WAIT:
- case TCP_CONNTRACK_LAST_ACK:
- case TCP_CONNTRACK_TIME_WAIT:
- return 5 * 60 * HZ;
- case TCP_CONNTRACK_CLOSE:
- return 0;
- }
-
- return 0;
-}
-
-/**
- * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry
- * @ct: Flowtable offloaded ct
- *
- * Datapath lookups in the conntrack table will evict nf_conn entries
- * if they have expired.
- *
- * Once nf_conn entries have been offloaded, nf_conntrack might not see any
- * packets anymore. Thus ct->timeout is no longer refreshed and ct can
- * be evicted.
- *
- * To avoid the need for an additional check on the offload bit for every
- * packet processed via nf_conntrack_in(), set an arbitrary timeout large
- * enough not to ever expire, this save us a check for the IPS_OFFLOAD_BIT
- * from the packet path via nf_ct_is_expired().
- */
-static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
-{
- static const u32 min_timeout = 5 * 60 * HZ;
- u32 expires = nf_ct_expires(ct);
-
- /* normal case: large enough timeout, nothing to do. */
- if (likely(expires >= min_timeout))
- return;
-
- /* must check offload bit after this, we do not hold any locks.
- * flowtable and ct entries could have been removed on another CPU.
- */
- if (!refcount_inc_not_zero(&ct->ct_general.use))
- return;
-
- /* load ct->status after refcount increase */
- smp_acquire__after_ctrl_dep();
-
- if (nf_ct_is_confirmed(ct) &&
- test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
- u8 l4proto = nf_ct_protonum(ct);
- u32 new_timeout = true;
-
- switch (l4proto) {
- case IPPROTO_UDP:
- new_timeout = NF_CT_DAY;
- break;
- case IPPROTO_TCP:
- new_timeout = nf_flow_table_tcp_timeout(ct);
- break;
- default:
- WARN_ON_ONCE(1);
- break;
- }
-
- /* Update to ct->timeout from nf_conntrack happens
- * without holding ct->lock.
- *
- * Use cmpxchg to ensure timeout extension doesn't
- * happen when we race with conntrack datapath.
- *
- * The inverse -- datapath updating ->timeout right
- * after this -- is fine, datapath is authoritative.
- */
- if (new_timeout) {
- new_timeout += nfct_time_stamp;
- cmpxchg(&ct->timeout, expires, new_timeout);
- }
- }
-
- nf_ct_put(ct);
-}
-
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data)
{
- bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
if (nf_flow_has_expired(flow) ||
nf_ct_is_dying(flow->ct) ||
nf_flow_custom_gc(flow_table, flow))
flow_offload_teardown(flow);
- else if (!teardown)
- nf_flow_table_extend_ct_timeout(flow->ct);
- if (teardown) {
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {
if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
nf_flow_offload_del(flow_table, flow);
--
2.43.0
More information about the kernel-team
mailing list