netfilter: ctnetlink: fix refcount leak on table dump

[ Upstream commit de788b2e62 ]

There is a reference count leak in ctnetlink_dump_table():
      if (res < 0) {
                nf_conntrack_get(&ct->ct_general); // HERE
                cb->args[1] = (unsigned long)ct;
                ...

While it's very unlikely, it's possible that ct == last.
If this happens, then the refcount of ct was already incremented.
This 2nd increment is never undone.

This prevents the conntrack object from being released, which in turn
prevents cnet->count from dropping back to 0.

This will then block the netns dismantle (or conntrack rmmod) as
nf_conntrack_cleanup_net_list() will wait forever.

This can be reproduced by running conntrack_resize.sh selftest in a loop.
It takes ~20 minutes for me on a preemptible kernel on average before
I see a runaway kworker spinning in nf_conntrack_cleanup_net_list.

One fix would be to change this to:
        if (res < 0) {
		if (ct != last)
	                nf_conntrack_get(&ct->ct_general);

But this reference counting isn't needed in the first place.
We can just store a cookie value instead.

A followup patch will do the same for ctnetlink_exp_dump_table,
it looks to me as if this has the same problem and like
ctnetlink_dump_table, we only need a 'skip hint', not the actual
object so we can apply the same cookie strategy there as well.

Fixes: d205dc4079 ("[NETFILTER]: ctnetlink: fix deadlock in table dumping")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Florian Westphal 2025-08-01 17:25:08 +02:00 committed by Greg Kroah-Hartman
parent 800c70fce9
commit 41462f4cfc

View File

@ -859,8 +859,6 @@ errout:
static int ctnetlink_done(struct netlink_callback *cb)
{
if (cb->args[1])
nf_ct_put((struct nf_conn *)cb->args[1]);
kfree(cb->data);
return 0;
}
@ -1175,19 +1173,26 @@ ignore_entry:
return 0;
}
/*
 * Return a non-zero cookie identifying @ct for resuming a table dump.
 *
 * The dump code stores this value in cb->args[1] and treats 0 there as
 * "no resume point", so an id of 0 from nf_ct_get_id() is mapped to 1.
 */
static unsigned long ctnetlink_get_id(const struct nf_conn *ct)
{
unsigned long id = nf_ct_get_id(ct);
return id ? id : 1;
}
static int
ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0;
struct net *net = sock_net(skb->sk);
struct nf_conn *ct, *last;
unsigned long last_id = cb->args[1];
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nf_conn *nf_ct_evict[8];
struct nf_conn *ct;
int res, i;
spinlock_t *lockp;
last = (struct nf_conn *)cb->args[1];
i = 0;
local_bh_disable();
@ -1224,7 +1229,7 @@ restart:
continue;
if (cb->args[1]) {
if (ct != last)
if (ctnetlink_get_id(ct) != last_id)
continue;
cb->args[1] = 0;
}
@ -1237,8 +1242,7 @@ restart:
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct, true, flags);
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
cb->args[1] = ctnetlink_get_id(ct);
spin_unlock(lockp);
goto out;
}
@ -1251,12 +1255,10 @@ restart:
}
out:
local_bh_enable();
if (last) {
if (last_id) {
/* nf ct hash resize happened, now clear the leftover. */
if ((struct nf_conn *)cb->args[1] == last)
if (cb->args[1] == last_id)
cb->args[1] = 0;
nf_ct_put(last);
}
while (i) {