diff options
author | Jozsef Kadlecsik <kadlec@netfilter.org> | 2024-01-29 12:30:23 +0100 |
---|---|---|
committer | Jozsef Kadlecsik <kadlec@netfilter.org> | 2024-01-29 12:30:23 +0100 |
commit | 148fad4dcc41bd07b52ed3ccca5f40765e9cf692 (patch) | |
tree | 8dabf3d719bfc7875f6c71254aca764821f27025 /kernel/net/netfilter/ipset/ip_set_core.c | |
parent | 0378d91222c1aba5a766c3d745574ed1c59cbf8f (diff) |
netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test v4
The patch "netfilter: ipset: fix race condition between swap/destroy
and kernel side add/del/test", commit 28628fa9 fixes a race condition.
But the synchronize_rcu() added to the swap function unnecessarily slows
it down: it can safely be moved to destroy and use call_rcu() instead.
Eric Dumazet pointed out that simply calling the destroy functions as
rcu callback does not work: sets with timeout use garbage collectors
which need cancelling at destroy which can wait. Therefore the destroy
functions are split into two: cancelling garbage collectors safely at
executing the command received by netlink and moving the remaining
part only into the rcu callback.
Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/
Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test")
Reported-by: Ale Crismani <ale.crismani@automattic.com>
Reported-by: David Wang <00107082@163.com>
Tested-by: David Wang <00107082@163.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Diffstat (limited to 'kernel/net/netfilter/ipset/ip_set_core.c')
-rw-r--r-- | kernel/net/netfilter/ipset/ip_set_core.c | 38 |
1 files changed, 28 insertions, 10 deletions
diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c index 3a93fd5..126312d 100644 --- a/kernel/net/netfilter/ipset/ip_set_core.c +++ b/kernel/net/netfilter/ipset/ip_set_core.c @@ -1192,6 +1192,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1207,9 +1215,6 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); - /* Commands are serialized and references are * protected by the ip_set_ref_lock. * External systems (i.e. xt_set) must call @@ -1220,8 +1225,10 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1235,6 +1242,8 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1242,6 +1251,9 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, inst->is_destroyed = false; } else { u32 flags = flag_exist(INFO_NLH(info, nlh)); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1252,10 +1264,16 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1420,9 +1438,6 @@ IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2540,8 +2555,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - UNREGISTER_PERNET_SUBSYS(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } |