summaryrefslogtreecommitdiffstats
path: root/kernel/net/netfilter/ipset/ip_set_core.c
diff options
context:
space:
mode:
authorJozsef Kadlecsik <kadlec@netfilter.org>2024-01-29 12:30:23 +0100
committerJozsef Kadlecsik <kadlec@netfilter.org>2024-01-29 12:30:23 +0100
commit148fad4dcc41bd07b52ed3ccca5f40765e9cf692 (patch)
tree8dabf3d719bfc7875f6c71254aca764821f27025 /kernel/net/netfilter/ipset/ip_set_core.c
parent0378d91222c1aba5a766c3d745574ed1c59cbf8f (diff)
netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test v4
The patch "netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test", commit 28628fa9 fixes a race condition. But the synchronize_rcu() added to the swap function unnecessarily slows it down: it can safely be moved to destroy and use call_rcu() instead. Eric Dumazet pointed out that simply calling the destroy functions as rcu callback does not work: sets with timeout use garbage collectors which need cancelling at destroy which can wait. Therefore the destroy functions are split into two: cancelling garbage collectors safely at executing the command received by netlink and moving the remaining part only into the rcu callback. Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") Reported-by: Ale Crismani <ale.crismani@automattic.com> Reported-by: David Wang <00107082@163.com> Tested-by: David Wang <00107082@163.com> Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Diffstat (limited to 'kernel/net/netfilter/ipset/ip_set_core.c')
-rw-r--r--kernel/net/netfilter/ipset/ip_set_core.c38
1 files changed, 28 insertions, 10 deletions
diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c
index 3a93fd5..126312d 100644
--- a/kernel/net/netfilter/ipset/ip_set_core.c
+++ b/kernel/net/netfilter/ipset/ip_set_core.c
@@ -1192,6 +1192,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set(set);
+}
+
static int
IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
@@ -1207,9 +1215,6 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
-
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
* External systems (i.e. xt_set) must call
@@ -1220,8 +1225,10 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1235,6 +1242,8 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
@@ -1242,6 +1251,9 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(INFO_NLH(info, nlh));
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1252,10 +1264,16 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
@@ -1420,9 +1438,6 @@ IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
- /* Make sure all readers of the old set pointers are completed. */
- synchronize_rcu();
-
return 0;
}
@@ -2540,8 +2555,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
UNREGISTER_PERNET_SUBSYS(&ip_set_net_ops);
+
+ /* Wait for call_rcu() in destroy */
+ rcu_barrier();
+
pr_debug("these are the famous last words\n");
}