From 4c70c227fdbb4ec302286bb7ade2ec4051b4d508 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Wed, 16 Mar 2016 09:03:30 +0100 Subject: netfilter: ipset: fix race condition in ipset save, swap and delete This fix adds a new reference counter (ref_netlink) for the struct ip_set. The other reference counter (ref) can be swapped out by ip_set_swap and we need a separate counter to keep track of references for netlink events like dump. Using the same ref counter for dump causes a race condition which can be demonstrated by the following script: ipset create hash_ip1 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset create hash_ip2 hash:ip family inet hashsize 300000 maxelem 500000 \ counters ipset create hash_ip3 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset save & ipset swap hash_ip3 hash_ip2 ipset destroy hash_ip3 /* will crash the machine */ Swap will exchange the values of ref so destroy will see ref = 0 instead of ref = 1. With this fix in place swap will not succeed because ipset save still has ref_netlink on the set (ip_set_swap doesn't swap ref_netlink). Both delete and swap will error out if ref_netlink != 0 on the set. Note: The changes to *_head functions is because previously we would increment ref whenever we called these functions, we don't do that anymore. Reviewed-by: Joshua Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Jozsef Kadlecsik --- kernel/net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- kernel/net/netfilter/ipset/ip_set_core.c | 33 ++++++++++++++++++++++---- kernel/net/netfilter/ipset/ip_set_hash_gen.h | 2 +- kernel/net/netfilter/ipset/ip_set_list_set.c | 2 +- 4 files changed, 31 insertions(+), 8 deletions(-) (limited to 'kernel/net') diff --git a/kernel/net/netfilter/ipset/ip_set_bitmap_gen.h b/kernel/net/netfilter/ipset/ip_set_bitmap_gen.h index 0c1ba2e..211dc66 100644 --- a/kernel/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/kernel/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -105,7 +105,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c index d34dfef..0be8846 100644 --- a/kernel/net/netfilter/ipset/ip_set_core.c +++ b/kernel/net/netfilter/ipset/ip_set_core.c @@ -502,6 +502,26 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } +/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need + * a separate reference counter + */ +static inline void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + +static inline void +__ip_set_put_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + BUG_ON(set->ref_netlink == 0); + set->ref_netlink--; + write_unlock_bh(&ip_set_ref_lock); +} + /* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced @@ -1014,7 +1034,7 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s && s->ref) { + if (s && (s->ref || s->ref_netlink)) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1036,7 +1056,7 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, if (!s) { ret = -ENOENT; goto out; - } else if (s->ref) { + } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1186,6 +1206,9 @@ IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; + if (from->ref_netlink || to->ref_netlink) + return -EBUSY; + strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); @@ -1221,7 +1244,7 @@ ip_set_dump_done(struct netlink_callback *cb) if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); } return 0; } @@ -1343,7 +1366,7 @@ dump_last: if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); - set->ref++; + set->ref_netlink++; } write_unlock_bh(&ip_set_ref_lock); nlh = start_msg(skb, NETLINK_PORTID(cb->skb), @@ -1411,7 +1434,7 @@ release_refcount: if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); cb->args[IPSET_CB_ARG0] = 0; } out: diff --git a/kernel/net/netfilter/ipset/ip_set_hash_gen.h b/kernel/net/netfilter/ipset/ip_set_hash_gen.h index 75c6d22..9128a22 100644 --- a/kernel/net/netfilter/ipset/ip_set_hash_gen.h +++ b/kernel/net/netfilter/ipset/ip_set_hash_gen.h @@ -1075,7 +1075,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) goto nla_put_failure; #endif - if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; diff --git a/kernel/net/netfilter/ipset/ip_set_list_set.c b/kernel/net/netfilter/ipset/ip_set_list_set.c index 4fb9186..dede343 100644 --- a/kernel/net/netfilter/ipset/ip_set_list_set.c +++ b/kernel/net/netfilter/ipset/ip_set_list_set.c @@ -474,7 +474,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; -- cgit v1.2.3