diff options
Diffstat (limited to 'kernel/net/netfilter/ipset/ip_set_core.c')
-rw-r--r-- | kernel/net/netfilter/ipset/ip_set_core.c | 452 |
1 files changed, 254 insertions, 198 deletions
diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c index 4d8e8b1..c31dbc3 100644 --- a/kernel/net/netfilter/ipset/ip_set_core.c +++ b/kernel/net/netfilter/ipset/ip_set_core.c @@ -21,6 +21,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_compiler.h> static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ @@ -29,7 +30,6 @@ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ struct ip_set_net { struct ip_set * __rcu *ip_set_list; /* all individual sets */ ip_set_id_t ip_set_max; /* max number of sets */ - bool is_deleted; /* deleted by ip_set_net_exit */ bool is_destroyed; /* all sets are destroyed */ }; @@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -86,7 +88,8 @@ find_set_type(const char *name, u8 family, u8 revision) { struct ip_set_type *type; - list_for_each_entry_rcu(type, &ip_set_type_list, list) + list_for_each_entry_rcu_compat(type, &ip_set_type_list, list, + lockdep_is_held(&ip_set_type_mutex)) if (STRNCMP(type->name, name) && (type->family == family || type->family == NFPROTO_UNSPEC) && @@ -249,22 +252,7 @@ EXPORT_SYMBOL_GPL(ip_set_type_unregister); void * ip_set_alloc(size_t size) { - void *members = NULL; - - if (size < KMALLOC_MAX_SIZE) - members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - - if (members) { - pr_debug("%p: allocated with kmalloc\n", members); - return members; - } - - members = vzalloc(size); - if (!members) - return NULL; - pr_debug("%p: allocated with vmalloc\n", members); - - return members; + return kvzalloc(size, GFP_KERNEL_ACCOUNT); } EXPORT_SYMBOL_GPL(ip_set_alloc); @@ -285,8 +273,7 @@ flag_nested(const struct nlattr *nla) static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, - [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, - .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_IPADDR_IPV6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), }; int @@ -368,7 +355,7 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC); if (unlikely(!c)) return; - strlcpy(c->str, ext->comment, len + 1); + strscpy(c->str, ext->comment, len + 1); set->ext_size += sizeof(*c) + strlen(c->str) + 1; rcu_assign_pointer(comment->c, c); } @@ -459,6 +446,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; + if (align < ip_set_extensions[id].align) + align = ip_set_extensions[id].align; len = ALIGN(len, ip_set_extensions[id].align); set->offset[id] = len; set->extensions |= ip_set_extensions[id].type; @@ -649,13 +638,14 @@ ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) { struct ip_set_counter *counter = ext_counter(data, set); + ip_set_update_counter(counter, ext, flags); + if (flags & IPSET_FLAG_MATCH_COUNTERS && !(ip_set_match_counter(ip_set_get_packets(counter), mext->packets, mext->packets_op) && ip_set_match_counter(ip_set_get_bytes(counter), mext->bytes, mext->bytes_op))) return false; - ip_set_update_counter(counter, ext, flags); } if (SET_WITH_SKBINFO(set)) ip_set_get_skbinfo(ext_skbinfo(data, set), @@ -695,6 +685,14 @@ __ip_set_put(struct ip_set *set) * a separate reference counter */ static void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + +static void __ip_set_put_netlink(struct ip_set *set) { write_lock_bh(&ip_set_ref_lock); @@ -712,15 +710,24 @@ __ip_set_put_netlink(struct ip_set *set) static struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; +} - return set; +static inline void +ip_set_lock(struct ip_set *set) +{ + if (!set->variant->region_lock) + spin_lock_bh(&set->lock); +} + +static inline void +ip_set_unlock(struct ip_set *set) +{ + if (!set->variant->region_lock) + spin_unlock_bh(&set->lock); } int @@ -737,16 +744,14 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return 0; - rcu_read_lock_bh(); ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); - rcu_read_unlock_bh(); if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); - spin_lock_bh(&set->lock); + ip_set_lock(set); set->variant->kadt(set, skb, par, IPSET_ADD, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); ret = 1; } else { /* --return-nomatch: invert matched element */ @@ -775,9 +780,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); return ret; } @@ -797,9 +802,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); return ret; } @@ -872,7 +877,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name) BUG_ON(!set); read_lock_bh(&ip_set_ref_lock); - strncpy(name, set->name, IPSET_MAXNAMELEN); + strscpy_pad(name, set->name, IPSET_MAXNAMELEN); read_unlock_bh(&ip_set_ref_lock); } EXPORT_SYMBOL_GPL(ip_set_name_byindex); @@ -920,11 +925,9 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index) struct ip_set_net *inst = ip_set_pernet(net); nfnl_lock(NFNL_SUBSYS_IPSET); - if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ - set = ip_set(inst, index); - if (set) - __ip_set_put(set); - } + set = ip_set(inst, index); + if (set) + __ip_set_put(set); nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -961,20 +964,9 @@ static struct nlmsghdr * start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - - nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), - sizeof(*nfmsg), flags); - if (!nlh) - return NULL; - - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = NFPROTO_IPV4; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - return nlh; + return nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags, + NFPROTO_IPV4, NFNETLINK_V0, 0); } /* Create a set */ @@ -1044,7 +1036,8 @@ static int IPSET_CBFN(ip_set_none, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { return -EOPNOTSUPP; } @@ -1053,16 +1046,17 @@ static int IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct net *net = IPSET_SOCK_NET(n, ctnl); + struct net *net = IPSET_SOCK_NET(n, ctnl, info); struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {}; const char *name, *typename; u8 family, revision; - u32 flags = flag_exist(nlh); + u32 flags = flag_exist(INFO_NLH(info, nlh)); int ret = 0; if (unlikely(protocol_min_failed(attr) || @@ -1088,7 +1082,7 @@ IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl, if (!set) return -ENOMEM; spin_lock_init(&set->lock); - strlcpy(set->name, name, IPSET_MAXNAMELEN); + strscpy(set->name, name, IPSET_MAXNAMELEN); set->family = family; set->revision = revision; @@ -1110,8 +1104,10 @@ IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl, ret = -IPSET_ERR_PROTOCOL; goto put_out; } + /* Set create flags depending on the type revision */ + set->flags |= set->type->create_flags[revision]; - ret = set->type->create(net, set, tb, flags); + ret = set->type->create(INFO_NET(info, net), set, tb, flags); if (ret != 0) goto put_out; @@ -1165,6 +1161,7 @@ IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -1182,24 +1179,58 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, }; +/* Destroying a set is split into two stages when a DESTROY command issued: + * - Cancel garbage collectors and decrement the module reference counter: + * - Cancelling may wait and we are allowed to do it at this stage. + * - Module remove is protected by rcu_barrier() which waits for + * the second stage to be finished. + * - In order to prevent the race between kernel side add/del/test element + * operations and destroy, the destroying of the set data areas are + * performed via a call_rcu() call. + */ + +/* Call set variant specific destroy function and reclaim the set data. */ static void -ip_set_destroy_set(struct ip_set *set) +ip_set_destroy_set_variant(struct ip_set *set) { - pr_debug("set: %s\n", set->name); - /* Must call it without holding any lock */ set->variant->destroy(set); - module_put(set->type->me); kfree(set); } +static void +ip_set_destroy_set_variant_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set_variant(set); +} + +/* Cancel the garbage collectors and decrement module references */ +static void +ip_set_destroy_cancel_gc(struct ip_set *set) +{ + set->variant->cancel_gc(set); + module_put(set->type->me); +} + +/* Use when we may wait for the complete destroy to be finished. + */ +static void +ip_set_destroy_set(struct ip_set *set) +{ + ip_set_destroy_cancel_gc(set); + ip_set_destroy_set_variant(set); +} + static int IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -1207,9 +1238,6 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); - /* Commands are serialized and references are * protected by the ip_set_ref_lock. * External systems (i.e. xt_set) must call @@ -1220,8 +1248,10 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1241,19 +1271,30 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl, /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { + u32 flags = flag_exist(INFO_NLH(info, nlh)); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { - ret = -ENOENT; + if (!(flags & IPSET_FLAG_EXIST)) + ret = -ENOENT; goto out; } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + ip_set_destroy_cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_variant_rcu); } return 0; out: @@ -1268,18 +1309,19 @@ ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); - spin_lock_bh(&set->lock); + ip_set_lock(set); set->variant->flush(set); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); } static int IPSET_CBFN(ip_set_flush, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); struct ip_set *s; ip_set_id_t i; @@ -1318,9 +1360,10 @@ static int IPSET_CBFN(ip_set_rename, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -1349,7 +1392,7 @@ IPSET_CBFN(ip_set_rename, struct net *net, struct sock *ctnl, goto out; } } - strncpy(set->name, name2, IPSET_MAXNAMELEN); + strscpy_pad(set->name, name2, IPSET_MAXNAMELEN); out: write_unlock_bh(&ip_set_ref_lock); @@ -1369,9 +1412,10 @@ static int IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1406,9 +1450,9 @@ IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl, return -EBUSY; } - strncpy(from_name, from->name, IPSET_MAXNAMELEN); - strncpy(from->name, to->name, IPSET_MAXNAMELEN); - strncpy(to->name, from_name, IPSET_MAXNAMELEN); + strscpy_pad(from_name, from->name, IPSET_MAXNAMELEN); + strscpy_pad(from->name, to->name, IPSET_MAXNAMELEN); + strscpy_pad(to->name, from_name, IPSET_MAXNAMELEN); swap(from->ref, to->ref); ip_set(inst, from_id) = to; @@ -1490,31 +1534,40 @@ ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static int -dump_init(struct netlink_callback *cb, struct ip_set_net *inst) +ip_set_dump_start(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; + struct sk_buff *skb = cb->skb; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type; - ip_set_id_t index; int ret; ret = NLA_PARSE(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_dump_policy, NULL); if (ret) - return ret; + goto error; cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { + ip_set_id_t index; struct ip_set *set; +#if HAVE_NETLINK_DUMP_START_ARGS != 4 + read_lock_bh(&ip_set_ref_lock); +#endif set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (!set) - return -ENOENT; - +#if HAVE_NETLINK_DUMP_START_ARGS != 4 + read_unlock_bh(&ip_set_ref_lock); +#endif + if (!set) { + ret = -ENOENT; + goto error; + } dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; } else { @@ -1530,10 +1583,17 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) cb->args[IPSET_CB_DUMP] = dump_type; return 0; + +error: + /* We have to create and send the error message manually :-( */ + if (nlh->nlmsg_flags & NLM_F_ACK) { + NETLINK_ACK(cb->skb, nlh, ret, NULL); + } + return ret; } static int -ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) +ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb) { ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; @@ -1545,16 +1605,14 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) int ret = 0; if (!cb->args[IPSET_CB_DUMP]) { - ret = dump_init(cb, inst); +#if HAVE_NETLINK_DUMP_START_ARGS == 4 + return -EINVAL; +#else + ret = ip_set_dump_start(cb); if (ret < 0) { - nlh = nlmsg_hdr(cb->skb); - /* We have to create and send the error message - * manually :-( - */ - if (nlh->nlmsg_flags & NLM_F_ACK) - NETLINK_ACK(cb->skb, nlh, ret, NULL); return ret; } +#endif } if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) @@ -1634,7 +1692,7 @@ dump_last: goto next_set; if (set->variant->uref) set->variant->uref(set, cb, true); - /* fall through */ + fallthrough; default: ret = set->variant->list(set, skb, cb); if (!cb->args[IPSET_CB_ARG0]) @@ -1685,26 +1743,30 @@ static int IPSET_CBFN(ip_set_dump, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; #if HAVE_NETLINK_DUMP_START_ARGS == 5 return netlink_dump_start(ctnl, skb, nlh, - ip_set_dump_start, + ip_set_dump_do, ip_set_dump_done); #elif HAVE_NETLINK_DUMP_START_ARGS == 6 return netlink_dump_start(ctnl, skb, nlh, - ip_set_dump_start, + ip_set_dump_do, ip_set_dump_done, 0); #else { struct netlink_dump_control c = { - .dump = ip_set_dump_start, +#if HAVE_NETLINK_DUMP_START_ARGS == 4 + .start = ip_set_dump_start, +#endif + .dump = ip_set_dump_do, .done = ip_set_dump_done, }; - return netlink_dump_start(ctnl, skb, nlh, &c); + return netlink_dump_start(INFO_SK(info, ctnl), skb, INFO_NLH(info, nlh), &c); } #endif } @@ -1721,8 +1783,8 @@ static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static int -call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, - struct nlattr *tb[], enum ipset_adt adt, +CALL_AD(struct net *net, struct sock *ctnl, struct sk_buff *skb, + struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 flags, bool use_lineno) { int ret; @@ -1730,13 +1792,22 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { - spin_lock_bh(&set->lock); + if (retried) { + __ip_set_get_netlink(set); + nfnl_unlock(NFNL_SUBSYS_IPSET); + cond_resched(); + nfnl_lock(NFNL_SUBSYS_IPSET); + __ip_set_put_netlink(set); + } + + ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); retried = true; - } while (ret == -EAGAIN && - set->variant->resize && - (ret = set->variant->resize(set, retried)) == 0); + } while (ret == -ERANGE || + (ret == -EAGAIN && + set->variant->resize && + (ret = set->variant->resize(set, retried)) == 0)); if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) return 0; @@ -1755,11 +1826,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, skb2 = nlmsg_new(payload, GFP_KERNEL); if (!skb2) return -ENOMEM; - rep = __nlmsg_put(skb2, NETLINK_PORTID(skb), - nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); + rep = nlmsg_put(skb2, NETLINK_PORTID(skb), + nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = ret; - memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); + unsafe_memcpy(&errmsg->msg, nlh, nlh->nlmsg_len, + /* Bounds checked by the skb layer. */); cmdattr = (void *)&errmsg->msg + min_len; ret = NLA_PARSE(cda, IPSET_ATTR_CMD_MAX, cmdattr, @@ -1774,8 +1846,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), - MSG_DONTWAIT); + NFNETLINK_UNICAST(ctnl, skb2, net, NETLINK_PORTID(skb)); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1783,19 +1854,18 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, return ret; } -static int -IPSET_CBFN_AD(ip_set_ad, struct net *net, struct sock *ctnl, - struct sk_buff *skb, - enum ipset_adt adt, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[], - struct netlink_ext_ack *extack) -{ - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); +static int IPSET_CBFN_AD(ip_set_ad, struct net *net, struct sock *ctnl, + struct sk_buff *skb, + enum ipset_adt adt, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[], + struct netlink_ext_ack *extack, const struct nfnl_info *info) +{ + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; - u32 flags = flag_exist(nlh); + u32 flags = flag_exist(INFO_NLH(info, nlh)); bool use_lineno; int ret = 0; @@ -1820,7 +1890,7 @@ IPSET_CBFN_AD(ip_set_ad, struct net *net, struct sock *ctnl, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(ctnl, skb, set, tb, adt, flags, + ret = CALL_AD(net, ctnl, skb, set, tb, adt, flags, use_lineno); } else { int nla_rem; @@ -1831,7 +1901,7 @@ IPSET_CBFN_AD(ip_set_ad, struct net *net, struct sock *ctnl, NLA_PARSE_NESTED(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(ctnl, skb, set, tb, adt, + ret = CALL_AD(net, ctnl, skb, set, tb, adt, flags, use_lineno); if (ret < 0) return ret; @@ -1844,20 +1914,22 @@ static int IPSET_CBFN(ip_set_uadd, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - return IPSET_CBFN_AD(ip_set_ad, net, ctnl, skb, - IPSET_ADD, nlh, attr, extack); + return IPSET_CBFN_AD(ip_set_ad, INFO_NET(info, net), INFO_SK(info, ctnl), skb, + IPSET_ADD, INFO_NLH(info, nlh), attr, extack, info); } static int IPSET_CBFN(ip_set_udel, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - return IPSET_CBFN_AD(ip_set_ad, net, ctnl, skb, - IPSET_DEL, nlh, attr, extack); + return IPSET_CBFN_AD(ip_set_ad, INFO_NET(info, net), INFO_SK(info, ctnl), skb, + IPSET_DEL, INFO_NLH(info, nlh), attr, extack, info); } static int @@ -1865,12 +1937,14 @@ IPSET_CBFN(ip_set_utest, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; + u32 lineno; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || @@ -1887,7 +1961,7 @@ IPSET_CBFN(ip_set_utest, struct net *net, struct sock *ctnl, return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); - ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); + ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0); rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) @@ -1902,13 +1976,13 @@ static int IPSET_CBFN(ip_set_header, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; - int ret = 0; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME])) @@ -1922,7 +1996,7 @@ IPSET_CBFN(ip_set_header, struct net *net, struct sock *ctnl, if (!skb2) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; @@ -1934,11 +2008,7 @@ IPSET_CBFN(ip_set_header, struct net *net, struct sock *ctnl, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT); - if (ret < 0) - return ret; - - return 0; + return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb)); nla_put_failure: nlmsg_cancel(skb2, nlh2); @@ -1960,7 +2030,8 @@ static int IPSET_CBFN(ip_set_type, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1983,7 +2054,7 @@ IPSET_CBFN(ip_set_type, struct net *net, struct sock *ctnl, if (!skb2) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; @@ -1996,11 +2067,7 @@ IPSET_CBFN(ip_set_type, struct net *net, struct sock *ctnl, nlmsg_end(skb2, nlh2); pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); - ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT); - if (ret < 0) - return ret; - - return 0; + return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb)); nla_put_failure: nlmsg_cancel(skb2, nlh2); @@ -2020,11 +2087,11 @@ static int IPSET_CBFN(ip_set_protocol, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { struct sk_buff *skb2; struct nlmsghdr *nlh2; - int ret = 0; if (unlikely(!attr[IPSET_ATTR_PROTOCOL])) return -IPSET_ERR_PROTOCOL; @@ -2033,7 +2100,7 @@ IPSET_CBFN(ip_set_protocol, struct net *net, struct sock *ctnl, if (!skb2) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0, IPSET_CMD_PROTOCOL); if (!nlh2) goto nlmsg_failure; @@ -2043,11 +2110,7 @@ IPSET_CBFN(ip_set_protocol, struct net *net, struct sock *ctnl, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT); - if (ret < 0) - return ret; - - return 0; + return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb)); nla_put_failure: nlmsg_cancel(skb2, nlh2); @@ -2062,14 +2125,14 @@ static int IPSET_CBFN(ip_set_byname, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); struct sk_buff *skb2; struct nlmsghdr *nlh2; ip_set_id_t id = IPSET_INVALID_ID; const struct ip_set *set; - int ret = 0; if (unlikely(protocol_failed(attr) || !attr[IPSET_ATTR_SETNAME])) @@ -2083,7 +2146,7 @@ IPSET_CBFN(ip_set_byname, struct net *net, struct sock *ctnl, if (!skb2) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0, IPSET_CMD_GET_BYNAME); if (!nlh2) goto nlmsg_failure; @@ -2093,11 +2156,7 @@ IPSET_CBFN(ip_set_byname, struct net *net, struct sock *ctnl, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT); - if (ret < 0) - return ret; - - return 0; + return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb)); nla_put_failure: nlmsg_cancel(skb2, nlh2); @@ -2115,14 +2174,14 @@ static int IPSET_CBFN(ip_set_byindex, struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct nfnl_info *info) { - struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl)); + struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info)); struct sk_buff *skb2; struct nlmsghdr *nlh2; ip_set_id_t id = IPSET_INVALID_ID; const struct ip_set *set; - int ret = 0; if (unlikely(protocol_failed(attr) || !attr[IPSET_ATTR_INDEX])) @@ -2139,7 +2198,7 @@ IPSET_CBFN(ip_set_byindex, struct net *net, struct sock *ctnl, if (!skb2) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0, IPSET_CMD_GET_BYINDEX); if (!nlh2) goto nlmsg_failure; @@ -2148,11 +2207,7 @@ IPSET_CBFN(ip_set_byindex, struct net *net, struct sock *ctnl, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT); - if (ret < 0) - return ret; - - return 0; + return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb)); nla_put_failure: nlmsg_cancel(skb2, nlh2); @@ -2164,80 +2219,96 @@ nlmsg_failure: static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { [IPSET_CMD_NONE] = { .call = ip_set_none, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, }, [IPSET_CMD_CREATE] = { .call = ip_set_create, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_create_policy, }, [IPSET_CMD_DESTROY] = { .call = ip_set_destroy, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_FLUSH] = { .call = ip_set_flush, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_RENAME] = { .call = ip_set_rename, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname2_policy, }, [IPSET_CMD_SWAP] = { .call = ip_set_swap, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname2_policy, }, [IPSET_CMD_LIST] = { .call = ip_set_dump, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_dump_policy, }, [IPSET_CMD_SAVE] = { .call = ip_set_dump, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_ADD] = { .call = ip_set_uadd, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_adt_policy, }, [IPSET_CMD_DEL] = { .call = ip_set_udel, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_adt_policy, }, [IPSET_CMD_TEST] = { .call = ip_set_utest, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_adt_policy, }, [IPSET_CMD_HEADER] = { .call = ip_set_header, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_TYPE] = { .call = ip_set_type, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_type_policy, }, [IPSET_CMD_PROTOCOL] = { .call = ip_set_protocol, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_protocol_policy, }, [IPSET_CMD_GET_BYNAME] = { .call = ip_set_byname, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_GET_BYINDEX] = { .call = ip_set_byindex, + SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX) .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_index_policy, }, @@ -2410,7 +2481,6 @@ ip_set_net_init(struct net *net) #else goto err_alloc; #endif - inst->is_deleted = false; inst->is_destroyed = false; rcu_assign_pointer(inst->ip_set_list, list); return 0; @@ -2427,20 +2497,6 @@ ip_set_net_exit(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set *set = NULL; - ip_set_id_t i; - - inst->is_deleted = true; /* flag for ip_set_nfnl_put */ - - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - set = ip_set(inst, i); - if (set) { - ip_set(inst, i) = NULL; - ip_set_destroy_set(set); - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); #ifndef HAVE_NET_OPS_ID kvfree(inst); @@ -2505,8 +2561,8 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - UNREGISTER_PERNET_SUBSYS(&ip_set_net_ops); + pr_debug("these are the famous last words\n"); } |