summaryrefslogtreecommitdiffstats
path: root/kernel/net/netfilter/ipset/ip_set_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/net/netfilter/ipset/ip_set_core.c')
-rw-r--r--kernel/net/netfilter/ipset/ip_set_core.c147
1 files changed, 92 insertions, 55 deletions
diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c
index 0fdafb7..c31dbc3 100644
--- a/kernel/net/netfilter/ipset/ip_set_core.c
+++ b/kernel/net/netfilter/ipset/ip_set_core.c
@@ -30,7 +30,6 @@ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
struct ip_set_net {
struct ip_set * __rcu *ip_set_list; /* all individual sets */
ip_set_id_t ip_set_max; /* max number of sets */
- bool is_deleted; /* deleted by ip_set_net_exit */
bool is_destroyed; /* all sets are destroyed */
};
@@ -62,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
ip_set_dereference((inst)->ip_set_list)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
+#define ip_set_dereference_nfnl(p) \
+ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
@@ -354,7 +355,7 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
- strlcpy(c->str, ext->comment, len + 1);
+ strscpy(c->str, ext->comment, len + 1);
set->ext_size += sizeof(*c) + strlen(c->str) + 1;
rcu_assign_pointer(comment->c, c);
}
@@ -684,6 +685,14 @@ __ip_set_put(struct ip_set *set)
* a separate reference counter
*/
static void
+__ip_set_get_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ set->ref_netlink++;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -701,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set)
static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
- struct ip_set *set;
struct ip_set_net *inst = ip_set_pernet(net);
- rcu_read_lock();
- /* ip_set_list itself needs to be protected */
- set = rcu_dereference(inst->ip_set_list)[index];
- rcu_read_unlock();
-
- return set;
+ /* ip_set_list and the set pointer need to be protected */
+ return ip_set_dereference_nfnl(inst->ip_set_list)[index];
}
static inline void
@@ -740,9 +744,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0;
- rcu_read_lock_bh();
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
- rcu_read_unlock_bh();
if (ret == -EAGAIN) {
/* Type requests element to be completed */
@@ -875,7 +877,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
BUG_ON(!set);
read_lock_bh(&ip_set_ref_lock);
- strncpy(name, set->name, IPSET_MAXNAMELEN);
+ strscpy_pad(name, set->name, IPSET_MAXNAMELEN);
read_unlock_bh(&ip_set_ref_lock);
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
@@ -923,11 +925,9 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
struct ip_set_net *inst = ip_set_pernet(net);
nfnl_lock(NFNL_SUBSYS_IPSET);
- if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
- set = ip_set(inst, index);
- if (set)
- __ip_set_put(set);
- }
+ set = ip_set(inst, index);
+ if (set)
+ __ip_set_put(set);
nfnl_unlock(NFNL_SUBSYS_IPSET);
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -1082,7 +1082,7 @@ IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl,
if (!set)
return -ENOMEM;
spin_lock_init(&set->lock);
- strlcpy(set->name, name, IPSET_MAXNAMELEN);
+ strscpy(set->name, name, IPSET_MAXNAMELEN);
set->family = family;
set->revision = revision;
@@ -1161,6 +1161,7 @@ IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl,
return ret;
cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -1178,17 +1179,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
.len = IPSET_MAXNAMELEN - 1 },
};
+/* Destroying a set is split into two stages when a DESTROY command issued:
+ * - Cancel garbage collectors and decrement the module reference counter:
+ * - Cancelling may wait and we are allowed to do it at this stage.
+ * - Module remove is protected by rcu_barrier() which waits for
+ * the second stage to be finished.
+ * - In order to prevent the race between kernel side add/del/test element
+ * operations and destroy, the destroying of the set data areas are
+ * performed via a call_rcu() call.
+ */
+
+/* Call set variant specific destroy function and reclaim the set data. */
static void
-ip_set_destroy_set(struct ip_set *set)
+ip_set_destroy_set_variant(struct ip_set *set)
{
- pr_debug("set: %s\n", set->name);
-
/* Must call it without holding any lock */
set->variant->destroy(set);
- module_put(set->type->me);
kfree(set);
}
+static void
+ip_set_destroy_set_variant_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set_variant(set);
+}
+
+/* Cancel the garbage collectors and decrement module references */
+static void
+ip_set_destroy_cancel_gc(struct ip_set *set)
+{
+ set->variant->cancel_gc(set);
+ module_put(set->type->me);
+}
+
+/* Use when we may wait for the complete destroy to be finished.
+ */
+static void
+ip_set_destroy_set(struct ip_set *set)
+{
+ ip_set_destroy_cancel_gc(set);
+ ip_set_destroy_set_variant(set);
+}
+
static int
IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
@@ -1204,9 +1238,6 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
-
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
* External systems (i.e. xt_set) must call
@@ -1217,8 +1248,10 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1239,6 +1272,9 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(INFO_NLH(info, nlh));
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1249,10 +1285,16 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ ip_set_destroy_cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_variant_rcu);
}
return 0;
out:
@@ -1350,11 +1392,11 @@ IPSET_CBFN(ip_set_rename, struct net *net, struct sock *ctnl,
goto out;
}
}
- ret = strscpy(set->name, name2, IPSET_MAXNAMELEN);
+ strscpy_pad(set->name, name2, IPSET_MAXNAMELEN);
out:
write_unlock_bh(&ip_set_ref_lock);
- return ret < 0 ? ret : 0;
+ return ret;
}
/* Swap two sets so that name/index points to the other.
@@ -1408,9 +1450,9 @@ IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl,
return -EBUSY;
}
- strncpy(from_name, from->name, IPSET_MAXNAMELEN);
- strncpy(from->name, to->name, IPSET_MAXNAMELEN);
- strncpy(to->name, from_name, IPSET_MAXNAMELEN);
+ strscpy_pad(from_name, from->name, IPSET_MAXNAMELEN);
+ strscpy_pad(from->name, to->name, IPSET_MAXNAMELEN);
+ strscpy_pad(to->name, from_name, IPSET_MAXNAMELEN);
swap(from->ref, to->ref);
ip_set(inst, from_id) = to;
@@ -1750,13 +1792,22 @@ CALL_AD(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
+ if (retried) {
+ __ip_set_get_netlink(set);
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ cond_resched();
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ __ip_set_put_netlink(set);
+ }
+
ip_set_lock(set);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
retried = true;
- } while (ret == -EAGAIN &&
- set->variant->resize &&
- (ret = set->variant->resize(set, retried)) == 0);
+ } while (ret == -ERANGE ||
+ (ret == -EAGAIN &&
+ set->variant->resize &&
+ (ret = set->variant->resize(set, retried)) == 0));
if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
return 0;
@@ -1775,11 +1826,12 @@ CALL_AD(struct net *net, struct sock *ctnl, struct sk_buff *skb,
skb2 = nlmsg_new(payload, GFP_KERNEL);
if (!skb2)
return -ENOMEM;
- rep = __nlmsg_put(skb2, NETLINK_PORTID(skb),
- nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
+ rep = nlmsg_put(skb2, NETLINK_PORTID(skb),
+ nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
errmsg = nlmsg_data(rep);
errmsg->error = ret;
- memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
+ unsafe_memcpy(&errmsg->msg, nlh, nlh->nlmsg_len,
+ /* Bounds checked by the skb layer. */);
cmdattr = (void *)&errmsg->msg + min_len;
ret = NLA_PARSE(cda, IPSET_ATTR_CMD_MAX, cmdattr,
@@ -2429,7 +2481,6 @@ ip_set_net_init(struct net *net)
#else
goto err_alloc;
#endif
- inst->is_deleted = false;
inst->is_destroyed = false;
rcu_assign_pointer(inst->ip_set_list, list);
return 0;
@@ -2446,20 +2497,6 @@ ip_set_net_exit(struct net *net)
{
struct ip_set_net *inst = ip_set_pernet(net);
- struct ip_set *set = NULL;
- ip_set_id_t i;
-
- inst->is_deleted = true; /* flag for ip_set_nfnl_put */
-
- nfnl_lock(NFNL_SUBSYS_IPSET);
- for (i = 0; i < inst->ip_set_max; i++) {
- set = ip_set(inst, i);
- if (set) {
- ip_set(inst, i) = NULL;
- ip_set_destroy_set(set);
- }
- }
- nfnl_unlock(NFNL_SUBSYS_IPSET);
kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
#ifndef HAVE_NET_OPS_ID
kvfree(inst);
@@ -2524,8 +2561,8 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
UNREGISTER_PERNET_SUBSYS(&ip_set_net_ops);
+
pr_debug("these are the famous last words\n");
}