summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--kernel/include/linux/netfilter/ipset/ip_set.h13
-rw-r--r--kernel/net/netfilter/ipset/ip_set_core.c30
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_gen.h44
-rw-r--r--tests/hash:ip.t4
-rwxr-xr-xtests/resize-and-list.sh11
5 files changed, 80 insertions, 22 deletions
diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h b/kernel/include/linux/netfilter/ipset/ip_set.h
index 05642d2..e640c62 100644
--- a/kernel/include/linux/netfilter/ipset/ip_set.h
+++ b/kernel/include/linux/netfilter/ipset/ip_set.h
@@ -177,6 +177,9 @@ struct ip_set_type_variant {
/* List elements */
int (*list)(const struct ip_set *set, struct sk_buff *skb,
struct netlink_callback *cb);
+ /* Keep listing private when resizing runs parallel */
+ void (*uref)(struct ip_set *set, struct netlink_callback *cb,
+ bool start);
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
@@ -424,12 +427,12 @@ ip_set_init_counter(struct ip_set_counter *counter,
/* Netlink CB args */
enum {
- IPSET_CB_NET = 0,
- IPSET_CB_DUMP,
- IPSET_CB_INDEX,
- IPSET_CB_ARG0,
+ IPSET_CB_NET = 0, /* net namespace */
+ IPSET_CB_DUMP, /* dump single set/all sets */
+ IPSET_CB_INDEX, /* set index */
+ IPSET_CB_PRIVATE, /* set private data */
+ IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
- IPSET_CB_ARG2,
};
/* register and unregister set references */
diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c
index b0b9611..872f71e 100644
--- a/kernel/net/netfilter/ipset/ip_set_core.c
+++ b/kernel/net/netfilter/ipset/ip_set_core.c
@@ -1185,13 +1185,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
static int
ip_set_dump_done(struct netlink_callback *cb)
{
- struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
-
if (cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n",
- ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
- __ip_set_put_byindex(inst,
- (ip_set_id_t) cb->args[IPSET_CB_INDEX]);
+ struct ip_set_net *inst =
+ (struct ip_set_net *)cb->args[IPSET_CB_NET];
+ ip_set_id_t index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
+ struct ip_set *set = ip_set(inst, index);
+
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
+ __ip_set_put_byindex(inst, index);
}
return 0;
}
@@ -1222,12 +1225,6 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
nla_parse(cda, IPSET_ATTR_CMD_MAX,
attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
- /* cb->args[IPSET_CB_NET]: net namespace
- * [IPSET_CB_DUMP]: dump single set/all sets
- * [IPSET_CB_INDEX]: set index
- * [IPSET_CB_ARG0]: type specific
- */
-
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
@@ -1335,6 +1332,8 @@ dump_last:
goto release_refcount;
if (dump_flags & IPSET_FLAG_LIST_HEADER)
goto next_set;
+ if (set->variant->uref)
+ set->variant->uref(set, cb, true);
/* Fall through and add elements */
default:
rcu_read_lock_bh();
@@ -1351,6 +1350,8 @@ dump_last:
dump_type = DUMP_LAST;
cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
cb->args[IPSET_CB_INDEX] = 0;
+ if (set && set->variant->uref)
+ set->variant->uref(set, cb, false);
goto dump_last;
}
goto out;
@@ -1365,7 +1366,10 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n", ip_set(inst, index)->name);
+ set = ip_set(inst, index);
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
__ip_set_put_byindex(inst, index);
cb->args[IPSET_CB_ARG0] = 0;
}
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_gen.h b/kernel/net/netfilter/ipset/ip_set_hash_gen.h
index 6fd7db7..6c8fc0b 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/kernel/net/netfilter/ipset/ip_set_hash_gen.h
@@ -75,6 +75,8 @@ struct hbucket {
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
+ atomic_t ref; /* References for resizing */
+ atomic_t uref; /* References for dumping */
u8 htable_bits; /* size of hash table == 2^htable_bits */
struct hbucket * __rcu bucket[0]; /* hashtable buckets */
};
@@ -184,6 +186,7 @@ htable_bits(u32 hashsize)
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
+#undef mtype_uref
#undef mtype_expire
#undef mtype_resize
#undef mtype_head
@@ -225,6 +228,7 @@ htable_bits(u32 hashsize)
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
@@ -562,6 +566,8 @@ retry:
spin_lock_bh(&set->lock);
orig = h->table;
+ atomic_set(&orig->ref, 1);
+ atomic_inc(&orig->uref);
pr_debug("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
for (i = 0; i < jhash_size(orig->htable_bits); i++) {
@@ -605,6 +611,8 @@ retry:
ret = -ENOMEM;
}
if (ret < 0) {
+ atomic_set(&orig->ref, 0);
+ atomic_dec(&orig->uref);
spin_unlock_bh(&set->lock);
mtype_ahash_destroy(set, t, false);
if (ret == -EAGAIN)
@@ -634,7 +642,11 @@ retry:
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- mtype_ahash_destroy(set, orig, false);
+ /* If there's nobody else dumping the table, destroy it */
+ if (atomic_dec_and_test(&orig->uref)) {
+ pr_debug("Table destroy by resize %p\n", orig);
+ mtype_ahash_destroy(set, orig, false);
+ }
return 0;
@@ -1032,12 +1044,35 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* Make possible to run dumping parallel with resizing */
+static void
+mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+
+ if (start) {
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh_nfnl(h->table);
+ atomic_inc(&t->uref);
+ cb->args[IPSET_CB_PRIVATE] = (unsigned long) t;
+ rcu_read_unlock_bh();
+ } else if (cb->args[IPSET_CB_PRIVATE]) {
+ t = (struct htable *) cb->args[IPSET_CB_PRIVATE];
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ /* Resizing didn't destroy the hash table */
+ pr_debug("Table destroy by dump: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ cb->args[IPSET_CB_PRIVATE] = 0;
+ }
+}
+
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct htype *h = set->data;
const struct htable *t;
struct nlattr *atd, *nested;
const struct hbucket *n;
@@ -1052,11 +1087,11 @@ mtype_list(const struct ip_set *set,
return -EMSGSIZE;
pr_debug("list hash set %s\n", set->name);
- t = rcu_dereference_bh_nfnl(h->table);
+ t = (const struct htable *) cb->args[IPSET_CB_PRIVATE];
for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
cb->args[IPSET_CB_ARG0]++) {
incomplete = skb_tail_pointer(skb);
- n = rcu_dereference_bh(hbucket(t, cb->args[IPSET_CB_ARG0]));
+ n = hbucket(t, cb->args[IPSET_CB_ARG0]);
pr_debug("cb->arg bucket: %lu, t %p n %p\n",
cb->args[IPSET_CB_ARG0], t, n);
if (n == NULL)
@@ -1126,6 +1161,7 @@ static const struct ip_set_type_variant mtype_variant = {
.flush = mtype_flush,
.head = mtype_head,
.list = mtype_list,
+ .uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
};
diff --git a/tests/hash:ip.t b/tests/hash:ip.t
index 2ad6831..a3ab1e2 100644
--- a/tests/hash:ip.t
+++ b/tests/hash:ip.t
@@ -58,6 +58,10 @@
0 ipset -W test resize-test
# IP: Check listing, which requires multiple messages
0 n=`ipset -S test | wc -l` && test $n -eq 8161
+# IP: Flush sets
+0 ipset -F
+# IP: Run resize and listing parallel
+0 ./resize-and-list.sh
# IP: Destroy sets
0 ipset -X
# IP: Create set to add a range
diff --git a/tests/resize-and-list.sh b/tests/resize-and-list.sh
new file mode 100755
index 0000000..209408a
--- /dev/null
+++ b/tests/resize-and-list.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+./resize.sh &
+n=0
+while [ $n -ne 8161 ]; do
+ echo $n
+ n=`ipset -S resize-test | wc -l`
+done
+ipset x
+
+ \ No newline at end of file