From 920ddfa09efbd72a0fe43251cd19bc2c27aa3662 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Sep 2014 09:46:41 +0200 Subject: Introduce RCU in all set types instead of rwlock per set Performance is tested by Jesper Dangaard Brouer: Simple drop in FORWARD ~~~~~~~~~~~~~~~~~~~~~~ Dropping via simple iptables net-mask match:: iptables -t raw -N simple || iptables -t raw -F simple iptables -t raw -I simple -s 198.18.0.0/15 -j DROP iptables -t raw -D PREROUTING -j simple iptables -t raw -I PREROUTING -j simple Drop performance in "raw": 11.3Mpps Generator: sending 12.2Mpps (tx:12264083 pps) Drop via original ipset in RAW table ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Create a set with lots of elements:: sudo ./ipset destroy test echo "create test hash:ip hashsize 65536" > test.set for x in `seq 0 255`; do for y in `seq 0 255`; do echo "add test 198.18.$x.$y" >> test.set done done sudo ./ipset restore < test.set Dropping via ipset:: iptables -t raw -F iptables -t raw -N net198 || iptables -t raw -F net198 iptables -t raw -I net198 -m set --match-set test src -j DROP iptables -t raw -I PREROUTING -j net198 Drop performance in "raw" with ipset: 8Mpps Perf report numbers ipset drop in "raw":: + 24.65% ksoftirqd/1 [ip_set] [k] ip_set_test - 21.42% ksoftirqd/1 [kernel.kallsyms] [k] _raw_read_lock_bh - _raw_read_lock_bh + 99.88% ip_set_test - 19.42% ksoftirqd/1 [kernel.kallsyms] [k] _raw_read_unlock_bh - _raw_read_unlock_bh + 99.72% ip_set_test + 4.31% ksoftirqd/1 [ip_set_hash_ip] [k] hash_ip4_kadt + 2.27% ksoftirqd/1 [ixgbe] [k] ixgbe_fetch_rx_buffer + 2.18% ksoftirqd/1 [ip_tables] [k] ipt_do_table + 1.81% ksoftirqd/1 [ip_set_hash_ip] [k] hash_ip4_test + 1.61% ksoftirqd/1 [kernel.kallsyms] [k] __netif_receive_skb_core + 1.44% ksoftirqd/1 [kernel.kallsyms] [k] build_skb + 1.42% ksoftirqd/1 [kernel.kallsyms] [k] ip_rcv + 1.36% ksoftirqd/1 [kernel.kallsyms] [k] __local_bh_enable_ip + 1.16% ksoftirqd/1 [kernel.kallsyms] [k] dev_gro_receive + 1.09% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_unlock + 0.96% ksoftirqd/1 [ixgbe] [k] ixgbe_clean_rx_irq + 0.95% ksoftirqd/1 [kernel.kallsyms] [k] __netdev_alloc_frag + 0.88% ksoftirqd/1 [kernel.kallsyms] [k] kmem_cache_alloc + 0.87% ksoftirqd/1 [xt_set] [k] set_match_v3 + 0.85% ksoftirqd/1 [kernel.kallsyms] [k] inet_gro_receive + 0.83% ksoftirqd/1 [kernel.kallsyms] [k] nf_iterate + 0.76% ksoftirqd/1 [kernel.kallsyms] [k] put_compound_page + 0.75% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_lock Drop via ipset in RAW table with RCU-locking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ With RCU locking, the RW-lock is gone. Drop performance in "raw" with ipset with RCU-locking: 11.3Mpps Performance-tested-by: Jesper Dangaard Brouer --- kernel/net/netfilter/ipset/ip_set_core.c | 35 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'kernel/net/netfilter/ipset/ip_set_core.c') diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c index d37ee69..1f84e64 100644 --- a/kernel/net/netfilter/ipset/ip_set_core.c +++ b/kernel/net/netfilter/ipset/ip_set_core.c @@ -223,6 +223,7 @@ ip_set_type_register(struct ip_set_type *type) type->revision_min, type->revision_max); unlock: ip_set_type_unlock(); + synchronize_rcu(); return ret; } EXPORT_SYMBOL_GPL(ip_set_type_register); @@ -502,16 +503,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return 0; - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set->variant->kadt(set, skb, par, IPSET_ADD, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); ret = 1; } else { /* --return-nomatch: invert matched element */ @@ -541,9 +542,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); return ret; } @@ -564,9 +565,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); return ret; } @@ -851,7 +852,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); if (!set) return -ENOMEM; - rwlock_init(&set->lock); + spin_lock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); set->family = family; set->revision = revision; @@ -1030,9 +1031,9 @@ ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set->variant->flush(set); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); } static int @@ -1333,9 +1334,9 @@ dump_last: goto next_set; /* Fall through and add elements */ default: - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->list(set, skb, cb); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; @@ -1423,9 +1424,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); retried = true; } while (ret == -EAGAIN && set->variant->resize && @@ -1605,9 +1606,9 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) ret = 1; -- cgit v1.2.3