From 920ddfa09efbd72a0fe43251cd19bc2c27aa3662 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Sep 2014 09:46:41 +0200 Subject: Introduce RCU in all set types instead of rwlock per set Performance is tested by Jesper Dangaard Brouer: Simple drop in FORWARD ~~~~~~~~~~~~~~~~~~~~~~ Dropping via simple iptables net-mask match:: iptables -t raw -N simple || iptables -t raw -F simple iptables -t raw -I simple -s 198.18.0.0/15 -j DROP iptables -t raw -D PREROUTING -j simple iptables -t raw -I PREROUTING -j simple Drop performance in "raw": 11.3Mpps Generator: sending 12.2Mpps (tx:12264083 pps) Drop via original ipset in RAW table ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Create a set with lots of elements:: sudo ./ipset destroy test echo "create test hash:ip hashsize 65536" > test.set for x in `seq 0 255`; do for y in `seq 0 255`; do echo "add test 198.18.$x.$y" >> test.set done done sudo ./ipset restore < test.set Dropping via ipset:: iptables -t raw -F iptables -t raw -N net198 || iptables -t raw -F net198 iptables -t raw -I net198 -m set --match-set test src -j DROP iptables -t raw -I PREROUTING -j net198 Drop performance in "raw" with ipset: 8Mpps Perf report numbers ipset drop in "raw":: + 24.65% ksoftirqd/1 [ip_set] [k] ip_set_test - 21.42% ksoftirqd/1 [kernel.kallsyms] [k] _raw_read_lock_bh - _raw_read_lock_bh + 99.88% ip_set_test - 19.42% ksoftirqd/1 [kernel.kallsyms] [k] _raw_read_unlock_bh - _raw_read_unlock_bh + 99.72% ip_set_test + 4.31% ksoftirqd/1 [ip_set_hash_ip] [k] hash_ip4_kadt + 2.27% ksoftirqd/1 [ixgbe] [k] ixgbe_fetch_rx_buffer + 2.18% ksoftirqd/1 [ip_tables] [k] ipt_do_table + 1.81% ksoftirqd/1 [ip_set_hash_ip] [k] hash_ip4_test + 1.61% ksoftirqd/1 [kernel.kallsyms] [k] __netif_receive_skb_core + 1.44% ksoftirqd/1 [kernel.kallsyms] [k] build_skb + 1.42% ksoftirqd/1 [kernel.kallsyms] [k] ip_rcv + 1.36% ksoftirqd/1 [kernel.kallsyms] [k] __local_bh_enable_ip + 1.16% ksoftirqd/1 [kernel.kallsyms] [k] dev_gro_receive + 1.09% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_unlock + 0.96% ksoftirqd/1 [ixgbe] [k] ixgbe_clean_rx_irq + 0.95% ksoftirqd/1 [kernel.kallsyms] [k] __netdev_alloc_frag + 0.88% ksoftirqd/1 [kernel.kallsyms] [k] kmem_cache_alloc + 0.87% ksoftirqd/1 [xt_set] [k] set_match_v3 + 0.85% ksoftirqd/1 [kernel.kallsyms] [k] inet_gro_receive + 0.83% ksoftirqd/1 [kernel.kallsyms] [k] nf_iterate + 0.76% ksoftirqd/1 [kernel.kallsyms] [k] put_compound_page + 0.75% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_lock Drop via ipset in RAW table with RCU-locking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ With RCU locking, the RW-lock is gone. Drop performance in "raw" with ipset with RCU-locking: 11.3Mpps Performance-tested-by: Jesper Dangaard Brouer --- kernel/include/linux/netfilter/ipset/ip_set.h | 82 ++++++++++++++++------ .../include/linux/netfilter/ipset/ip_set_timeout.h | 39 ++++++---- 2 files changed, 86 insertions(+), 35 deletions(-) (limited to 'kernel/include/linux/netfilter') diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h b/kernel/include/linux/netfilter/ipset/ip_set.h index 782571f..b8007a9 100644 --- a/kernel/include/linux/netfilter/ipset/ip_set.h +++ b/kernel/include/linux/netfilter/ipset/ip_set.h @@ -114,10 +114,10 @@ struct ip_set_comment { }; struct ip_set_skbinfo { - u32 skbmark; - u32 skbmarkmask; - u32 skbprio; - u16 skbqueue; + u32 __rcu skbmark; + u32 __rcu skbmarkmask; + u32 __rcu skbprio; + u16 __rcu skbqueue; }; struct ip_set; @@ -224,7 +224,7 @@ struct ip_set { /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ - rwlock_t lock; + spinlock_t lock; /* References to the set */ u32 ref; /* The core set type */ @@ -323,30 +323,72 @@ ip_set_update_counter(struct ip_set_counter *counter, } } +/* RCU-safe assign value */ +#define IP_SET_RCU_ASSIGN(ptr, value) \ +do { \ + smp_wmb(); \ + *(ptr) = value; \ +} while (0) + +static inline void +ip_set_rcu_assign_ulong(unsigned long *v, unsigned long value) +{ + IP_SET_RCU_ASSIGN(v, value); +} + +static inline void +ip_set_rcu_assign_u32(u32 *v, u32 value) +{ + IP_SET_RCU_ASSIGN(v, value); +} + +static inline void +ip_set_rcu_assign_u16(u16 *v, u16 value) +{ + IP_SET_RCU_ASSIGN(v, value); +} + +static inline void +ip_set_rcu_assign_u8(u8 *v, u8 value) +{ + IP_SET_RCU_ASSIGN(v, value); +} + +#define ip_set_rcu_deref(t) \ + rcu_dereference_index_check(t, \ + rcu_read_lock_held() || rcu_read_lock_bh_held()) + static inline void ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { - mext->skbmark = skbinfo->skbmark; - mext->skbmarkmask = skbinfo->skbmarkmask; - mext->skbprio = skbinfo->skbprio; - mext->skbqueue = skbinfo->skbqueue; + mext->skbmark = ip_set_rcu_deref(skbinfo->skbmark); + mext->skbmarkmask = ip_set_rcu_deref(skbinfo->skbmarkmask); + mext->skbprio = ip_set_rcu_deref(skbinfo->skbprio); + mext->skbqueue = ip_set_rcu_deref(skbinfo->skbqueue); } static inline bool ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) { + u32 skbmark, skbmarkmask, skbprio; + u16 skbqueue; + + skbmark = ip_set_rcu_deref(skbinfo->skbmark); + skbmarkmask = ip_set_rcu_deref(skbinfo->skbmarkmask); + skbprio = ip_set_rcu_deref(skbinfo->skbprio); + skbqueue = ip_set_rcu_deref(skbinfo->skbqueue); /* Send nonzero parameters only */ - return ((skbinfo->skbmark || skbinfo->skbmarkmask) && + return ((skbmark || skbmarkmask) && nla_put_net64(skb, IPSET_ATTR_SKBMARK, - cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask))) || - (skbinfo->skbprio && + cpu_to_be64((u64)skbmark << 32 | + skbmarkmask))) || + (skbprio && nla_put_net32(skb, IPSET_ATTR_SKBPRIO, - cpu_to_be32(skbinfo->skbprio))) || - (skbinfo->skbqueue && + cpu_to_be32(skbprio))) || + (skbqueue && nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, - cpu_to_be16(skbinfo->skbqueue))); + cpu_to_be16(skbqueue))); } @@ -354,10 +396,10 @@ static inline void ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext) { - skbinfo->skbmark = ext->skbmark; - skbinfo->skbmarkmask = ext->skbmarkmask; - skbinfo->skbprio = ext->skbprio; - skbinfo->skbqueue = ext->skbqueue; + ip_set_rcu_assign_u32(&skbinfo->skbmark, ext->skbmark); + ip_set_rcu_assign_u32(&skbinfo->skbmarkmask, ext->skbmarkmask); + ip_set_rcu_assign_u32(&skbinfo->skbprio, ext->skbprio); + ip_set_rcu_assign_u16(&skbinfo->skbqueue, ext->skbqueue); } static inline bool diff --git a/kernel/include/linux/netfilter/ipset/ip_set_timeout.h b/kernel/include/linux/netfilter/ipset/ip_set_timeout.h index 83c2f9e..9e30031 100644 --- a/kernel/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/kernel/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,38 +40,47 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_test(unsigned long timeout) +__ip_set_timeout_expired(unsigned long t) { - return timeout == IPSET_ELEM_PERMANENT || - time_is_after_jiffies(timeout); + return t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(t); +} + +static inline bool +ip_set_timeout_expired_rcu(unsigned long *timeout) +{ + unsigned long t = ip_set_rcu_deref(*timeout); + + return __ip_set_timeout_expired(t); } static inline bool ip_set_timeout_expired(unsigned long *timeout) { - return *timeout != IPSET_ELEM_PERMANENT && - time_is_before_jiffies(*timeout); + return __ip_set_timeout_expired(*timeout); } static inline void -ip_set_timeout_set(unsigned long *timeout, u32 t) +ip_set_timeout_set(unsigned long *timeout, u32 value) { - if (!t) { - *timeout = IPSET_ELEM_PERMANENT; - return; - } + unsigned long t; + + if (!value) + return ip_set_rcu_assign_ulong(timeout, IPSET_ELEM_PERMANENT); - *timeout = msecs_to_jiffies(t * 1000) + jiffies; - if (*timeout == IPSET_ELEM_PERMANENT) + t = msecs_to_jiffies(value * 1000) + jiffies; + if (t == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ - (*timeout)--; + t--; + ip_set_rcu_assign_ulong(timeout, t); } static inline u32 ip_set_timeout_get(unsigned long *timeout) { - return *timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(*timeout - jiffies)/1000; + unsigned long t = ip_set_rcu_deref(*timeout); + + return t == IPSET_ELEM_PERMANENT ? 0 : + jiffies_to_msecs(t - jiffies)/1000; } #endif /* __KERNEL__ */ -- cgit v1.2.3