diff options
author | Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | 2014-09-30 09:46:41 +0200 |
---|---|---|
committer | Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | 2014-11-18 07:56:26 +0100 |
commit | 920ddfa09efbd72a0fe43251cd19bc2c27aa3662 (patch) | |
tree | fd07063a447142eb69a6476d96d5f54f09453734 /kernel/include/linux | |
parent | 1608f755e87bb327a950a8cf21a93eb4461801d0 (diff) |
Introduce RCU in all set types instead of rwlock per set
Performance is tested by Jesper Dangaard Brouer:
Simple drop in FORWARD
~~~~~~~~~~~~~~~~~~~~~~
Dropping via simple iptables net-mask match::
iptables -t raw -N simple || iptables -t raw -F simple
iptables -t raw -I simple -s 198.18.0.0/15 -j DROP
iptables -t raw -D PREROUTING -j simple
iptables -t raw -I PREROUTING -j simple
Drop performance in "raw": 11.3Mpps
Generator: sending 12.2Mpps (tx:12264083 pps)
Drop via original ipset in RAW table
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Create a set with lots of elements::
sudo ./ipset destroy test
echo "create test hash:ip hashsize 65536" > test.set
for x in `seq 0 255`; do
for y in `seq 0 255`; do
echo "add test 198.18.$x.$y" >> test.set
done
done
sudo ./ipset restore < test.set
Dropping via ipset::
iptables -t raw -F
iptables -t raw -N net198 || iptables -t raw -F net198
iptables -t raw -I net198 -m set --match-set test src -j DROP
iptables -t raw -I PREROUTING -j net198
Drop performance in "raw" with ipset: 8Mpps
Perf report numbers for ipset drop in "raw"::
+ 24.65% ksoftirqd/1 [ip_set] [k] ip_set_test
- 21.42% ksoftirqd/1 [kernel.kallsyms] [k] _raw_read_lock_bh
- _raw_read_lock_bh
+ 99.88% ip_set_test
- 19.42% ksoftirqd/1 [kernel.kallsyms] [k] _raw_read_unlock_bh
- _raw_read_unlock_bh
+ 99.72% ip_set_test
+ 4.31% ksoftirqd/1 [ip_set_hash_ip] [k] hash_ip4_kadt
+ 2.27% ksoftirqd/1 [ixgbe] [k] ixgbe_fetch_rx_buffer
+ 2.18% ksoftirqd/1 [ip_tables] [k] ipt_do_table
+ 1.81% ksoftirqd/1 [ip_set_hash_ip] [k] hash_ip4_test
+ 1.61% ksoftirqd/1 [kernel.kallsyms] [k] __netif_receive_skb_core
+ 1.44% ksoftirqd/1 [kernel.kallsyms] [k] build_skb
+ 1.42% ksoftirqd/1 [kernel.kallsyms] [k] ip_rcv
+ 1.36% ksoftirqd/1 [kernel.kallsyms] [k] __local_bh_enable_ip
+ 1.16% ksoftirqd/1 [kernel.kallsyms] [k] dev_gro_receive
+ 1.09% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_unlock
+ 0.96% ksoftirqd/1 [ixgbe] [k] ixgbe_clean_rx_irq
+ 0.95% ksoftirqd/1 [kernel.kallsyms] [k] __netdev_alloc_frag
+ 0.88% ksoftirqd/1 [kernel.kallsyms] [k] kmem_cache_alloc
+ 0.87% ksoftirqd/1 [xt_set] [k] set_match_v3
+ 0.85% ksoftirqd/1 [kernel.kallsyms] [k] inet_gro_receive
+ 0.83% ksoftirqd/1 [kernel.kallsyms] [k] nf_iterate
+ 0.76% ksoftirqd/1 [kernel.kallsyms] [k] put_compound_page
+ 0.75% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_lock
Drop via ipset in RAW table with RCU-locking
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
With RCU locking, the RW-lock is gone.
Drop performance in "raw" with ipset with RCU-locking: 11.3Mpps
Performance-tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Diffstat (limited to 'kernel/include/linux')
-rw-r--r-- | kernel/include/linux/netfilter/ipset/ip_set.h | 82 | ||||
-rw-r--r-- | kernel/include/linux/netfilter/ipset/ip_set_timeout.h | 39 |
2 files changed, 86 insertions, 35 deletions
diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h b/kernel/include/linux/netfilter/ipset/ip_set.h index 782571f..b8007a9 100644 --- a/kernel/include/linux/netfilter/ipset/ip_set.h +++ b/kernel/include/linux/netfilter/ipset/ip_set.h @@ -114,10 +114,10 @@ struct ip_set_comment { }; struct ip_set_skbinfo { - u32 skbmark; - u32 skbmarkmask; - u32 skbprio; - u16 skbqueue; + u32 __rcu skbmark; + u32 __rcu skbmarkmask; + u32 __rcu skbprio; + u16 __rcu skbqueue; }; struct ip_set; @@ -224,7 +224,7 @@ struct ip_set { /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ - rwlock_t lock; + spinlock_t lock; /* References to the set */ u32 ref; /* The core set type */ @@ -323,30 +323,72 @@ ip_set_update_counter(struct ip_set_counter *counter, } } +/* RCU-safe assign value */ +#define IP_SET_RCU_ASSIGN(ptr, value) \ +do { \ + smp_wmb(); \ + *(ptr) = value; \ +} while (0) + +static inline void +ip_set_rcu_assign_ulong(unsigned long *v, unsigned long value) +{ + IP_SET_RCU_ASSIGN(v, value); +} + +static inline void +ip_set_rcu_assign_u32(u32 *v, u32 value) +{ + IP_SET_RCU_ASSIGN(v, value); +} + +static inline void +ip_set_rcu_assign_u16(u16 *v, u16 value) +{ + IP_SET_RCU_ASSIGN(v, value); +} + +static inline void +ip_set_rcu_assign_u8(u8 *v, u8 value) +{ + IP_SET_RCU_ASSIGN(v, value); +} + +#define ip_set_rcu_deref(t) \ + rcu_dereference_index_check(t, \ + rcu_read_lock_held() || rcu_read_lock_bh_held()) + static inline void ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { - mext->skbmark = skbinfo->skbmark; - mext->skbmarkmask = skbinfo->skbmarkmask; - mext->skbprio = skbinfo->skbprio; - mext->skbqueue = skbinfo->skbqueue; + mext->skbmark = ip_set_rcu_deref(skbinfo->skbmark); + mext->skbmarkmask = ip_set_rcu_deref(skbinfo->skbmarkmask); + mext->skbprio = ip_set_rcu_deref(skbinfo->skbprio); + mext->skbqueue = ip_set_rcu_deref(skbinfo->skbqueue); } static inline bool 
ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) { + u32 skbmark, skbmarkmask, skbprio; + u16 skbqueue; + + skbmark = ip_set_rcu_deref(skbinfo->skbmark); + skbmarkmask = ip_set_rcu_deref(skbinfo->skbmarkmask); + skbprio = ip_set_rcu_deref(skbinfo->skbprio); + skbqueue = ip_set_rcu_deref(skbinfo->skbqueue); /* Send nonzero parameters only */ - return ((skbinfo->skbmark || skbinfo->skbmarkmask) && + return ((skbmark || skbmarkmask) && nla_put_net64(skb, IPSET_ATTR_SKBMARK, - cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask))) || - (skbinfo->skbprio && + cpu_to_be64((u64)skbmark << 32 | + skbmarkmask))) || + (skbprio && nla_put_net32(skb, IPSET_ATTR_SKBPRIO, - cpu_to_be32(skbinfo->skbprio))) || - (skbinfo->skbqueue && + cpu_to_be32(skbprio))) || + (skbqueue && nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, - cpu_to_be16(skbinfo->skbqueue))); + cpu_to_be16(skbqueue))); } @@ -354,10 +396,10 @@ static inline void ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext) { - skbinfo->skbmark = ext->skbmark; - skbinfo->skbmarkmask = ext->skbmarkmask; - skbinfo->skbprio = ext->skbprio; - skbinfo->skbqueue = ext->skbqueue; + ip_set_rcu_assign_u32(&skbinfo->skbmark, ext->skbmark); + ip_set_rcu_assign_u32(&skbinfo->skbmarkmask, ext->skbmarkmask); + ip_set_rcu_assign_u32(&skbinfo->skbprio, ext->skbprio); + ip_set_rcu_assign_u16(&skbinfo->skbqueue, ext->skbqueue); } static inline bool diff --git a/kernel/include/linux/netfilter/ipset/ip_set_timeout.h b/kernel/include/linux/netfilter/ipset/ip_set_timeout.h index 83c2f9e..9e30031 100644 --- a/kernel/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/kernel/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,38 +40,47 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_test(unsigned long timeout) +__ip_set_timeout_expired(unsigned long t) { - return timeout == IPSET_ELEM_PERMANENT || - time_is_after_jiffies(timeout); + return t != 
IPSET_ELEM_PERMANENT && time_is_before_jiffies(t); +} + +static inline bool +ip_set_timeout_expired_rcu(unsigned long *timeout) +{ + unsigned long t = ip_set_rcu_deref(*timeout); + + return __ip_set_timeout_expired(t); } static inline bool ip_set_timeout_expired(unsigned long *timeout) { - return *timeout != IPSET_ELEM_PERMANENT && - time_is_before_jiffies(*timeout); + return __ip_set_timeout_expired(*timeout); } static inline void -ip_set_timeout_set(unsigned long *timeout, u32 t) +ip_set_timeout_set(unsigned long *timeout, u32 value) { - if (!t) { - *timeout = IPSET_ELEM_PERMANENT; - return; - } + unsigned long t; + + if (!value) + return ip_set_rcu_assign_ulong(timeout, IPSET_ELEM_PERMANENT); - *timeout = msecs_to_jiffies(t * 1000) + jiffies; - if (*timeout == IPSET_ELEM_PERMANENT) + t = msecs_to_jiffies(value * 1000) + jiffies; + if (t == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ - (*timeout)--; + t--; + ip_set_rcu_assign_ulong(timeout, t); } static inline u32 ip_set_timeout_get(unsigned long *timeout) { - return *timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(*timeout - jiffies)/1000; + unsigned long t = ip_set_rcu_deref(*timeout); + + return t == IPSET_ELEM_PERMANENT ? 0 : + jiffies_to_msecs(t - jiffies)/1000; } #endif /* __KERNEL__ */ |