author     Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   2014-09-30 09:46:41 +0200
committer  Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   2014-11-18 07:56:26 +0100
commit     920ddfa09efbd72a0fe43251cd19bc2c27aa3662 (patch)
tree       fd07063a447142eb69a6476d96d5f54f09453734 /kernel/include/linux
parent     1608f755e87bb327a950a8cf21a93eb4461801d0 (diff)
Introduce RCU in all set types instead of rwlock per set
Performance is tested by Jesper Dangaard Brouer:

Simple drop in FORWARD
~~~~~~~~~~~~~~~~~~~~~~

Dropping via simple iptables net-mask match::

 iptables -t raw -N simple || iptables -t raw -F simple
 iptables -t raw -I simple -s 198.18.0.0/15 -j DROP
 iptables -t raw -D PREROUTING -j simple
 iptables -t raw -I PREROUTING -j simple

Drop performance in "raw": 11.3Mpps

 Generator: sending 12.2Mpps (tx:12264083 pps)

Drop via original ipset in RAW table
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Create a set with lots of elements::

 sudo ./ipset destroy test
 echo "create test hash:ip hashsize 65536" > test.set
 for x in `seq 0 255`; do
        for y in `seq 0 255`; do
                echo "add test 198.18.$x.$y" >> test.set
        done
 done
 sudo ./ipset restore < test.set

Dropping via ipset::

 iptables -t raw -F
 iptables -t raw -N net198 || iptables -t raw -F net198
 iptables -t raw -I net198 -m set --match-set test src -j DROP
 iptables -t raw -I PREROUTING -j net198

Drop performance in "raw" with ipset: 8Mpps

Perf report numbers ipset drop in "raw"::

 +   24.65%  ksoftirqd/1  [ip_set]           [k] ip_set_test
 -   21.42%  ksoftirqd/1  [kernel.kallsyms]  [k] _raw_read_lock_bh
    - _raw_read_lock_bh
       + 99.88% ip_set_test
 -   19.42%  ksoftirqd/1  [kernel.kallsyms]  [k] _raw_read_unlock_bh
    - _raw_read_unlock_bh
       + 99.72% ip_set_test
 +    4.31%  ksoftirqd/1  [ip_set_hash_ip]   [k] hash_ip4_kadt
 +    2.27%  ksoftirqd/1  [ixgbe]            [k] ixgbe_fetch_rx_buffer
 +    2.18%  ksoftirqd/1  [ip_tables]        [k] ipt_do_table
 +    1.81%  ksoftirqd/1  [ip_set_hash_ip]   [k] hash_ip4_test
 +    1.61%  ksoftirqd/1  [kernel.kallsyms]  [k] __netif_receive_skb_core
 +    1.44%  ksoftirqd/1  [kernel.kallsyms]  [k] build_skb
 +    1.42%  ksoftirqd/1  [kernel.kallsyms]  [k] ip_rcv
 +    1.36%  ksoftirqd/1  [kernel.kallsyms]  [k] __local_bh_enable_ip
 +    1.16%  ksoftirqd/1  [kernel.kallsyms]  [k] dev_gro_receive
 +    1.09%  ksoftirqd/1  [kernel.kallsyms]  [k] __rcu_read_unlock
 +    0.96%  ksoftirqd/1  [ixgbe]            [k] ixgbe_clean_rx_irq
 +    0.95%  ksoftirqd/1  [kernel.kallsyms]  [k] __netdev_alloc_frag
 +    0.88%  ksoftirqd/1  [kernel.kallsyms]  [k] kmem_cache_alloc
 +    0.87%  ksoftirqd/1  [xt_set]           [k] set_match_v3
 +    0.85%  ksoftirqd/1  [kernel.kallsyms]  [k] inet_gro_receive
 +    0.83%  ksoftirqd/1  [kernel.kallsyms]  [k] nf_iterate
 +    0.76%  ksoftirqd/1  [kernel.kallsyms]  [k] put_compound_page
 +    0.75%  ksoftirqd/1  [kernel.kallsyms]  [k] __rcu_read_lock

Drop via ipset in RAW table with RCU-locking
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

With RCU locking, the RW-lock is gone.

Drop performance in "raw" with ipset with RCU-locking: 11.3Mpps

Performance-tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
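A minimal sketch, not part of the patch, of the locking pattern this change moves to: lookups in the packet path rely only on the RCU read side that the netfilter hooks already hold around ip_set_test(), while updates serialize on the new per-set spinlock and publish values through the RCU-safe helpers added below. The set type my_set and both functions are hypothetical; the timeout is placed directly in the set only for brevity::

 #include <linux/spinlock.h>
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_timeout.h>

 struct my_set {                         /* hypothetical set type */
         spinlock_t lock;                /* writers only; was rwlock_t */
         unsigned long timeout;          /* data read under RCU */
 };

 /* Packet-path lookup: takes no read lock, relies on the RCU read
  * side already held by the netfilter hook calling into ipset. */
 static bool my_set_test(struct my_set *set)
 {
         return !ip_set_timeout_expired_rcu(&set->timeout);
 }

 /* Control-path update: serialize writers on the spinlock and publish
  * the new value with the RCU-safe assignment helper. */
 static void my_set_update(struct my_set *set, u32 timeout_secs)
 {
         spin_lock_bh(&set->lock);
         ip_set_timeout_set(&set->timeout, timeout_secs);
         spin_unlock_bh(&set->lock);
 }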
Diffstat (limited to 'kernel/include/linux')
-rw-r--r--  kernel/include/linux/netfilter/ipset/ip_set.h           82
-rw-r--r--  kernel/include/linux/netfilter/ipset/ip_set_timeout.h   39
2 files changed, 86 insertions(+), 35 deletions(-)
diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h b/kernel/include/linux/netfilter/ipset/ip_set.h
index 782571f..b8007a9 100644
--- a/kernel/include/linux/netfilter/ipset/ip_set.h
+++ b/kernel/include/linux/netfilter/ipset/ip_set.h
@@ -114,10 +114,10 @@ struct ip_set_comment {
};
struct ip_set_skbinfo {
- u32 skbmark;
- u32 skbmarkmask;
- u32 skbprio;
- u16 skbqueue;
+ u32 __rcu skbmark;
+ u32 __rcu skbmarkmask;
+ u32 __rcu skbprio;
+ u16 __rcu skbqueue;
};
struct ip_set;
@@ -224,7 +224,7 @@ struct ip_set {
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
- rwlock_t lock;
+ spinlock_t lock;
/* References to the set */
u32 ref;
/* The core set type */
@@ -323,30 +323,72 @@ ip_set_update_counter(struct ip_set_counter *counter,
}
}
+/* RCU-safe assign value */
+#define IP_SET_RCU_ASSIGN(ptr, value) \
+do { \
+ smp_wmb(); \
+ *(ptr) = value; \
+} while (0)
+
+static inline void
+ip_set_rcu_assign_ulong(unsigned long *v, unsigned long value)
+{
+ IP_SET_RCU_ASSIGN(v, value);
+}
+
+static inline void
+ip_set_rcu_assign_u32(u32 *v, u32 value)
+{
+ IP_SET_RCU_ASSIGN(v, value);
+}
+
+static inline void
+ip_set_rcu_assign_u16(u16 *v, u16 value)
+{
+ IP_SET_RCU_ASSIGN(v, value);
+}
+
+static inline void
+ip_set_rcu_assign_u8(u8 *v, u8 value)
+{
+ IP_SET_RCU_ASSIGN(v, value);
+}
+
+#define ip_set_rcu_deref(t) \
+ rcu_dereference_index_check(t, \
+ rcu_read_lock_held() || rcu_read_lock_bh_held())
+
static inline void
ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags)
{
- mext->skbmark = skbinfo->skbmark;
- mext->skbmarkmask = skbinfo->skbmarkmask;
- mext->skbprio = skbinfo->skbprio;
- mext->skbqueue = skbinfo->skbqueue;
+ mext->skbmark = ip_set_rcu_deref(skbinfo->skbmark);
+ mext->skbmarkmask = ip_set_rcu_deref(skbinfo->skbmarkmask);
+ mext->skbprio = ip_set_rcu_deref(skbinfo->skbprio);
+ mext->skbqueue = ip_set_rcu_deref(skbinfo->skbqueue);
}
static inline bool
ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
{
+ u32 skbmark, skbmarkmask, skbprio;
+ u16 skbqueue;
+
+ skbmark = ip_set_rcu_deref(skbinfo->skbmark);
+ skbmarkmask = ip_set_rcu_deref(skbinfo->skbmarkmask);
+ skbprio = ip_set_rcu_deref(skbinfo->skbprio);
+ skbqueue = ip_set_rcu_deref(skbinfo->skbqueue);
/* Send nonzero parameters only */
- return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+ return ((skbmark || skbmarkmask) &&
nla_put_net64(skb, IPSET_ATTR_SKBMARK,
- cpu_to_be64((u64)skbinfo->skbmark << 32 |
- skbinfo->skbmarkmask))) ||
- (skbinfo->skbprio &&
+ cpu_to_be64((u64)skbmark << 32 |
+ skbmarkmask))) ||
+ (skbprio &&
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
- cpu_to_be32(skbinfo->skbprio))) ||
- (skbinfo->skbqueue &&
+ cpu_to_be32(skbprio))) ||
+ (skbqueue &&
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
- cpu_to_be16(skbinfo->skbqueue)));
+ cpu_to_be16(skbqueue)));
}
@@ -354,10 +396,10 @@ static inline void
ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
const struct ip_set_ext *ext)
{
- skbinfo->skbmark = ext->skbmark;
- skbinfo->skbmarkmask = ext->skbmarkmask;
- skbinfo->skbprio = ext->skbprio;
- skbinfo->skbqueue = ext->skbqueue;
+ ip_set_rcu_assign_u32(&skbinfo->skbmark, ext->skbmark);
+ ip_set_rcu_assign_u32(&skbinfo->skbmarkmask, ext->skbmarkmask);
+ ip_set_rcu_assign_u32(&skbinfo->skbprio, ext->skbprio);
+ ip_set_rcu_assign_u16(&skbinfo->skbqueue, ext->skbqueue);
}
static inline bool
diff --git a/kernel/include/linux/netfilter/ipset/ip_set_timeout.h b/kernel/include/linux/netfilter/ipset/ip_set_timeout.h
index 83c2f9e..9e30031 100644
--- a/kernel/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/kernel/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -40,38 +40,47 @@ ip_set_timeout_uget(struct nlattr *tb)
}
static inline bool
-ip_set_timeout_test(unsigned long timeout)
+__ip_set_timeout_expired(unsigned long t)
{
- return timeout == IPSET_ELEM_PERMANENT ||
- time_is_after_jiffies(timeout);
+ return t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(t);
+}
+
+static inline bool
+ip_set_timeout_expired_rcu(unsigned long *timeout)
+{
+ unsigned long t = ip_set_rcu_deref(*timeout);
+
+ return __ip_set_timeout_expired(t);
}
static inline bool
ip_set_timeout_expired(unsigned long *timeout)
{
- return *timeout != IPSET_ELEM_PERMANENT &&
- time_is_before_jiffies(*timeout);
+ return __ip_set_timeout_expired(*timeout);
}
static inline void
-ip_set_timeout_set(unsigned long *timeout, u32 t)
+ip_set_timeout_set(unsigned long *timeout, u32 value)
{
- if (!t) {
- *timeout = IPSET_ELEM_PERMANENT;
- return;
- }
+ unsigned long t;
+
+ if (!value)
+ return ip_set_rcu_assign_ulong(timeout, IPSET_ELEM_PERMANENT);
- *timeout = msecs_to_jiffies(t * 1000) + jiffies;
- if (*timeout == IPSET_ELEM_PERMANENT)
+ t = msecs_to_jiffies(value * 1000) + jiffies;
+ if (t == IPSET_ELEM_PERMANENT)
/* Bingo! :-) */
- (*timeout)--;
+ t--;
+ ip_set_rcu_assign_ulong(timeout, t);
}
static inline u32
ip_set_timeout_get(unsigned long *timeout)
{
- return *timeout == IPSET_ELEM_PERMANENT ? 0 :
- jiffies_to_msecs(*timeout - jiffies)/1000;
+ unsigned long t = ip_set_rcu_deref(*timeout);
+
+ return t == IPSET_ELEM_PERMANENT ? 0 :
+ jiffies_to_msecs(t - jiffies)/1000;
}
#endif /* __KERNEL__ */
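As a usage note, not part of the patch: the skbinfo helpers above are meant to be called on an element's extension data, ip_set_init_skbinfo() on the locked add/update path and ip_set_get_skbinfo() on the RCU-protected match path. A hypothetical element type (my_elem and both functions below do not exist in the tree) might wire them up roughly like this::

 #include <linux/netfilter/ipset/ip_set.h>

 struct my_elem {
         struct ip_set_skbinfo skbinfo;  /* skbinfo extension data */
 };

 /* Add/update path, under the set's spinlock: stores the values with
  * the ip_set_rcu_assign_*() helpers via ip_set_init_skbinfo(). */
 static void my_elem_add(struct my_elem *e, const struct ip_set_ext *ext)
 {
         ip_set_init_skbinfo(&e->skbinfo, ext);
 }

 /* Match path, under RCU only: copies the values out through
  * ip_set_rcu_deref() via ip_set_get_skbinfo(). */
 static void my_elem_match(struct my_elem *e, const struct ip_set_ext *ext,
                           struct ip_set_ext *mext, u32 flags)
 {
         ip_set_get_skbinfo(&e->skbinfo, ext, mext, flags);
 }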