summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/ChangeLog133
-rw-r--r--kernel/include/linux/jhash.h27
-rw-r--r--kernel/include/linux/netfilter/ipset/ip_set.h35
-rw-r--r--kernel/include/linux/netfilter/ipset/ip_set_compat.h.in197
-rw-r--r--kernel/include/linux/netfilter/ipset/ip_set_compiler.h.in15
-rw-r--r--kernel/include/uapi/linux/netfilter/ipset/ip_set.h12
-rw-r--r--kernel/net/netfilter/ipset/Kconfig2
-rw-r--r--kernel/net/netfilter/ipset/ip_set_bitmap_gen.h14
-rw-r--r--kernel/net/netfilter/ipset/ip_set_bitmap_ip.c8
-rw-r--r--kernel/net/netfilter/ipset/ip_set_bitmap_ipmac.c4
-rw-r--r--kernel/net/netfilter/ipset/ip_set_bitmap_port.c4
-rw-r--r--kernel/net/netfilter/ipset/ip_set_core.c400
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_gen.h832
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_ip.c45
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_ipmac.c6
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_ipmark.c24
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_ipport.c39
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_ipportip.c17
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_ipportnet.c17
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_mac.c6
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_net.c17
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_netiface.c28
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_netnet.c40
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_netport.c17
-rw-r--r--kernel/net/netfilter/ipset/ip_set_hash_netportnet.c34
-rw-r--r--kernel/net/netfilter/ipset/ip_set_list_set.c15
-rw-r--r--kernel/net/netfilter/ipset/pfxlen.c2
-rw-r--r--kernel/net/sched/em_ipset.c7
28 files changed, 1413 insertions, 584 deletions
diff --git a/kernel/ChangeLog b/kernel/ChangeLog
index ec1eca5..1b05fbd 100644
--- a/kernel/ChangeLog
+++ b/kernel/ChangeLog
@@ -1,3 +1,136 @@
+7.21
+ - netfilter: ipset: Suppress false sparse warnings
+ - tests: Verify module unload when sets with timeout were just destroyed
+ - netfilter: ipset: remove set destroy at ip_set module removal
+ - netfilter: ipset: Cleanup the code of destroy operation and explain
+ the two stages in comments
+ - netfilter: ipset: Missing gc cancellations fixed
+
+7.20
+ - treewide: Convert del_timer*() to timer_shutdown*() (Steven Rostedt)
+ - Use timer_shutdown_sync() when available, instead of del_timer_sync()
+ - netfilter: ipset: fix race condition between swap/destroy and kernel
+ side add/del/test v4
+ - netfilter: ipset: fix race condition between swap/destroy and kernel
+ side add/del/test v3
+ - netfilter: ipset: fix race condition between swap/destroy and kernel
+ side add/del/test v2
+ - netfilter: ipset: fix race condition between swap/destroy and kernel
+ side add/del/test
+
+7.18
+ - netfilter: ipset: Fix race between IPSET_CMD_CREATE and IPSET_CMD_SWAP
+ (reported by Kyle Zeng)
+ - netfilter: ipset: add the missing IP_SET_HASH_WITH_NET0 macro for
+ ip_set_hash_netportnet.c (Kyle Zeng)
+ - compatibility: handle strscpy_pad()
+ - netfilter: ipset: refactor deprecated strncpy (Justin Stitt)
+ - netfilter: ipset: remove rcu_read_lock_bh pair from ip_set_test
+ (Florian Westphal)
+ - netfilter: ipset: Replace strlcpy with strscpy (Azeem Shaikh)
+ - netfilter: ipset: Add schedule point in call_ad(). (Kuniyuki Iwashima)
+ - net: Kconfig: fix spellos (Randy Dunlap)
+ - netfilter: ipset: Fix overflow before widen in the bitmap_ip_create()
+ function. (Gavrilov Ilia)
+
+7.17
+ - netfilter: ipset: Rework long task execution when adding/deleting entries
+ - netfilter: ipset: fix hash:net,port,net hang with /0 subnet
+
+7.16
+ - netfilter: ipset: restore allowing 64 clashing elements in hash:net,iface
+ - Fix all debug mode warnings
+ - netfilter: ipset: Add support for new bitmask parameter (Vishwanath Pai)
+ - netfilter: ipset: regression in ip_set_hash_ip.c (Vishwanath Pai)
+ - netfilter: move from strlcpy with unused retval to strscpy
+ (Wolfram Sang)
+ - compatibility: handle unsafe_memcpy()
+ - netlink: Bounds-check struct nlmsgerr creation (Kees Cook)
+ - compatibility: move to skb_protocol in the code from tc_skb_protocol
+ - Compatibility: check kvcalloc, kvfree, kvzalloc in slab.h too
+ - sched: consistently handle layer3 header accesses in the presence
+ of VLANs (Toke Høiland-Jørgensen)
+ - treewide: Replace GPLv2 boilerplate/reference with SPDX
+ - rule 500 (Thomas Gleixner)
+ - headers: Remove some left-over license text in
+ include/uapi/linux/netfilter/ (Christophe JAILLET)
+ - netfilter: ipset: enforce documented limit to prevent allocating
+ huge memory
+ - netfilter: ipset: Fix oversized kvmalloc() calls
+
+7.15
+ - netfilter: ipset: Fix maximal range check in hash_ipportnet4_uadt()
+ (Nathan Chancellor)
+
+7.14
+ - 64bit division isn't allowed on 32bit, replace it with shift
+
+7.13
+ - Limit the maximal range of consecutive elements to add/delete fix
+
+7.12
+ - Limit the maximal range of consecutive elements to add/delete
+ - Backport "netfilter: use nfnetlink_unicast()"
+ - Backport "netfilter: nfnetlink: consolidate callback type"
+ - Backport "netfilter: nfnetlink: add struct nfnl_info and
+ pass it to callbacks"
+ - Backport "netfilter: add helper function to set up the
+ nfnetlink header and use it"
+
+7.10
+ - Fix patch "Handle false warning from -Wstringop-overflow"
+ - Backward compatibility: handle renaming nla_strlcpy to nla_strscpy
+ - treewide: rename nla_strlcpy to nla_strscpy. (Francis Laniel)
+ - netfilter: ipset: fix shift-out-of-bounds in htable_bits()
+ (Vasily Averin)
+ - netfilter: ipset: fixes possible oops in mtype_resize (Vasily Averin)
+ - Handle false warning from -Wstringop-overflow
+ - Backward compatibility: handle missing strscpy with a wrapper of strlcpy.
+ - Move compiler specific compatibility support to separated file (broken
+ compatibility support reported by Ed W)
+
+7.8
+ - Complete backward compatibility fix for package copy of <linux/jhash.h>
+ - Compatibility: check for kvzalloc() and GFP_KERNEL_ACCOUNT
+ - netfilter: ipset: enable memory accounting for ipset allocations
+ (Vasily Averin)
+ - netfilter: ipset: prevent uninit-value in hash_ip6_add (Eric Dumazet)
+ - Compatibility: use skb_policy() from if_vlan.h if available
+ - Compatibility: Check for the fourth arg of list_for_each_entry_rcu()
+ - Backward compatibility fix for the package copy of <linux/jhash.h>
+
+7.7
+ - Expose the initval hash parameter to userspace
+ - Add bucketsize parameter to all hash types
+ - Use fallthrough pseudo-keyword in the package copy of <linux/jhash.h> too
+ - Support the -exist flag with the destroy command
+ - netfilter: Use fallthrough pseudo-keyword (Gustavo A. R. Silva)
+ - netfilter: Replace zero-length array with flexible-array member
+ (Gustavo A. R. Silva)
+ - netfilter: ipset: call ip_set_free() instead of kfree() (Eric Dumazet)
+ - netfiler: ipset: fix unaligned atomic access (Russell King)
+ - netfilter: ipset: Fix subcounter update skip (Phil Sutter)
+ - ipset: Update byte and packet counters regardless of whether they match
+ (Stefano Brivio)
+ - netfilter: ipset: Pass lockdep expression to RCU lists (Amol Grover)
+ - ip_set: Fix compatibility with kernels between v3.3 and v4.5
+ (Serhey Popovych)
+ - ip_set: Fix build on kernels without INIT_DEFERRABLE_WORK
+ (Serhey Popovych)
+ - ipset: Support kernels with at least system_wq support
+ - ip_set: Fix build on kernels without system_power_efficient_wq
+ (Serhey Popovych)
+
+7.6
+ - netfilter: ipset: Fix forceadd evaluation path
+ - netfilter: ipset: Correct the reported memory size
+ - ip_set: Include kernel header instead of UAPI (Serhey Popovych)
+ - netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports
+ - netfilter: ipset: fix suspicious RCU usage in find_set_and_id
+ - Add compatibility support for bitmap_zalloc() and bitmap_zero()
+ - netfilter: ipset: use bitmap infrastructure completely
+ - netfilter: fix a use-after-free in mtype_destroy() (Cong Wang)
+
7.5
- netfilter: ipset: avoid null deref when IPSET_ATTR_LINENO is present
(Florian Westphal)
diff --git a/kernel/include/linux/jhash.h b/kernel/include/linux/jhash.h
index c700a77..d144e33 100644
--- a/kernel/include/linux/jhash.h
+++ b/kernel/include/linux/jhash.h
@@ -1,5 +1,6 @@
#ifndef _LINUX_JHASH_H
#define _LINUX_JHASH_H
+#include <linux/netfilter/ipset/ip_set_compiler.h>
/* jhash.h: Jenkins hash support.
*
@@ -87,17 +88,17 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
/* Last block: affect all 32 bits of (c) */
/* All the case statements fall through */
switch (length) {
- case 12: c += (u32)k[11]<<24;
- case 11: c += (u32)k[10]<<16;
- case 10: c += (u32)k[9]<<8;
- case 9: c += k[8];
- case 8: b += (u32)k[7]<<24;
- case 7: b += (u32)k[6]<<16;
- case 6: b += (u32)k[5]<<8;
- case 5: b += k[4];
- case 4: a += (u32)k[3]<<24;
- case 3: a += (u32)k[2]<<16;
- case 2: a += (u32)k[1]<<8;
+ case 12: c += (u32)k[11]<<24; fallthrough;
+ case 11: c += (u32)k[10]<<16; fallthrough;
+ case 10: c += (u32)k[9]<<8; fallthrough;
+ case 9: c += k[8]; fallthrough;
+ case 8: b += (u32)k[7]<<24; fallthrough;
+ case 7: b += (u32)k[6]<<16; fallthrough;
+ case 6: b += (u32)k[5]<<8; fallthrough;
+ case 5: b += k[4]; fallthrough;
+ case 4: a += (u32)k[3]<<24; fallthrough;
+ case 3: a += (u32)k[2]<<16; fallthrough;
+ case 2: a += (u32)k[1]<<8; fallthrough;
case 1: a += k[0];
__jhash_final(a, b, c);
case 0: /* Nothing left to add */
@@ -133,8 +134,8 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
/* Handle the last 3 u32's: all the case statements fall through */
switch (length) {
- case 3: c += k[2];
- case 2: b += k[1];
+ case 3: c += k[2]; fallthrough;
+ case 2: b += k[1]; fallthrough;
case 1: a += k[0];
__jhash_final(a, b, c);
case 0: /* Nothing left to add */
diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h b/kernel/include/linux/netfilter/ipset/ip_set.h
index 82c3cf8..7691b7a 100644
--- a/kernel/include/linux/netfilter/ipset/ip_set.h
+++ b/kernel/include/linux/netfilter/ipset/ip_set.h
@@ -99,7 +99,7 @@ struct ip_set_counter {
struct ip_set_comment_rcu {
struct rcu_head rcu;
- char str[0];
+ char str[];
};
struct ip_set_comment {
@@ -122,6 +122,7 @@ struct ip_set_ext {
u32 timeout;
u8 packets_op;
u8 bytes_op;
+ bool target;
};
struct ip_set;
@@ -188,8 +189,24 @@ struct ip_set_type_variant {
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
+ /* Cancel ongoing garbage collectors before destroying the set*/
+ void (*cancel_gc)(struct ip_set *set);
+ /* Region-locking is used */
+ bool region_lock;
};
+struct ip_set_region {
+ spinlock_t lock; /* Region lock */
+ size_t ext_size; /* Size of the dynamic extensions */
+ u32 elements; /* Number of elements vs timeout */
+};
+
+/* Max range where every element is added/deleted in one step */
+#define IPSET_MAX_RANGE (1<<14)
+
+/* The max revision number supported by any set type + 1 */
+#define IPSET_REVISION_MAX 9
+
/* The core set type structure */
struct ip_set_type {
struct list_head list;
@@ -207,6 +224,8 @@ struct ip_set_type {
u8 family;
/* Type revisions */
u8 revision_min, revision_max;
+ /* Revision-specific supported (create) flags */
+ u8 create_flags[IPSET_REVISION_MAX+1];
/* Set features to control swapping */
u16 features;
@@ -228,6 +247,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type);
/* A generic IP set */
struct ip_set {
+ /* For call_cru in destroy */
+ struct rcu_head rcu;
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
@@ -511,8 +532,18 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
*skbinfo = ext->skbinfo;
}
+static inline void
+nf_inet_addr_mask_inplace(union nf_inet_addr *a1,
+ const union nf_inet_addr *mask)
+{
+ a1->all[0] &= mask->all[0];
+ a1->all[1] &= mask->all[1];
+ a1->all[2] &= mask->all[2];
+ a1->all[3] &= mask->all[3];
+}
+
#define IP_SET_INIT_KEXT(skb, opt, set) \
- { .bytes = (skb)->len, .packets = 1, \
+ { .bytes = (skb)->len, .packets = 1, .target = true,\
.timeout = ip_set_adt_opt_timeout(opt, set) }
#define IP_SET_INIT_UEXT(set) \
diff --git a/kernel/include/linux/netfilter/ipset/ip_set_compat.h.in b/kernel/include/linux/netfilter/ipset/ip_set_compat.h.in
index 57c9996..5746f39 100644
--- a/kernel/include/linux/netfilter/ipset/ip_set_compat.h.in
+++ b/kernel/include/linux/netfilter/ipset/ip_set_compat.h.in
@@ -5,6 +5,8 @@
* xt_set.c, ip_set_core.c, ip_set_getport.c, pfxlen.c too.
*/
+#@HAVE_INIT_DEFERRABLE_WORK@ HAVE_INIT_DEFERRABLE_WORK
+#@HAVE_SYSTEM_POWER_EFFICIENT_WQ@ HAVE_SYSTEM_POWER_EFFICIENT_WQ
#@HAVE_STRUCT_XT_ACTION_PARAM@ HAVE_STRUCT_XT_ACTION_PARAM
#@HAVE_VZALLOC@ HAVE_VZALLOC
#@HAVE_ETHER_ADDR_EQUAL@ HAVE_ETHER_ADDR_EQUAL
@@ -49,11 +51,24 @@
#@HAVE_PASSING_EXTENDED_ACK_TO_CALLBACKS@ HAVE_PASSING_EXTENDED_ACK_TO_CALLBACKS
#@HAVE_TYPEDEF_SCTP_SCTPHDR_T@ HAVE_TYPEDEF_SCTP_SCTPHDR_T
#@HAVE_TIMER_SETUP@ HAVE_TIMER_SETUP
+#@HAVE_TIMER_SHUTDOWN_SYNC@ HAVE_TIMER_SHUTDOWN_SYNC
#@HAVE_STRSCPY@ HAVE_STRSCPY
+#@HAVE_STRSCPY_PAD@ HAVE_STRSCPY_PAD
#@HAVE_SYNCHRONIZE_RCU_BH@ HAVE_SYNCHRONIZE_RCU_BH
#@HAVE_LOCKDEP_NFNL_IS_HELD@ HAVE_LOCKDEP_NFNL_IS_HELD
#@HAVE_COND_RESCHED_RCU@ HAVE_COND_RESCHED_RCU
#@HAVE_SKB_IIF@ HAVE_SKB_IIF
+#@HAVE_LIST_FOR_EACH_ENTRY_RCU_FOUR_ARGS@ HAVE_LIST_FOR_EACH_ENTRY_RCU_FOUR_ARGS
+#@HAVE_SKB_PROTOCOL@ HAVE_SKB_PROTOCOL
+#@HAVE_NLA_POLICY_EXACT_LEN@ HAVE_NLA_POLICY_EXACT_LEN
+#@HAVE_KVZALLOC@ HAVE_KVZALLOC
+#@HAVE_GFP_KERNEL_ACCOUNT@ HAVE_GFP_KERNEL_ACCOUNT
+#@HAVE_NLA_STRSCPY@ HAVE_NLA_STRSCPY
+#@HAVE_NFNL_MSG_PUT@ HAVE_NFNL_MSG_PUT
+#@HAVE_NFNL_INFO_IN_NFNL_CALLBACK@ HAVE_NFNL_INFO_IN_NFNL_CALLBACK
+#@HAVE_NFNL_CALLBACK_TYPE@ HAVE_NFNL_CALLBACK_TYPE
+#@HAVE_EAGAIN_IN_NFNETLINK_UNICAST@ HAVE_EAGAIN_IN_NFNETLINK_UNICAST
+#@HAVE_NLMSG_UNICAST@ HAVE_NLMSG_UNICAST
#ifdef HAVE_EXPORT_SYMBOL_GPL_IN_MODULE_H
#include <linux/module.h>
@@ -168,6 +183,14 @@ static inline void cond_resched_rcu(void)
#error "NETFILTER_NETLINK must be enabled: select NFACCT/NFQUEUE/LOG over NFNETLINK"
#endif
+#ifndef HAVE_INIT_DEFERRABLE_WORK
+#define INIT_DEFERRABLE_WORK INIT_DELAYED_WORK_DEFERRABLE
+#endif
+
+#ifndef HAVE_SYSTEM_POWER_EFFICIENT_EQ
+#define system_power_efficient_wq system_wq
+#endif
+
#ifndef HAVE_STRUCT_XT_ACTION_PARAM
#define xt_action_param xt_match_param
#endif
@@ -332,18 +355,44 @@ static inline int nla_put_in6_addr(struct sk_buff *skb, int attrtype,
}
#endif
-#ifdef HAVE_PASSING_EXTENDED_ACK_TO_CALLBACKS
-#define IPSET_CBFN(fn, net, nl, skb, nlh, cda, e) fn(net, nl, skb, nlh, cda, e)
-#define IPSET_CBFN_AD(fn, net, nl, skb, ad, nlh, cda, e) fn(net, nl, skb, ad, nlh, cda, e)
-#define IPSET_SOCK_NET(net, ctnl) net
+#ifdef HAVE_NFNL_INFO_IN_NFNL_CALLBACK
+#define IPSET_CBFN(fn, net, nl, skb, nlh, cda, e, i) fn(skb, i, cda)
+#define IPSET_CBFN_AD(fn, net, nl, skb, ad, nlh, cda, e, i) fn(net, nl, skb, ad, nlh, cda, i)
+#define IPSET_SOCK_NET(n, ctnl, i) (i)->net
+#define INFO_NLH(i, n) (i)->nlh
+#define INFO_NET(i, n) (i)->net
+#define INFO_SK(i, n) (i)->sk
+#define CALL_AD(net, ctnl, skb, set, tb, adt, flags, l) call_ad(net, ctnl, skb, set, tb, adt, flags, l)
+#elif defined(HAVE_PASSING_EXTENDED_ACK_TO_CALLBACKS)
+#define IPSET_CBFN(fn, net, nl, skb, nlh, cda, e, i) fn(net, nl, skb, nlh, cda, e)
+#define IPSET_CBFN_AD(fn, net, nl, skb, ad, nlh, cda, e, i) fn(net, nl, skb, ad, nlh, cda, e)
+#define IPSET_SOCK_NET(net, ctnl, i) net
+#define INFO_NLH(i, n) n
+#define INFO_NET(i, n) n
+#define INFO_SK(i, n) n
+#define CALL_AD(net, ctnl, skb, set, tb, adt, flags, l) call_ad(net, ctnl, skb, set, tb, adt, flags, l)
#elif defined(HAVE_NET_IN_NFNL_CALLBACK_FN)
-#define IPSET_CBFN(fn, net, nl, skb, nlh, cda, e) fn(net, nl, skb, nlh, cda)
-#define IPSET_CBFN_AD(fn, net, nl, skb, ad, nlh, cda, e) fn(net, nl, skb, ad, nlh, cda)
-#define IPSET_SOCK_NET(net, ctnl) net
+#define IPSET_CBFN(fn, net, nl, skb, nlh, cda, e, i) fn(net, nl, skb, nlh, cda)
+#define IPSET_CBFN_AD(fn, net, nl, skb, ad, nlh, cda, e, i) fn(net, nl, skb, ad, nlh, cda)
+#define IPSET_SOCK_NET(net, ctnl, i) net
+#define INFO_NLH(i, n) n
+#define INFO_NET(i, n) n
+#define INFO_SK(i, n) n
+#define CALL_AD(net, ctnl, skb, set, tb, adt, flags, l) call_ad(net, ctnl, skb, set, tb, adt, flags, l)
#else
-#define IPSET_CBFN(fn, net, nl, skb, nlh, cda, e) fn(nl, skb, nlh, cda)
-#define IPSET_CBFN_AD(fn, net, nl, skb, ad, nlh, cda, e) fn(nl, skb, ad, nlh, cda)
-#define IPSET_SOCK_NET(net, ctnl) sock_net(ctnl)
+#define IPSET_CBFN(fn, net, nl, skb, nlh, cda, e, i) fn(nl, skb, nlh, cda)
+#define IPSET_CBFN_AD(fn, net, nl, skb, ad, nlh, cda, e, i) fn(nl, skb, ad, nlh, cda)
+#define IPSET_SOCK_NET(net, ctnl, i) sock_net(ctnl)
+#define INFO_NLH(i, n) n
+#define INFO_NET(i, n) n
+#define INFO_SK(i, n) n
+#define CALL_AD(net, ctnl, skb, set, tb, adt, flags, l) call_ad(ctnl, skb, set, tb, adt, flags, l)
+#endif
+
+#ifdef HAVE_NFNL_CALLBACK_TYPE
+#define SET_NFNL_CALLBACK_TYPE(t) .type = t,
+#else
+#define SET_NFNL_CALLBACK_TYPE(t)
#endif
#ifndef HAVE_TC_SKB_PROTOCOL
@@ -359,7 +408,8 @@ static inline int nla_put_in6_addr(struct sk_buff *skb, int attrtype,
#define skb_vlan_tag_present vlan_tx_tag_present
#endif
-static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
+#ifndef HAVE_SKB_PROTOCOL
+static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan)
{
if (skb_vlan_tag_present(skb))
#ifdef HAVE_VLAN_PROTO_IN_SK_BUFF
@@ -370,6 +420,7 @@ static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
return skb->protocol;
}
#endif
+#endif
#ifdef HAVE_XT_NET
#define IPSET_DEV_NET(par) xt_net(par)
@@ -386,6 +437,36 @@ static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
}
#endif
+#ifndef HAVE_NFNL_MSG_PUT
+#include <linux/netfilter/nfnetlink.h>
+static inline void nfnl_fill_hdr(struct nlmsghdr *nlh, u8 family, u8 version,
+ __be16 res_id)
+{
+ struct nfgenmsg *nfmsg;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = version;
+ nfmsg->res_id = res_id;
+}
+
+static inline struct nlmsghdr *nfnl_msg_put(struct sk_buff *skb, u32 portid,
+ u32 seq, int type, int flags,
+ u8 family, u8 version,
+ __be16 res_id)
+{
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
+ if (!nlh)
+ return NULL;
+
+ nfnl_fill_hdr(nlh, family, version, res_id);
+
+ return nlh;
+}
+#endif
+
#ifdef HAVE_NETLINK_EXTENDED_ACK
#define NETLINK_ACK(in_skb, nlh, err, extack) netlink_ack(in_skb, nlh, err, extack)
#else
@@ -426,8 +507,53 @@ static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
struct type *var = set->data
#endif
+#ifndef HAVE_TIMER_SHUTDOWN_SYNC
+#define timer_shutdown_sync(timer) del_timer_sync(timer)
+#endif
+
#ifndef HAVE_STRSCPY
-#define strscpy(dst, src, n) (strncpy(dst, src, n) == (dst))
+static inline ssize_t strscpy(char * dest, const char * src, size_t count)
+{
+ size_t ret = strlcpy(dest, src, count);
+
+ return (ret >= count ? -E2BIG : ret);
+}
+#endif
+
+#ifndef HAVE_STRSCPY_PAD
+static inline ssize_t strscpy_pad(char *dest, const char *src, size_t count)
+{
+ ssize_t written;
+
+ written = strscpy(dest, src, count);
+ if (written < 0 || written == count - 1)
+ return written;
+
+ memset(dest + written + 1, 0, count - written - 1);
+
+ return written;
+}
+#endif
+
+#ifndef HAVE_NLA_STRSCPY
+#define nla_strscpy nla_strlcpy
+#endif
+
+#if !defined(HAVE_EAGAIN_IN_NFNETLINK_UNICAST) || !defined(HAVE_NLMSG_UNICAST)
+#define NFNETLINK_UNICAST(cntl, skb, net, portid) ipset_nfnetlink_unicast(cntl, skb, portid)
+static inline int ipset_nfnetlink_unicast(struct sock *ctnl, struct sk_buff *skb, u32 portid)
+{
+ int err = netlink_unicast(ctnl, skb, portid, MSG_DONTWAIT);
+
+ if (err > 0)
+ err = 0;
+ if (err == -EAGAIN)
+ err = -ENOBUFS;
+
+ return err;
+}
+#else
+#define NFNETLINK_UNICAST(cntl, skb, net, portid) nfnetlink_unicast(skb, net, portid)
#endif
#ifndef smp_mb__before_atomic
@@ -459,5 +585,52 @@ static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
#define dev_get_by_index_rcu __dev_get_by_index
#endif
+#ifdef HAVE_LIST_FOR_EACH_ENTRY_RCU_FOUR_ARGS
+#define list_for_each_entry_rcu_compat(pos, head, member, cond) \
+ list_for_each_entry_rcu(pos, head, member, cond)
+#else
+#define list_for_each_entry_rcu_compat(pos, head, member, cond) \
+ list_for_each_entry_rcu(pos, head, member)
+#endif
+
+#ifndef HAVE_NLA_POLICY_EXACT_LEN
+#define NLA_POLICY_EXACT_LEN(_len) { \
+ .type = NLA_UNSPEC, \
+ .len = _len \
+}
+#endif
+
+#ifndef HAVE_KVZALLOC
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#ifndef GFP_KERNEL_ACCOUNT
+#define GFP_KERNEL_ACCOUNT GFP_KERNEL
+#endif
+static inline void *kvzalloc(size_t size, gfp_t flags)
+{
+ void *members = NULL;
+
+ if (size < KMALLOC_MAX_SIZE)
+ members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+
+ if (members) {
+ pr_debug("%p: allocated with kmalloc\n", members);
+ return members;
+ }
+
+ members = vzalloc(size);
+ if (!members)
+ return NULL;
+ pr_debug("%p: allocated with vmalloc\n", members);
+
+ return members;
+}
+#endif
+
+#ifndef unsafe_memcpy
+#define unsafe_memcpy(dst, src, bytes, justification) \
+ memcpy(dst, src, bytes)
+#endif
+
#endif /* IP_SET_COMPAT_HEADERS */
#endif /* __IP_SET_COMPAT_H */
diff --git a/kernel/include/linux/netfilter/ipset/ip_set_compiler.h.in b/kernel/include/linux/netfilter/ipset/ip_set_compiler.h.in
new file mode 100644
index 0000000..1b392f8
--- /dev/null
+++ b/kernel/include/linux/netfilter/ipset/ip_set_compiler.h.in
@@ -0,0 +1,15 @@
+#ifndef __IP_SET_COMPILER_H
+#define __IP_SET_COMPILER_H
+
+/* Compiler attributes */
+#ifndef __has_attribute
+# define __has_attribute(x) __GCC4_has_attribute_##x
+# define __GCC4_has_attribute___fallthrough__ 0
+#endif
+
+#if __has_attribute(__fallthrough__)
+# define fallthrough __attribute__((__fallthrough__))
+#else
+# define fallthrough do {} while (0) /* fallthrough */
+#endif
+#endif /* __IP_SET_COMPILER_H */
diff --git a/kernel/include/uapi/linux/netfilter/ipset/ip_set.h b/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
index 7545af4..b81f1ae 100644
--- a/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -3,10 +3,6 @@
* Patrick Schaaf <bof@bof.de>
* Martin Josefsson <gandalf@wlug.westbo.se>
* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _UAPI_IP_SET_H
#define _UAPI_IP_SET_H
@@ -89,14 +85,15 @@ enum {
IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */
IPSET_ATTR_MARK, /* 10 */
IPSET_ATTR_MARKMASK, /* 11 */
+ IPSET_ATTR_BITMASK, /* 12 */
/* Reserve empty slots */
IPSET_ATTR_CADT_MAX = 16,
/* Create-only specific attributes */
- IPSET_ATTR_GC,
+ IPSET_ATTR_INITVAL, /* was unused IPSET_ATTR_GC */
IPSET_ATTR_HASHSIZE,
IPSET_ATTR_MAXELEM,
IPSET_ATTR_NETMASK,
- IPSET_ATTR_PROBES,
+ IPSET_ATTR_BUCKETSIZE, /* was unused IPSET_ATTR_PROBES */
IPSET_ATTR_RESIZE,
IPSET_ATTR_SIZE,
/* Kernel-only */
@@ -157,6 +154,7 @@ enum ipset_errno {
IPSET_ERR_COMMENT,
IPSET_ERR_INVALID_MARKMASK,
IPSET_ERR_SKBINFO,
+ IPSET_ERR_BITMASK_NETMASK_EXCL,
/* Type specific error codes */
IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -214,6 +212,8 @@ enum ipset_cadt_flags {
enum ipset_create_flags {
IPSET_CREATE_FLAG_BIT_FORCEADD = 0,
IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD),
+ IPSET_CREATE_FLAG_BIT_BUCKETSIZE = 1,
+ IPSET_CREATE_FLAG_BUCKETSIZE = (1 << IPSET_CREATE_FLAG_BIT_BUCKETSIZE),
IPSET_CREATE_FLAG_BIT_MAX = 7,
};
diff --git a/kernel/net/netfilter/ipset/Kconfig b/kernel/net/netfilter/ipset/Kconfig
index 861659f..8772af5 100644
--- a/kernel/net/netfilter/ipset/Kconfig
+++ b/kernel/net/netfilter/ipset/Kconfig
@@ -29,7 +29,7 @@ config IP_SET_BITMAP_IP
depends on IP_SET
help
This option adds the bitmap:ip set type support, by which one
- can store IPv4 addresses (or network addresse) from a range.
+ can store IPv4 addresses (or network addresses) from a range.
To compile it as a module, choose M here. If unsure, say N.
diff --git a/kernel/net/netfilter/ipset/ip_set_bitmap_gen.h b/kernel/net/netfilter/ipset/ip_set_bitmap_gen.h
index 0479750..3245b6b 100644
--- a/kernel/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/kernel/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -29,6 +29,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -58,9 +59,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
@@ -290,6 +288,15 @@ mtype_gc(GC_ARG)
add_timer(&map->gc);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
@@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
};
#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/kernel/net/netfilter/ipset/ip_set_bitmap_ip.c b/kernel/net/netfilter/ipset/ip_set_bitmap_ip.c
index cb79082..f37169c 100644
--- a/kernel/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/kernel/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -48,7 +48,7 @@ struct bitmap_ip {
#ifdef HAVE_TIMER_SETUP
struct ip_set *set; /* attached to this ip_set */
#endif
- unsigned char extensions[0] /* data extensions */
+ unsigned char extensions[] /* data extensions */
__aligned(__alignof__(u64));
};
@@ -312,8 +312,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
- hosts = 2 << (32 - netmask - 1);
- elements = 2 << (netmask - mask_bits - 1);
+ hosts = 2U << (32 - netmask - 1);
+ elements = 2UL << (netmask - mask_bits - 1);
}
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
@@ -330,7 +330,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
- kfree(map);
+ ip_set_free(map);
return -ENOMEM;
}
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/kernel/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/kernel/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 8d1c5f2..3d25d29 100644
--- a/kernel/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/kernel/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,7 +52,7 @@ struct bitmap_ipmac {
#ifdef HAVE_TIMER_SETUP
struct ip_set *set; /* attached to this ip_set */
#endif
- unsigned char extensions[0] /* MAC + data extensions */
+ unsigned char extensions[] /* MAC + data extensions */
__aligned(__alignof__(u64));
};
@@ -368,7 +368,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
- kfree(map);
+ ip_set_free(map);
return -ENOMEM;
}
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/kernel/net/netfilter/ipset/ip_set_bitmap_port.c b/kernel/net/netfilter/ipset/ip_set_bitmap_port.c
index 171995c..2a570d8 100644
--- a/kernel/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/kernel/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -40,7 +40,7 @@ struct bitmap_port {
#ifdef HAVE_TIMER_SETUP
struct ip_set *set; /* attached to this ip_set */
#endif
- unsigned char extensions[0] /* data extensions */
+ unsigned char extensions[] /* data extensions */
__aligned(__alignof__(u64));
};
@@ -279,7 +279,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
- kfree(map);
+ ip_set_free(map);
return -ENOMEM;
}
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c
index 1f506e6..c31dbc3 100644
--- a/kernel/net/netfilter/ipset/ip_set_core.c
+++ b/kernel/net/netfilter/ipset/ip_set_core.c
@@ -21,6 +21,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_compiler.h>
static LIST_HEAD(ip_set_type_list); /* all registered set types */
static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
@@ -29,7 +30,6 @@ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
struct ip_set_net {
struct ip_set * __rcu *ip_set_list; /* all individual sets */
ip_set_id_t ip_set_max; /* max number of sets */
- bool is_deleted; /* deleted by ip_set_net_exit */
bool is_destroyed; /* all sets are destroyed */
};
@@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
ip_set_dereference((inst)->ip_set_list)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
+#define ip_set_dereference_nfnl(p) \
+ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
@@ -86,7 +88,8 @@ find_set_type(const char *name, u8 family, u8 revision)
{
struct ip_set_type *type;
- list_for_each_entry_rcu(type, &ip_set_type_list, list)
+ list_for_each_entry_rcu_compat(type, &ip_set_type_list, list,
+ lockdep_is_held(&ip_set_type_mutex))
if (STRNCMP(type->name, name) &&
(type->family == family ||
type->family == NFPROTO_UNSPEC) &&
@@ -249,22 +252,7 @@ EXPORT_SYMBOL_GPL(ip_set_type_unregister);
void *
ip_set_alloc(size_t size)
{
- void *members = NULL;
-
- if (size < KMALLOC_MAX_SIZE)
- members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
-
- if (members) {
- pr_debug("%p: allocated with kmalloc\n", members);
- return members;
- }
-
- members = vzalloc(size);
- if (!members)
- return NULL;
- pr_debug("%p: allocated with vmalloc\n", members);
-
- return members;
+ return kvzalloc(size, GFP_KERNEL_ACCOUNT);
}
EXPORT_SYMBOL_GPL(ip_set_alloc);
@@ -285,8 +273,7 @@ flag_nested(const struct nlattr *nla)
static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
[IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
- [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY,
- .len = sizeof(struct in6_addr) },
+ [IPSET_ATTR_IPADDR_IPV6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
};
int
@@ -368,7 +355,7 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
- strlcpy(c->str, ext->comment, len + 1);
+ strscpy(c->str, ext->comment, len + 1);
set->ext_size += sizeof(*c) + strlen(c->str) + 1;
rcu_assign_pointer(comment->c, c);
}
@@ -459,6 +446,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
+ if (align < ip_set_extensions[id].align)
+ align = ip_set_extensions[id].align;
len = ALIGN(len, ip_set_extensions[id].align);
set->offset[id] = len;
set->extensions |= ip_set_extensions[id].type;
@@ -649,13 +638,14 @@ ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
if (SET_WITH_COUNTER(set)) {
struct ip_set_counter *counter = ext_counter(data, set);
+ ip_set_update_counter(counter, ext, flags);
+
if (flags & IPSET_FLAG_MATCH_COUNTERS &&
!(ip_set_match_counter(ip_set_get_packets(counter),
mext->packets, mext->packets_op) &&
ip_set_match_counter(ip_set_get_bytes(counter),
mext->bytes, mext->bytes_op)))
return false;
- ip_set_update_counter(counter, ext, flags);
}
if (SET_WITH_SKBINFO(set))
ip_set_get_skbinfo(ext_skbinfo(data, set),
@@ -695,6 +685,14 @@ __ip_set_put(struct ip_set *set)
* a separate reference counter
*/
static void
+__ip_set_get_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ set->ref_netlink++;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -712,15 +710,24 @@ __ip_set_put_netlink(struct ip_set *set)
static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
- struct ip_set *set;
struct ip_set_net *inst = ip_set_pernet(net);
- rcu_read_lock();
- /* ip_set_list itself needs to be protected */
- set = rcu_dereference(inst->ip_set_list)[index];
- rcu_read_unlock();
+ /* ip_set_list and the set pointer need to be protected */
+ return ip_set_dereference_nfnl(inst->ip_set_list)[index];
+}
+
+static inline void
+ip_set_lock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_lock_bh(&set->lock);
+}
- return set;
+static inline void
+ip_set_unlock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_unlock_bh(&set->lock);
}
int
@@ -737,16 +744,14 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0;
- rcu_read_lock_bh();
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
- rcu_read_unlock_bh();
if (ret == -EAGAIN) {
/* Type requests element to be completed */
pr_debug("element must be completed, ADD is triggered\n");
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
ret = 1;
} else {
/* --return-nomatch: invert matched element */
@@ -775,9 +780,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
return ret;
}
@@ -797,9 +802,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
return ret;
}
@@ -872,7 +877,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
BUG_ON(!set);
read_lock_bh(&ip_set_ref_lock);
- strncpy(name, set->name, IPSET_MAXNAMELEN);
+ strscpy_pad(name, set->name, IPSET_MAXNAMELEN);
read_unlock_bh(&ip_set_ref_lock);
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
@@ -920,11 +925,9 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
struct ip_set_net *inst = ip_set_pernet(net);
nfnl_lock(NFNL_SUBSYS_IPSET);
- if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
- set = ip_set(inst, index);
- if (set)
- __ip_set_put(set);
- }
+ set = ip_set(inst, index);
+ if (set)
+ __ip_set_put(set);
nfnl_unlock(NFNL_SUBSYS_IPSET);
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -961,20 +964,9 @@ static struct nlmsghdr *
start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
enum ipset_cmd cmd)
{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
-
- nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd),
- sizeof(*nfmsg), flags);
- if (!nlh)
- return NULL;
-
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = NFPROTO_IPV4;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- return nlh;
+ return nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags,
+ NFPROTO_IPV4, NFNETLINK_V0, 0);
}
/* Create a set */
@@ -1044,7 +1036,8 @@ static int
IPSET_CBFN(ip_set_none, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
return -EOPNOTSUPP;
}
@@ -1053,16 +1046,17 @@ static int
IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct net *net = IPSET_SOCK_NET(n, ctnl);
+ struct net *net = IPSET_SOCK_NET(n, ctnl, info);
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *clash = NULL;
ip_set_id_t index = IPSET_INVALID_ID;
struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
const char *name, *typename;
u8 family, revision;
- u32 flags = flag_exist(nlh);
+ u32 flags = flag_exist(INFO_NLH(info, nlh));
int ret = 0;
if (unlikely(protocol_min_failed(attr) ||
@@ -1088,7 +1082,7 @@ IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl,
if (!set)
return -ENOMEM;
spin_lock_init(&set->lock);
- strlcpy(set->name, name, IPSET_MAXNAMELEN);
+ strscpy(set->name, name, IPSET_MAXNAMELEN);
set->family = family;
set->revision = revision;
@@ -1110,8 +1104,10 @@ IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl,
ret = -IPSET_ERR_PROTOCOL;
goto put_out;
}
+ /* Set create flags depending on the type revision */
+ set->flags |= set->type->create_flags[revision];
- ret = set->type->create(net, set, tb, flags);
+ ret = set->type->create(INFO_NET(info, net), set, tb, flags);
if (ret != 0)
goto put_out;
@@ -1165,6 +1161,7 @@ IPSET_CBFN(ip_set_create, struct net *n, struct sock *ctnl,
return ret;
cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -1182,24 +1179,58 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
.len = IPSET_MAXNAMELEN - 1 },
};
+/* Destroying a set is split into two stages when a DESTROY command issued:
+ * - Cancel garbage collectors and decrement the module reference counter:
+ * - Cancelling may wait and we are allowed to do it at this stage.
+ * - Module remove is protected by rcu_barrier() which waits for
+ * the second stage to be finished.
+ * - In order to prevent the race between kernel side add/del/test element
+ * operations and destroy, the destroying of the set data areas are
+ * performed via a call_rcu() call.
+ */
+
+/* Call set variant specific destroy function and reclaim the set data. */
static void
-ip_set_destroy_set(struct ip_set *set)
+ip_set_destroy_set_variant(struct ip_set *set)
{
- pr_debug("set: %s\n", set->name);
-
/* Must call it without holding any lock */
set->variant->destroy(set);
- module_put(set->type->me);
kfree(set);
}
+static void
+ip_set_destroy_set_variant_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set_variant(set);
+}
+
+/* Cancel the garbage collectors and decrement module references */
+static void
+ip_set_destroy_cancel_gc(struct ip_set *set)
+{
+ set->variant->cancel_gc(set);
+ module_put(set->type->me);
+}
+
+/* Use when we may wait for the complete destroy to be finished.
+ */
+static void
+ip_set_destroy_set(struct ip_set *set)
+{
+ ip_set_destroy_cancel_gc(set);
+ ip_set_destroy_set_variant(set);
+}
+
static int
IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
struct ip_set *s;
ip_set_id_t i;
int ret = 0;
@@ -1207,9 +1238,6 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
-
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
* External systems (i.e. xt_set) must call
@@ -1220,8 +1248,10 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1241,19 +1271,30 @@ IPSET_CBFN(ip_set_destroy, struct net *net, struct sock *ctnl,
/* Modified by ip_set_destroy() only, which is serialized */
inst->is_destroyed = false;
} else {
+ u32 flags = flag_exist(INFO_NLH(info, nlh));
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
- ret = -ENOENT;
+ if (!(flags & IPSET_FLAG_EXIST))
+ ret = -ENOENT;
goto out;
} else if (s->ref || s->ref_netlink) {
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ ip_set_destroy_cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_variant_rcu);
}
return 0;
out:
@@ -1268,18 +1309,19 @@ ip_set_flush_set(struct ip_set *set)
{
pr_debug("set: %s\n", set->name);
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
set->variant->flush(set);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
}
static int
IPSET_CBFN(ip_set_flush, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
struct ip_set *s;
ip_set_id_t i;
@@ -1318,9 +1360,10 @@ static int
IPSET_CBFN(ip_set_rename, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
struct ip_set *set, *s;
const char *name2;
ip_set_id_t i;
@@ -1349,7 +1392,7 @@ IPSET_CBFN(ip_set_rename, struct net *net, struct sock *ctnl,
goto out;
}
}
- strncpy(set->name, name2, IPSET_MAXNAMELEN);
+ strscpy_pad(set->name, name2, IPSET_MAXNAMELEN);
out:
write_unlock_bh(&ip_set_ref_lock);
@@ -1369,9 +1412,10 @@ static int
IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
struct ip_set *from, *to;
ip_set_id_t from_id, to_id;
char from_name[IPSET_MAXNAMELEN];
@@ -1406,9 +1450,9 @@ IPSET_CBFN(ip_set_swap, struct net *net, struct sock *ctnl,
return -EBUSY;
}
- strncpy(from_name, from->name, IPSET_MAXNAMELEN);
- strncpy(from->name, to->name, IPSET_MAXNAMELEN);
- strncpy(to->name, from_name, IPSET_MAXNAMELEN);
+ strscpy_pad(from_name, from->name, IPSET_MAXNAMELEN);
+ strscpy_pad(from->name, to->name, IPSET_MAXNAMELEN);
+ strscpy_pad(to->name, from_name, IPSET_MAXNAMELEN);
swap(from->ref, to->ref);
ip_set(inst, from_id) = to;
@@ -1648,7 +1692,7 @@ dump_last:
goto next_set;
if (set->variant->uref)
set->variant->uref(set, cb, true);
- /* fall through */
+ fallthrough;
default:
ret = set->variant->list(set, skb, cb);
if (!cb->args[IPSET_CB_ARG0])
@@ -1699,7 +1743,8 @@ static int
IPSET_CBFN(ip_set_dump, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
@@ -1715,11 +1760,13 @@ IPSET_CBFN(ip_set_dump, struct net *net, struct sock *ctnl,
#else
{
struct netlink_dump_control c = {
+#if HAVE_NETLINK_DUMP_START_ARGS == 4
.start = ip_set_dump_start,
+#endif
.dump = ip_set_dump_do,
.done = ip_set_dump_done,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(INFO_SK(info, ctnl), skb, INFO_NLH(info, nlh), &c);
}
#endif
}
@@ -1736,8 +1783,8 @@ static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
};
static int
-call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
- struct nlattr *tb[], enum ipset_adt adt,
+CALL_AD(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt,
u32 flags, bool use_lineno)
{
int ret;
@@ -1745,13 +1792,22 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
- spin_lock_bh(&set->lock);
+ if (retried) {
+ __ip_set_get_netlink(set);
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ cond_resched();
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ __ip_set_put_netlink(set);
+ }
+
+ ip_set_lock(set);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
retried = true;
- } while (ret == -EAGAIN &&
- set->variant->resize &&
- (ret = set->variant->resize(set, retried)) == 0);
+ } while (ret == -ERANGE ||
+ (ret == -EAGAIN &&
+ set->variant->resize &&
+ (ret = set->variant->resize(set, retried)) == 0));
if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
return 0;
@@ -1770,11 +1826,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
skb2 = nlmsg_new(payload, GFP_KERNEL);
if (!skb2)
return -ENOMEM;
- rep = __nlmsg_put(skb2, NETLINK_PORTID(skb),
- nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
+ rep = nlmsg_put(skb2, NETLINK_PORTID(skb),
+ nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
errmsg = nlmsg_data(rep);
errmsg->error = ret;
- memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
+ unsafe_memcpy(&errmsg->msg, nlh, nlh->nlmsg_len,
+ /* Bounds checked by the skb layer. */);
cmdattr = (void *)&errmsg->msg + min_len;
ret = NLA_PARSE(cda, IPSET_ATTR_CMD_MAX, cmdattr,
@@ -1789,8 +1846,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
*errline = lineno;
- netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb),
- MSG_DONTWAIT);
+ NFNETLINK_UNICAST(ctnl, skb2, net, NETLINK_PORTID(skb));
/* Signal netlink not to send its ACK/errmsg. */
return -EINTR;
}
@@ -1798,19 +1854,18 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
return ret;
}
-static int
-IPSET_CBFN_AD(ip_set_ad, struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- enum ipset_adt adt,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
-{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+static int IPSET_CBFN_AD(ip_set_ad, struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ enum ipset_adt adt,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack, const struct nfnl_info *info)
+{
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
- u32 flags = flag_exist(nlh);
+ u32 flags = flag_exist(INFO_NLH(info, nlh));
bool use_lineno;
int ret = 0;
@@ -1835,7 +1890,7 @@ IPSET_CBFN_AD(ip_set_ad, struct net *net, struct sock *ctnl,
attr[IPSET_ATTR_DATA],
set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
- ret = call_ad(ctnl, skb, set, tb, adt, flags,
+ ret = CALL_AD(net, ctnl, skb, set, tb, adt, flags,
use_lineno);
} else {
int nla_rem;
@@ -1846,7 +1901,7 @@ IPSET_CBFN_AD(ip_set_ad, struct net *net, struct sock *ctnl,
NLA_PARSE_NESTED(tb, IPSET_ATTR_ADT_MAX, nla,
set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
- ret = call_ad(ctnl, skb, set, tb, adt,
+ ret = CALL_AD(net, ctnl, skb, set, tb, adt,
flags, use_lineno);
if (ret < 0)
return ret;
@@ -1859,20 +1914,22 @@ static int
IPSET_CBFN(ip_set_uadd, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- return IPSET_CBFN_AD(ip_set_ad, net, ctnl, skb,
- IPSET_ADD, nlh, attr, extack);
+ return IPSET_CBFN_AD(ip_set_ad, INFO_NET(info, net), INFO_SK(info, ctnl), skb,
+ IPSET_ADD, INFO_NLH(info, nlh), attr, extack, info);
}
static int
IPSET_CBFN(ip_set_udel, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- return IPSET_CBFN_AD(ip_set_ad, net, ctnl, skb,
- IPSET_DEL, nlh, attr, extack);
+ return IPSET_CBFN_AD(ip_set_ad, INFO_NET(info, net), INFO_SK(info, ctnl), skb,
+ IPSET_DEL, INFO_NLH(info, nlh), attr, extack, info);
}
static int
@@ -1880,9 +1937,10 @@ IPSET_CBFN(ip_set_utest, struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
@@ -1918,13 +1976,13 @@ static int
IPSET_CBFN(ip_set_header, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
const struct ip_set *set;
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
- int ret = 0;
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME]))
@@ -1938,7 +1996,7 @@ IPSET_CBFN(ip_set_header, struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0,
IPSET_CMD_HEADER);
if (!nlh2)
goto nlmsg_failure;
@@ -1950,11 +2008,7 @@ IPSET_CBFN(ip_set_header, struct net *net, struct sock *ctnl,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb));
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -1976,7 +2030,8 @@ static int
IPSET_CBFN(ip_set_type, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1999,7 +2054,7 @@ IPSET_CBFN(ip_set_type, struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0,
IPSET_CMD_TYPE);
if (!nlh2)
goto nlmsg_failure;
@@ -2012,11 +2067,7 @@ IPSET_CBFN(ip_set_type, struct net *net, struct sock *ctnl,
nlmsg_end(skb2, nlh2);
pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
- ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb));
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -2036,11 +2087,11 @@ static int
IPSET_CBFN(ip_set_protocol, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
- int ret = 0;
if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
return -IPSET_ERR_PROTOCOL;
@@ -2049,7 +2100,7 @@ IPSET_CBFN(ip_set_protocol, struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0,
IPSET_CMD_PROTOCOL);
if (!nlh2)
goto nlmsg_failure;
@@ -2059,11 +2110,7 @@ IPSET_CBFN(ip_set_protocol, struct net *net, struct sock *ctnl,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb));
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -2078,14 +2125,14 @@ static int
IPSET_CBFN(ip_set_byname, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
ip_set_id_t id = IPSET_INVALID_ID;
const struct ip_set *set;
- int ret = 0;
if (unlikely(protocol_failed(attr) ||
!attr[IPSET_ATTR_SETNAME]))
@@ -2099,7 +2146,7 @@ IPSET_CBFN(ip_set_byname, struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0,
IPSET_CMD_GET_BYNAME);
if (!nlh2)
goto nlmsg_failure;
@@ -2109,11 +2156,7 @@ IPSET_CBFN(ip_set_byname, struct net *net, struct sock *ctnl,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb));
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -2131,14 +2174,14 @@ static int
IPSET_CBFN(ip_set_byindex, struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ const struct nfnl_info *info)
{
- struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+ struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl, info));
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
ip_set_id_t id = IPSET_INVALID_ID;
const struct ip_set *set;
- int ret = 0;
if (unlikely(protocol_failed(attr) ||
!attr[IPSET_ATTR_INDEX]))
@@ -2155,7 +2198,7 @@ IPSET_CBFN(ip_set_byindex, struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_PORTID(skb), INFO_NLH(info, nlh)->nlmsg_seq, 0,
IPSET_CMD_GET_BYINDEX);
if (!nlh2)
goto nlmsg_failure;
@@ -2164,11 +2207,7 @@ IPSET_CBFN(ip_set_byindex, struct net *net, struct sock *ctnl,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return NFNETLINK_UNICAST(INFO_SK(info, ctnl), skb2, INFO_NET(info, net), NETLINK_PORTID(skb));
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -2180,80 +2219,96 @@ nlmsg_failure:
static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
[IPSET_CMD_NONE] = {
.call = ip_set_none,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
},
[IPSET_CMD_CREATE] = {
.call = ip_set_create,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_create_policy,
},
[IPSET_CMD_DESTROY] = {
.call = ip_set_destroy,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_FLUSH] = {
.call = ip_set_flush,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_RENAME] = {
.call = ip_set_rename,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname2_policy,
},
[IPSET_CMD_SWAP] = {
.call = ip_set_swap,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname2_policy,
},
[IPSET_CMD_LIST] = {
.call = ip_set_dump,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_dump_policy,
},
[IPSET_CMD_SAVE] = {
.call = ip_set_dump,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_ADD] = {
.call = ip_set_uadd,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_adt_policy,
},
[IPSET_CMD_DEL] = {
.call = ip_set_udel,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_adt_policy,
},
[IPSET_CMD_TEST] = {
.call = ip_set_utest,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_adt_policy,
},
[IPSET_CMD_HEADER] = {
.call = ip_set_header,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_TYPE] = {
.call = ip_set_type,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_type_policy,
},
[IPSET_CMD_PROTOCOL] = {
.call = ip_set_protocol,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_protocol_policy,
},
[IPSET_CMD_GET_BYNAME] = {
.call = ip_set_byname,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_GET_BYINDEX] = {
.call = ip_set_byindex,
+ SET_NFNL_CALLBACK_TYPE(NFNL_CB_MUTEX)
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_index_policy,
},
@@ -2426,7 +2481,6 @@ ip_set_net_init(struct net *net)
#else
goto err_alloc;
#endif
- inst->is_deleted = false;
inst->is_destroyed = false;
rcu_assign_pointer(inst->ip_set_list, list);
return 0;
@@ -2443,20 +2497,6 @@ ip_set_net_exit(struct net *net)
{
struct ip_set_net *inst = ip_set_pernet(net);
- struct ip_set *set = NULL;
- ip_set_id_t i;
-
- inst->is_deleted = true; /* flag for ip_set_nfnl_put */
-
- nfnl_lock(NFNL_SUBSYS_IPSET);
- for (i = 0; i < inst->ip_set_max; i++) {
- set = ip_set(inst, i);
- if (set) {
- ip_set(inst, i) = NULL;
- ip_set_destroy_set(set);
- }
- }
- nfnl_unlock(NFNL_SUBSYS_IPSET);
kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
#ifndef HAVE_NET_OPS_ID
kvfree(inst);
@@ -2521,8 +2561,8 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
UNREGISTER_PERNET_SUBSYS(&ip_set_net_ops);
+
pr_debug("these are the famous last words\n");
}
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_gen.h b/kernel/net/netfilter/ipset/ip_set_hash_gen.h
index 1767fed..76c3894 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/kernel/net/netfilter/ipset/ip_set_hash_gen.h
@@ -8,13 +8,21 @@
#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
+#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>
-#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
-#define ipset_dereference_protected(p, set) \
- __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock))
-
-#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
+#define __ipset_dereference(p) \
+ rcu_dereference_protected(p, 1)
+#define ipset_dereference_nfnl(p) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+#define ipset_dereference_set(p, set) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+ lockdep_is_held(&(set)->lock))
+#define ipset_dereference_bh_nfnl(p) \
+ rcu_dereference_bh_check(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* Hashing which uses arrays to resolve clashing. The hash table is resized
* (doubled) when searching becomes too long.
@@ -30,37 +38,12 @@
*/
/* Number of elements to store in an initial array block */
-#define AHASH_INIT_SIZE 4
+#define AHASH_INIT_SIZE 2
/* Max number of elements to store in an array block */
-#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE)
+#define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE)
/* Max muber of elements in the array block when tuned */
#define AHASH_MAX_TUNED 64
-
-/* Max number of elements can be tuned */
-#ifdef IP_SET_HASH_WITH_MULTI
-#define AHASH_MAX(h) ((h)->ahash_max)
-
-static u8
-tune_ahash_max(u8 curr, u32 multi)
-{
- u32 n;
-
- if (multi < curr)
- return curr;
-
- n = curr + AHASH_INIT_SIZE;
- /* Currently, at listing one hash bucket must fit into a message.
- * Therefore we have a hard limit here.
- */
- return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
-}
-
-#define TUNE_AHASH_MAX(h, multi) \
- ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
-#else
-#define AHASH_MAX(h) AHASH_MAX_SIZE
-#define TUNE_AHASH_MAX(h, multi)
-#endif
+#define AHASH_MAX(h) ((h)->bucketsize)
/* A hash bucket */
struct hbucket {
@@ -69,16 +52,40 @@ struct hbucket {
DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
u8 pos; /* position of the first free entry */
- unsigned char value[0] /* the array of the values */
+ unsigned char value[] /* the array of the values */
__aligned(__alignof__(u64));
};
+/* Region size for locking == 2^HTABLE_REGION_BITS */
+#define HTABLE_REGION_BITS 10
+#define ahash_numof_locks(htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 1 \
+ : jhash_size((htable_bits) - HTABLE_REGION_BITS))
+#define ahash_sizeof_regions(htable_bits) \
+ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
+#define ahash_region(n, htable_bits) \
+ ((n) % ahash_numof_locks(htable_bits))
+#define ahash_bucket_start(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 0 \
+ : (h) * jhash_size(HTABLE_REGION_BITS))
+#define ahash_bucket_end(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \
+ : ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
+
+struct htable_gc {
+ struct delayed_work dwork;
+ struct ip_set *set; /* Set the gc belongs to */
+ u32 region; /* Last gc run position */
+};
+
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
atomic_t ref; /* References for resizing */
- atomic_t uref; /* References for dumping */
+ atomic_t uref; /* References for dumping and gc */
u8 htable_bits; /* size of hash table == 2^htable_bits */
- struct hbucket __rcu *bucket[0]; /* hashtable buckets */
+ u32 maxelem; /* Maxelem per region */
+ struct ip_set_region *hregion; /* Region locks and ext sizes */
+ struct hbucket __rcu *bucket[]; /* hashtable buckets */
};
#define hbucket(h, i) ((h)->bucket[i])
@@ -101,31 +108,17 @@ htable_size(u8 hbits)
{
size_t hsize;
- /* We must fit both into u32 in jhash and size_t */
+ /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
- if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
+ if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;
return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}
-/* Compute htable_bits from the user input parameter hashsize */
-static u8
-htable_bits(u32 hashsize)
-{
- /* Assume that hashsize == 2^htable_bits */
- u8 bits = fls(hashsize - 1);
-
- if (jhash_size(bits) != hashsize)
- /* Round up to the first 2^n value */
- bits = fls(hashsize);
-
- return bits;
-}
-
#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i) (cidr[i])
@@ -163,6 +156,21 @@ htable_bits(u32 hashsize)
#define NLEN 0
#endif /* IP_SET_HASH_WITH_NETS */
+#define SET_ELEM_EXPIRED(set, d) \
+ (SET_WITH_TIMEOUT(set) && \
+ ip_set_timeout_expired(ext_timeout(d, set)))
+
+#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
+static const union nf_inet_addr onesmask = {
+ .all[0] = 0xffffffff,
+ .all[1] = 0xffffffff,
+ .all[2] = 0xffffffff,
+ .all[3] = 0xffffffff
+};
+
+static const union nf_inet_addr zeromask = {};
+#endif
+
#endif /* _IP_SET_HASH_GEN_H */
#ifndef MTYPE
@@ -206,12 +214,15 @@ htable_bits(u32 hashsize)
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
-#undef mtype_expire
#undef mtype_resize
+#undef mtype_ext_size
+#undef mtype_resize_ad
#undef mtype_head
#undef mtype_list
+#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
+#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match
@@ -248,12 +259,15 @@ htable_bits(u32 hashsize)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
-#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
+#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size)
+#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
@@ -276,36 +290,44 @@ htable_bits(u32 hashsize)
/* The generic hash structure */
struct htype {
struct htable __rcu *table; /* the hash table */
- struct timer_list gc; /* garbage collection when timeout enabled */
-#ifdef HAVE_TIMER_SETUP
- struct ip_set *set; /* attached to this ip_set */
-#endif
+ struct htable_gc gc; /* gc workqueue */
u32 maxelem; /* max elements in the hash */
u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
u32 markmask; /* markmask value for mark mask to store */
#endif
-#ifdef IP_SET_HASH_WITH_MULTI
- u8 ahash_max; /* max elements in an array block */
-#endif
-#ifdef IP_SET_HASH_WITH_NETMASK
+ u8 bucketsize; /* max elements in an array block */
+#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
u8 netmask; /* netmask value for subnets to store */
+ union nf_inet_addr bitmask; /* stores bitmask */
#endif
+ struct list_head ad; /* Resize add|del backlist */
struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETS
struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
#endif
};
+/* ADD|DEL entries saved during resize */
+struct mtype_resize_ad {
+ struct list_head list;
+ enum ipset_adt ad; /* ADD|DEL element */
+ struct mtype_elem d; /* Element value */
+ struct ip_set_ext ext; /* Extensions for ADD */
+ struct ip_set_ext mext; /* Target extensions for ADD */
+ u32 flags; /* Flags for ADD */
+};
+
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
* sized networks. cidr == real cidr + 1 to support /0.
*/
static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
+mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
int i, j;
+ spin_lock_bh(&set->lock);
/* Add in increasing prefix order, so larger cidr first */
for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
if (j != -1) {
@@ -314,7 +336,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
j = i;
} else if (h->nets[i].cidr[n] == cidr) {
h->nets[CIDR_POS(cidr)].nets[n]++;
- return;
+ goto unlock;
}
}
if (j != -1) {
@@ -323,24 +345,29 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
}
h->nets[i].cidr[n] = cidr;
h->nets[CIDR_POS(cidr)].nets[n] = 1;
+unlock:
+ spin_unlock_bh(&set->lock);
}
static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
+mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
u8 i, j, net_end = NLEN - 1;
+ spin_lock_bh(&set->lock);
for (i = 0; i < NLEN; i++) {
if (h->nets[i].cidr[n] != cidr)
continue;
h->nets[CIDR_POS(cidr)].nets[n]--;
if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
- return;
+ goto unlock;
for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
h->nets[j].cidr[n] = 0;
- return;
+ goto unlock;
}
+unlock:
+ spin_unlock_bh(&set->lock);
}
#endif
@@ -348,7 +375,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
static size_t
mtype_ahash_memsize(const struct htype *h, const struct htable *t)
{
- return sizeof(*h) + sizeof(*t);
+ return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits);
}
/* Get the ith element from the array block n */
@@ -372,24 +399,29 @@ mtype_flush(struct ip_set *set)
struct htype *h = set->data;
struct htable *t;
struct hbucket *n;
- u32 i;
-
- t = ipset_dereference_protected(h->table, set);
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
- if (!n)
- continue;
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set, n);
- /* FIXME: use slab cache */
- rcu_assign_pointer(hbucket(t, i), NULL);
- kfree_rcu(n, rcu);
+ u32 r, i;
+
+ t = ipset_dereference_nfnl(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ }
+ t->hregion[r].ext_size = 0;
+ t->hregion[r].elements = 0;
+ spin_unlock_bh(&t->hregion[r].lock);
}
#ifdef IP_SET_HASH_WITH_NETS
memset(h->nets, 0, sizeof(h->nets));
#endif
- set->elements = 0;
- set->ext_size = 0;
}
/* Destroy the hashtable part of the set */
@@ -400,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
+ n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -409,6 +441,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
kfree(n);
}
+ ip_set_free(t->hregion);
ip_set_free(t);
}
@@ -417,28 +450,18 @@ static void
mtype_destroy(struct ip_set *set)
{
struct htype *h = set->data;
+ struct list_head *l, *lt;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&h->gc);
-
- mtype_ahash_destroy(set,
- __ipset_dereference_protected(h->table, 1), true);
+ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
+ list_for_each_safe(l, lt, &h->ad) {
+ list_del(l);
+ kfree(l);
+ }
kfree(h);
set->data = NULL;
}
-static void
-mtype_gc_init(struct ip_set *set, void (*gc)(GC_ARG))
-{
- struct htype *h = set->data;
-
- TIMER_SETUP(&h->gc, gc);
- mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
- pr_debug("gc initialized, run in every %u\n",
- IPSET_GC_PERIOD(set->timeout));
-}
-
static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
@@ -448,8 +471,8 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
/* Resizing changes htable_bits, so we ignore it */
return x->maxelem == y->maxelem &&
a->timeout == b->timeout &&
-#ifdef IP_SET_HASH_WITH_NETMASK
- x->netmask == y->netmask &&
+#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
+ nf_inet_addr_cmp(&x->bitmask, &y->bitmask) &&
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
x->markmask == y->markmask &&
@@ -457,11 +480,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
-/* Delete expired elements from the hashtable */
static void
-mtype_expire(struct ip_set *set, struct htype *h)
+mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
{
- struct htable *t;
struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
@@ -469,10 +490,12 @@ mtype_expire(struct ip_set *set, struct htype *h)
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
+ u8 htable_bits = t->htable_bits;
- t = ipset_dereference_protected(h->table, set);
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, htable_bits);
+ i < ahash_bucket_end(r, htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
for (j = 0, d = 0; j < n->pos; j++) {
@@ -488,58 +511,108 @@ mtype_expire(struct ip_set *set, struct htype *h)
smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
for (k = 0; k < IPSET_NET_COUNT; k++)
- mtype_del_cidr(h,
+ mtype_del_cidr(set, h,
NCIDR_PUT(DCIDR_GET(data->cidr, k)),
k);
#endif
+ t->hregion[r].elements--;
ip_set_ext_destroy(set, data);
- set->elements--;
d++;
}
if (d >= AHASH_INIT_SIZE) {
if (d >= n->size) {
- set->ext_size -= ext_size(n->size, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, i), NULL);
kfree_rcu(n, rcu);
continue;
}
tmp = kzalloc(sizeof(*tmp) +
- (n->size - AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
- /* Still try to delete expired elements */
+ /* Still try to delete expired elements. */
continue;
tmp->size = n->size - AHASH_INIT_SIZE;
for (j = 0, d = 0; j < n->pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
- memcpy(tmp->value + d * dsize, data, dsize);
+ memcpy(tmp->value + d * dsize,
+ data, dsize);
set_bit(d, tmp->used);
d++;
}
tmp->pos = d;
- set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, i), tmp);
kfree_rcu(n, rcu);
}
}
+ spin_unlock_bh(&t->hregion[r].lock);
}
static void
-mtype_gc(GC_ARG)
+mtype_gc(struct work_struct *work)
{
- INIT_GC_VARS(htype, h);
+ struct htable_gc *gc;
+ struct ip_set *set;
+ struct htype *h;
+ struct htable *t;
+ u32 r, numof_locks;
+ unsigned int next_run;
+
+ gc = container_of(work, struct htable_gc, dwork.work);
+ set = gc->set;
+ h = set->data;
- pr_debug("called\n");
spin_lock_bh(&set->lock);
- mtype_expire(set, h);
+ t = ipset_dereference_set(h->table, set);
+ atomic_inc(&t->uref);
+ numof_locks = ahash_numof_locks(t->htable_bits);
+ r = gc->region++;
+ if (r >= numof_locks) {
+ r = gc->region = 0;
+ }
+ next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks;
+ if (next_run < HZ/10)
+ next_run = HZ/10;
spin_unlock_bh(&set->lock);
- h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
- add_timer(&h->gc);
+ mtype_gc_do(set, h, t, r);
+
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by expire: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
}
+static void
+mtype_gc_init(struct htable_gc *gc)
+{
+ INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc);
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
+}
+
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+}
+
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+
/* Resize a hash: create a new hash table with doubling the hashsize
* and inserting the elements to it. Repeat until we succeed or
* fail due to memory pressures.
@@ -550,7 +623,7 @@ mtype_resize(struct ip_set *set, bool retried)
struct htype *h = set->data;
struct htable *t, *orig;
u8 htable_bits;
- size_t extsize, dsize = set->dsize;
+ size_t hsize, dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
struct mtype_elem *tmp;
@@ -558,7 +631,9 @@ mtype_resize(struct ip_set *set, bool retried)
struct mtype_elem *data;
struct mtype_elem *d;
struct hbucket *n, *m;
- u32 i, j, key;
+ struct list_head *l, *lt;
+ struct mtype_resize_ad *x;
+ u32 i, j, r, nr, key;
int ret;
#ifdef IP_SET_HASH_WITH_NETS
@@ -566,108 +641,140 @@ mtype_resize(struct ip_set *set, bool retried)
if (!tmp)
return -ENOMEM;
#endif
- rcu_read_lock_bh();
- orig = rcu_dereference_bh_nfnl(h->table);
+ orig = ipset_dereference_bh_nfnl(h->table);
htable_bits = orig->htable_bits;
- rcu_read_unlock_bh();
retry:
ret = 0;
htable_bits++;
- if (!htable_bits) {
- /* In case we have plenty of memory :-) */
- pr_warn("Cannot increase the hashsize of set %s further\n",
- set->name);
- ret = -IPSET_ERR_HASH_FULL;
+ if (!htable_bits)
+ goto hbwarn;
+ hsize = htable_size(htable_bits);
+ if (!hsize)
+ goto hbwarn;
+ t = ip_set_alloc(hsize);
+ if (!t) {
+ ret = -ENOMEM;
goto out;
}
- t = ip_set_alloc(htable_size(htable_bits));
- if (!t) {
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
+ if (!t->hregion) {
+ ip_set_free(t);
ret = -ENOMEM;
goto out;
}
t->htable_bits = htable_bits;
+ t->maxelem = h->maxelem / ahash_numof_locks(htable_bits);
+ for (i = 0; i < ahash_numof_locks(htable_bits); i++)
+ spin_lock_init(&t->hregion[i].lock);
- spin_lock_bh(&set->lock);
- orig = __ipset_dereference_protected(h->table, 1);
- /* There can't be another parallel resizing, but dumping is possible */
+ /* There can't be another parallel resizing,
+ * but dumping, gc, kernel side add/del are possible
+ */
+ orig = ipset_dereference_bh_nfnl(h->table);
atomic_set(&orig->ref, 1);
atomic_inc(&orig->uref);
- extsize = 0;
pr_debug("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
- for (i = 0; i < jhash_size(orig->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(orig, i), 1);
- if (!n)
- continue;
- for (j = 0; j < n->pos; j++) {
- if (!test_bit(j, n->used))
+ for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) {
+ /* Expire may replace a hbucket with another one */
+ rcu_read_lock_bh();
+ for (i = ahash_bucket_start(r, orig->htable_bits);
+ i < ahash_bucket_end(r, orig->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(orig, i));
+ if (!n)
continue;
- data = ahash_data(n, j, dsize);
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ if (SET_ELEM_EXPIRED(set, data))
+ continue;
#ifdef IP_SET_HASH_WITH_NETS
- /* We have readers running parallel with us,
- * so the live data cannot be modified.
- */
- flags = 0;
- memcpy(tmp, data, dsize);
- data = tmp;
- mtype_data_reset_flags(data, &flags);
+ /* We have readers running parallel with us,
+ * so the live data cannot be modified.
+ */
+ flags = 0;
+ memcpy(tmp, data, dsize);
+ data = tmp;
+ mtype_data_reset_flags(data, &flags);
#endif
- key = HKEY(data, h->initval, htable_bits);
- m = __ipset_dereference_protected(hbucket(t, key), 1);
- if (!m) {
- m = kzalloc(sizeof(*m) +
+ key = HKEY(data, h->initval, htable_bits);
+ m = __ipset_dereference(hbucket(t, key));
+ nr = ahash_region(key, htable_bits);
+ if (!m) {
+ m = kzalloc(sizeof(*m) +
AHASH_INIT_SIZE * dsize,
GFP_ATOMIC);
- if (!m) {
- ret = -ENOMEM;
- goto cleanup;
- }
- m->size = AHASH_INIT_SIZE;
- extsize += ext_size(AHASH_INIT_SIZE, dsize);
- RCU_INIT_POINTER(hbucket(t, key), m);
- } else if (m->pos >= m->size) {
- struct hbucket *ht;
-
- if (m->size >= AHASH_MAX(h)) {
- ret = -EAGAIN;
- } else {
- ht = kzalloc(sizeof(*ht) +
+ if (!m) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ m->size = AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ RCU_INIT_POINTER(hbucket(t, key), m);
+ } else if (m->pos >= m->size) {
+ struct hbucket *ht;
+
+ if (m->size >= AHASH_MAX(h)) {
+ ret = -EAGAIN;
+ } else {
+ ht = kzalloc(sizeof(*ht) +
(m->size + AHASH_INIT_SIZE)
* dsize,
GFP_ATOMIC);
- if (!ht)
- ret = -ENOMEM;
+ if (!ht)
+ ret = -ENOMEM;
+ }
+ if (ret < 0)
+ goto cleanup;
+ memcpy(ht, m, sizeof(struct hbucket) +
+ m->size * dsize);
+ ht->size = m->size + AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ kfree(m);
+ m = ht;
+ RCU_INIT_POINTER(hbucket(t, key), ht);
}
- if (ret < 0)
- goto cleanup;
- memcpy(ht, m, sizeof(struct hbucket) +
- m->size * dsize);
- ht->size = m->size + AHASH_INIT_SIZE;
- extsize += ext_size(AHASH_INIT_SIZE, dsize);
- kfree(m);
- m = ht;
- RCU_INIT_POINTER(hbucket(t, key), ht);
- }
- d = ahash_data(m, m->pos, dsize);
- memcpy(d, data, dsize);
- set_bit(m->pos++, m->used);
+ d = ahash_data(m, m->pos, dsize);
+ memcpy(d, data, dsize);
+ set_bit(m->pos++, m->used);
+ t->hregion[nr].elements++;
#ifdef IP_SET_HASH_WITH_NETS
- mtype_data_reset_flags(d, &flags);
+ mtype_data_reset_flags(d, &flags);
#endif
+ }
}
+ rcu_read_unlock_bh();
}
- rcu_assign_pointer(h->table, t);
- set->ext_size = extsize;
- spin_unlock_bh(&set->lock);
+ /* There can't be any other writer. */
+ rcu_assign_pointer(h->table, t);
/* Give time to other readers of the set */
synchronize_rcu();
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- /* If there's nobody else dumping the table, destroy it */
+ /* Add/delete elements processed by the SET target during resize.
+ * Kernel-side add cannot trigger a resize and userspace actions
+ * are serialized by the mutex.
+ */
+ list_for_each_safe(l, lt, &h->ad) {
+ x = list_entry(l, struct mtype_resize_ad, list);
+ if (x->ad == IPSET_ADD) {
+ mtype_add(set, &x->d, &x->ext, &x->mext, x->flags);
+ } else {
+ mtype_del(set, &x->d, NULL, NULL, 0);
+ }
+ list_del(l);
+ kfree(l);
+ }
+ /* If there's nobody else using the table, destroy it */
if (atomic_dec_and_test(&orig->uref)) {
pr_debug("Table destroy by resize %p\n", orig);
mtype_ahash_destroy(set, orig, false);
@@ -680,13 +787,49 @@ out:
return ret;
cleanup:
+ rcu_read_unlock_bh();
atomic_set(&orig->ref, 0);
atomic_dec(&orig->uref);
- spin_unlock_bh(&set->lock);
mtype_ahash_destroy(set, t, false);
if (ret == -EAGAIN)
goto retry;
goto out;
+
+hbwarn:
+ /* In case we have plenty of memory :-) */
+ pr_warn("Cannot increase the hashsize of set %s further\n", set->name);
+ ret = -IPSET_ERR_HASH_FULL;
+ goto out;
+}
+
+/* Get the current number of elements and ext_size in the set */
+static void
+mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
+{
+ struct htype *h = set->data;
+ const struct htable *t;
+ u32 i, j, r;
+ struct hbucket *n;
+ struct mtype_elem *data;
+
+ *elements = 0;
+ t = rcu_dereference_bh(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = rcu_dereference_bh(hbucket(t, i));
+ if (!n)
+ continue;
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, set->dsize);
+ if (!SET_ELEM_EXPIRED(set, data))
+ (*elements)++;
+ }
+ }
+ *ext_size += t->hregion[r].ext_size;
+ }
}
/* Add an element to a hash and update the internal counters when succeeded,
@@ -701,32 +844,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n, *old = ERR_PTR(-ENOENT);
- int i, j = -1;
+ int i, j = -1, ret;
bool flag_exist = flags & IPSET_FLAG_EXIST;
bool deleted = false, forceadd = false, reuse = false;
- u32 key, multi = 0;
+ u32 r, key, multi = 0, elements, maxelem;
- if (set->elements >= h->maxelem) {
- if (SET_WITH_TIMEOUT(set))
- /* FIXME: when set is full, we slow down here */
- mtype_expire(set, h);
- if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ elements = t->hregion[r].elements;
+ maxelem = t->maxelem;
+ if (elements >= maxelem) {
+ u32 e;
+ if (SET_WITH_TIMEOUT(set)) {
+ rcu_read_unlock_bh();
+ mtype_gc_do(set, h, t, r);
+ rcu_read_lock_bh();
+ }
+ maxelem = h->maxelem;
+ elements = 0;
+ for (e = 0; e < ahash_numof_locks(t->htable_bits); e++)
+ elements += t->hregion[e].elements;
+ if (elements >= maxelem && SET_WITH_FORCEADD(set))
forceadd = true;
}
+ rcu_read_unlock_bh();
- t = ipset_dereference_protected(h->table, set);
- key = HKEY(value, h->initval, t->htable_bits);
- n = __ipset_dereference_protected(hbucket(t, key), 1);
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n) {
- if (forceadd || set->elements >= h->maxelem)
+ if (forceadd || elements >= maxelem)
goto set_full;
old = NULL;
n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
GFP_ATOMIC);
- if (!n)
- return -ENOMEM;
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
n->size = AHASH_INIT_SIZE;
- set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
goto copy_elem;
}
for (i = 0; i < n->pos; i++) {
@@ -740,67 +900,75 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi)) {
- if (flag_exist ||
- (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))) {
+ if (flag_exist || SET_ELEM_EXPIRED(set, data)) {
/* Just the extensions could be overwritten */
j = i;
goto overwrite_extensions;
}
- return -IPSET_ERR_EXIST;
+ ret = -IPSET_ERR_EXIST;
+ goto unlock;
}
/* Reuse first timed out entry */
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)) &&
- j == -1) {
+ if (SET_ELEM_EXPIRED(set, data) && j == -1) {
j = i;
reuse = true;
}
}
if (reuse || forceadd) {
+ if (j == -1)
+ j = 0;
data = ahash_data(n, j, set->dsize);
if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_del_cidr(h,
+ mtype_del_cidr(set, h,
NCIDR_PUT(DCIDR_GET(data->cidr, i)),
i);
#endif
ip_set_ext_destroy(set, data);
- set->elements--;
+ t->hregion[r].elements--;
}
goto copy_data;
}
- if (set->elements >= h->maxelem)
+ if (elements >= maxelem)
goto set_full;
/* Create a new slot */
if (n->pos >= n->size) {
- TUNE_AHASH_MAX(h, multi);
+#ifdef IP_SET_HASH_WITH_MULTI
+ if (h->bucketsize >= AHASH_MAX_TUNED)
+ goto set_full;
+ else if (h->bucketsize <= multi)
+ h->bucketsize += AHASH_INIT_SIZE;
+#endif
if (n->size >= AHASH_MAX(h)) {
/* Trigger rehashing */
mtype_data_next(&h->next, d);
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto resize;
}
old = n;
n = kzalloc(sizeof(*n) +
(old->size + AHASH_INIT_SIZE) * set->dsize,
GFP_ATOMIC);
- if (!n)
- return -ENOMEM;
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
memcpy(n, old, sizeof(struct hbucket) +
old->size * set->dsize);
n->size = old->size + AHASH_INIT_SIZE;
- set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
}
copy_elem:
j = n->pos++;
data = ahash_data(n, j, set->dsize);
copy_data:
- set->elements++;
+ t->hregion[r].elements++;
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
+ mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
#endif
memcpy(data, d, sizeof(struct mtype_elem));
overwrite_extensions:
@@ -823,13 +991,41 @@ overwrite_extensions:
if (old)
kfree_rcu(old, rcu);
}
+ ret = 0;
+resize:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in process and kernel side add, save values */
+ struct mtype_resize_ad *x;
+
+ x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC);
+ if (!x)
+ /* Don't bother */
+ goto out;
+ x->ad = IPSET_ADD;
+ memcpy(&x->d, value, sizeof(struct mtype_elem));
+ memcpy(&x->ext, ext, sizeof(struct ip_set_ext));
+ memcpy(&x->mext, mext, sizeof(struct ip_set_ext));
+ x->flags = flags;
+ spin_lock_bh(&set->lock);
+ list_add_tail(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ goto out;
- return 0;
set_full:
if (net_ratelimit())
pr_warn("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
- return -IPSET_ERR_HASH_FULL;
+ set->name, maxelem);
+ ret = -IPSET_ERR_HASH_FULL;
+unlock:
+ spin_unlock_bh(&t->hregion[r].lock);
+out:
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by add: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ return ret;
}
/* Delete an element from the hash and free up space if possible.
@@ -843,13 +1039,23 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n;
- int i, j, k, ret = -IPSET_ERR_EXIST;
+ struct mtype_resize_ad *x = NULL;
+ int i, j, k, r, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
size_t dsize = set->dsize;
- t = ipset_dereference_protected(h->table, set);
+ /* Userspace add and resize is excluded by the mutex.
+ * Kernespace add does not trigger resize.
+ */
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
key = HKEY(value, h->initval, t->htable_bits);
- n = __ipset_dereference_protected(hbucket(t, key), 1);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ rcu_read_unlock_bh();
+
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n)
goto out;
for (i = 0, k = 0; i < n->pos; i++) {
@@ -860,8 +1066,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
data = ahash_data(n, i, dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))
+ if (SET_ELEM_EXPIRED(set, data))
goto out;
ret = 0;
@@ -869,20 +1074,33 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
smp_mb__after_atomic();
if (i + 1 == n->pos)
n->pos--;
- set->elements--;
+ t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
- mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
- j);
+ mtype_del_cidr(set, h,
+ NCIDR_PUT(DCIDR_GET(d->cidr, j)), j);
#endif
ip_set_ext_destroy(set, data);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in process and kernel side del,
+ * save values
+ */
+ x = kzalloc(sizeof(struct mtype_resize_ad),
+ GFP_ATOMIC);
+ if (x) {
+ x->ad = IPSET_DEL;
+ memcpy(&x->d, value,
+ sizeof(struct mtype_elem));
+ x->flags = flags;
+ }
+ }
for (; i < n->pos; i++) {
if (!test_bit(i, n->used))
k++;
}
if (n->pos == 0 && k == 0) {
- set->ext_size -= ext_size(n->size, dsize);
+ t->hregion[r].ext_size -= ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, key), NULL);
kfree_rcu(n, rcu);
} else if (k >= AHASH_INIT_SIZE) {
@@ -901,7 +1119,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
k++;
}
tmp->pos = k;
- set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, key), tmp);
kfree_rcu(n, rcu);
}
@@ -909,6 +1128,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
out:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (x) {
+ spin_lock_bh(&set->lock);
+ list_add(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by del: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
return ret;
}
@@ -994,6 +1223,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int i, ret = 0;
u32 key, multi = 0;
+ rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
/* If we test an IP address and not a network address,
@@ -1025,6 +1255,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
goto out;
}
out:
+ rcu_read_unlock_bh();
return ret;
}
@@ -1036,23 +1267,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
const struct htable *t;
struct nlattr *nested;
size_t memsize;
+ u32 elements = 0;
+ size_t ext_size = 0;
u8 htable_bits;
- /* If any members have expired, set->elements will be wrong
- * mytype_expire function will update it with the right count.
- * we do not hold set->lock here, so grab it first.
- * set->elements can still be incorrect in the case of a huge set,
- * because elements might time out during the listing.
- */
- if (SET_WITH_TIMEOUT(set)) {
- spin_lock_bh(&set->lock);
- mtype_expire(set, h);
- spin_unlock_bh(&set->lock);
- }
-
rcu_read_lock_bh();
- t = rcu_dereference_bh_nfnl(h->table);
- memsize = mtype_ahash_memsize(h, t) + set->ext_size;
+ t = rcu_dereference_bh(h->table);
+ mtype_ext_size(set, &elements, &ext_size);
+ memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
htable_bits = t->htable_bits;
rcu_read_unlock_bh();
@@ -1063,18 +1285,35 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
htonl(jhash_size(htable_bits))) ||
nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
goto nla_put_failure;
+#ifdef IP_SET_HASH_WITH_BITMASK
+ /* if netmask is set to anything other than HOST_MASK we know that the user supplied netmask
+ * and not bitmask. These two are mutually exclusive. */
+ if (h->netmask == HOST_MASK && !nf_inet_addr_cmp(&onesmask, &h->bitmask)) {
+ if (set->family == NFPROTO_IPV4) {
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_BITMASK, h->bitmask.ip))
+ goto nla_put_failure;
+ } else if (set->family == NFPROTO_IPV6) {
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_BITMASK, &h->bitmask.in6))
+ goto nla_put_failure;
+ }
+ }
+#endif
#ifdef IP_SET_HASH_WITH_NETMASK
- if (h->netmask != HOST_MASK &&
- nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
+ if (h->netmask != HOST_MASK && nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
goto nla_put_failure;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
goto nla_put_failure;
#endif
+ if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) {
+ if (nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize) ||
+ nla_put_net32(skb, IPSET_ATTR_INITVAL, htonl(h->initval)))
+ goto nla_put_failure;
+ }
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
- nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -1094,15 +1333,15 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
if (start) {
rcu_read_lock_bh();
- t = rcu_dereference_bh_nfnl(h->table);
+ t = ipset_dereference_bh_nfnl(h->table);
atomic_inc(&t->uref);
cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
rcu_read_unlock_bh();
} else if (cb->args[IPSET_CB_PRIVATE]) {
t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
- /* Resizing didn't destroy the hash table */
- pr_debug("Table destroy by dump: %p\n", t);
+ pr_debug("Table destroy after resize "
+ " by dump: %p\n", t);
mtype_ahash_destroy(set, t, false);
}
cb->args[IPSET_CB_PRIVATE] = 0;
@@ -1144,8 +1383,7 @@ mtype_list(const struct ip_set *set,
if (!test_bit(i, n->used))
continue;
e = ahash_data(n, i, set->dsize);
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ if (SET_ELEM_EXPIRED(set, e))
continue;
pr_debug("list hash %lu hbucket %p i %u, data %p\n",
cb->args[IPSET_CB_ARG0], n, i, e);
@@ -1211,6 +1449,8 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
+ .region_lock = true,
};
#ifdef IP_SET_EMIT_CREATE
@@ -1223,12 +1463,15 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
u32 markmask;
#endif
u8 hbits;
-#ifdef IP_SET_HASH_WITH_NETMASK
- u8 netmask;
+#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
+ int ret __attribute__((unused)) = 0;
+ u8 netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
+ union nf_inet_addr bitmask = onesmask;
#endif
size_t hsize;
struct htype *h;
struct htable *t;
+ u32 i;
pr_debug("Create set %s with family %s\n",
set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
@@ -1261,7 +1504,6 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
- netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
if (tb[IPSET_ATTR_NETMASK]) {
netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
@@ -1269,6 +1511,33 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
(set->family == NFPROTO_IPV6 && netmask > 128) ||
netmask == 0)
return -IPSET_ERR_INVALID_NETMASK;
+
+ /* we convert netmask to bitmask and store it */
+ if (set->family == NFPROTO_IPV4)
+ bitmask.ip = ip_set_netmask(netmask);
+ else
+ ip6_netmask(&bitmask, netmask);
+ }
+#endif
+
+#ifdef IP_SET_HASH_WITH_BITMASK
+ if (tb[IPSET_ATTR_BITMASK]) {
+ /* bitmask and netmask do the same thing, allow only one of these options */
+ if (tb[IPSET_ATTR_NETMASK])
+ return -IPSET_ERR_BITMASK_NETMASK_EXCL;
+
+ if (set->family == NFPROTO_IPV4) {
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_BITMASK], &bitmask.ip);
+ if (ret || !bitmask.ip)
+ return -IPSET_ERR_INVALID_NETMASK;
+ } else if (set->family == NFPROTO_IPV6) {
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_BITMASK], &bitmask);
+ if (ret || ipv6_addr_any(&bitmask.in6))
+ return -IPSET_ERR_INVALID_NETMASK;
+ }
+
+ if (nf_inet_addr_cmp(&bitmask, &zeromask))
+ return -IPSET_ERR_INVALID_NETMASK;
}
#endif
@@ -1286,7 +1555,11 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (!h)
return -ENOMEM;
- hbits = htable_bits(hashsize);
+ /* Compute htable_bits from the user input parameter hashsize.
+ * Assume that hashsize == 2^htable_bits,
+ * otherwise round up to the first 2^n value.
+ */
+ hbits = fls(hashsize - 1);
hsize = htable_size(hbits);
if (hsize == 0) {
kfree(h);
@@ -1297,21 +1570,42 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
kfree(h);
return -ENOMEM;
}
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
+ if (!t->hregion) {
+ ip_set_free(t);
+ kfree(h);
+ return -ENOMEM;
+ }
+ h->gc.set = set;
+ for (i = 0; i < ahash_numof_locks(hbits); i++)
+ spin_lock_init(&t->hregion[i].lock);
h->maxelem = maxelem;
-#ifdef IP_SET_HASH_WITH_NETMASK
+#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
+ h->bitmask = bitmask;
h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
h->markmask = markmask;
#endif
- get_random_bytes(&h->initval, sizeof(h->initval));
-
+ if (tb[IPSET_ATTR_INITVAL])
+ h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL]));
+ else
+ get_random_bytes(&h->initval, sizeof(h->initval));
+ h->bucketsize = AHASH_MAX_SIZE;
+ if (tb[IPSET_ATTR_BUCKETSIZE]) {
+ h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]);
+ if (h->bucketsize < AHASH_INIT_SIZE)
+ h->bucketsize = AHASH_INIT_SIZE;
+ else if (h->bucketsize > AHASH_MAX_SIZE)
+ h->bucketsize = AHASH_MAX_SIZE;
+ else if (h->bucketsize % 2)
+ h->bucketsize += 1;
+ }
t->htable_bits = hbits;
+ t->maxelem = h->maxelem / ahash_numof_locks(hbits);
RCU_INIT_POINTER(h->table, t);
-#ifdef HAVE_TIMER_SETUP
- h->set = set;
-#endif
+ INIT_LIST_HEAD(&h->ad);
set->data = h;
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
@@ -1334,12 +1628,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4)
#endif
- IPSET_TOKEN(HTYPE, 4_gc_init)(set,
- IPSET_TOKEN(HTYPE, 4_gc));
+ IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
#ifndef IP_SET_PROTO_UNDEF
else
- IPSET_TOKEN(HTYPE, 6_gc_init)(set,
- IPSET_TOKEN(HTYPE, 6_gc));
+ IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
#endif
}
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_ip.c b/kernel/net/netfilter/ipset/ip_set_hash_ip.c
index 41c576f..5a2cb71 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_ip.c
@@ -24,7 +24,9 @@
/* 1 Counters support */
/* 2 Comments support */
/* 3 Forceadd support */
-#define IPSET_TYPE_REV_MAX 4 /* skbinfo support */
+/* 4 skbinfo support */
+/* 5 bucketsize, initval support */
+#define IPSET_TYPE_REV_MAX 6 /* bitmask support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -34,6 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip");
/* Type specific function prefix */
#define HTYPE hash_ip
#define IP_SET_HASH_WITH_NETMASK
+#define IP_SET_HASH_WITH_BITMASK
/* IPv4 variant */
@@ -86,7 +89,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
__be32 ip;
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
- ip &= ip_set_netmask(h->netmask);
+ ip &= h->bitmask.ip;
if (ip == 0)
return -EINVAL;
@@ -98,11 +101,11 @@ static int
hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ip4 *h = set->data;
+ struct hash_ip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, hosts;
+ u32 ip = 0, ip_to = 0, hosts, i = 0;
int ret = 0;
if (tb[IPSET_ATTR_LINENO])
@@ -119,7 +122,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- ip &= ip_set_hostmask(h->netmask);
+ ip &= ntohl(h->bitmask.ip);
e.ip = htonl(ip);
if (e.ip == 0)
return -IPSET_ERR_HASH_ELEM;
@@ -132,8 +135,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -144,18 +150,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
- if (retried) {
+ if (retried)
ip = ntohl(h->next.ip);
+ for (; ip <= ip_to; i++) {
e.ip = htonl(ip);
- }
- for (; ip <= ip_to;) {
+ if (i > IPSET_MAX_RANGE) {
+ hash_ip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
ip += hosts;
- e.ip = htonl(ip);
- if (e.ip == 0)
+ if (ip == 0)
return 0;
ret = 0;
@@ -180,12 +188,6 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
}
-static void
-hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
-{
- ip6_netmask(ip, prefix);
-}
-
static bool
hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)
{
@@ -222,7 +224,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
- hash_ip6_netmask(&e.ip, h->netmask);
+ nf_inet_addr_mask_inplace(&e.ip, &h->bitmask);
if (ipv6_addr_any(&e.ip.in6))
return -EINVAL;
@@ -261,7 +263,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- hash_ip6_netmask(&e.ip, h->netmask);
+ nf_inet_addr_mask_inplace(&e.ip, &h->bitmask);
if (ipv6_addr_any(&e.ip.in6))
return -IPSET_ERR_HASH_ELEM;
@@ -278,14 +280,17 @@ static struct ip_set_type hash_ip_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_ip_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
+ [IPSET_ATTR_BITMASK] = { .type = NLA_NESTED },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_ipmac.c b/kernel/net/netfilter/ipset/ip_set_hash_ipmac.c
index eceb7bc..467c59a 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -23,7 +23,7 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 0
+#define IPSET_TYPE_REV_MAX 1 /* bucketsize, initval support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>");
@@ -268,11 +268,13 @@ static struct ip_set_type hash_ipmac_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_ipmac_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_ipmark.c b/kernel/net/netfilter/ipset/ip_set_hash_ipmark.c
index 4ebd21b..ad2a2dc 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -23,7 +23,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Forceadd support */
-#define IPSET_TYPE_REV_MAX 2 /* skbinfo support */
+/* 2 skbinfo support */
+#define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
@@ -98,11 +99,11 @@ static int
hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipmark4 *h = set->data;
+ struct hash_ipmark4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0;
+ u32 ip, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -122,6 +123,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
+ if (e.mark == 0 && e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
@@ -134,8 +137,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (e.mark == 0 && ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -146,8 +152,12 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to; ip++, i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipmark4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
@@ -276,12 +286,14 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_ipmark_create,
.create_policy = {
[IPSET_ATTR_MARKMASK] = { .type = NLA_U32 },
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_ipport.c b/kernel/net/netfilter/ipset/ip_set_hash_ipport.c
index 7e34bc4..46f8bbf 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -26,7 +26,9 @@
/* 2 Counters support added */
/* 3 Comments support added */
/* 4 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
+/* 5 skbinfo support added */
+/* 6 bucketsize, initval support added */
+#define IPSET_TYPE_REV_MAX 7 /* bitmask support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -35,6 +37,8 @@ MODULE_ALIAS("ip_set_hash:ip,port");
/* Type specific function prefix */
#define HTYPE hash_ipport
+#define IP_SET_HASH_WITH_NETMASK
+#define IP_SET_HASH_WITH_BITMASK
/* IPv4 variant */
@@ -92,12 +96,16 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ const struct MTYPE *h = set->data;
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
&e.port, &e.proto))
return -EINVAL;
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ e.ip &= h->bitmask.ip;
+ if (e.ip == 0)
+ return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -105,11 +113,11 @@ static int
hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipport4 *h = set->data;
+ struct hash_ipport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -129,6 +137,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
+ e.ip &= h->bitmask.ip;
+ if (e.ip == 0)
+ return -EINVAL;
+
e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
@@ -178,9 +190,13 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
@@ -250,12 +266,17 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ const struct MTYPE *h = set->data;
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
&e.port, &e.proto))
return -EINVAL;
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ nf_inet_addr_mask_inplace(&e.ip, &h->bitmask);
+ if (ipv6_addr_any(&e.ip.in6))
+ return -EINVAL;
+
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -295,6 +316,10 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
+ nf_inet_addr_mask_inplace(&e.ip, &h->bitmask);
+ if (ipv6_addr_any(&e.ip.in6))
+ return -EINVAL;
+
e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
@@ -342,15 +367,19 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_ipport_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
+ [IPSET_ATTR_BITMASK] = { .type = NLA_NESTED },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_ipportip.c b/kernel/net/netfilter/ipset/ip_set_hash_ipportip.c
index 81ce821..ca5fe9c 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -26,7 +26,8 @@
/* 2 Counters support added */
/* 3 Comments support added */
/* 4 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
+/* 5 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 6 /* bucketsize, initval support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -108,11 +109,11 @@ static int
hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportip4 *h = set->data;
+ struct hash_ipportip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -185,9 +186,13 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
@@ -357,11 +362,13 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_ipportip_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_ipportnet.c b/kernel/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 4d4d443..8f44644 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -28,7 +28,8 @@
/* 4 Counters support added */
/* 5 Comments support added */
/* 6 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
+/* 7 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -160,12 +161,12 @@ static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportnet4 *h = set->data;
+ struct hash_ipportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -279,9 +280,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip2 = htonl(ip2);
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
e.cidr = cidr - 1;
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
@@ -514,11 +521,13 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_ipportnet_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_mac.c b/kernel/net/netfilter/ipset/ip_set_hash_mac.c
index 407c158..014bb71 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_mac.c
@@ -17,7 +17,7 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 0
+#define IPSET_TYPE_REV_MAX 1 /* bucketsize, initval support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -126,11 +126,13 @@ static struct ip_set_type hash_mac_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_mac_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_net.c b/kernel/net/netfilter/ipset/ip_set_hash_net.c
index 923bffb..8baceea 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_net.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_net.c
@@ -25,7 +25,8 @@
/* 3 Counters support added */
/* 4 Comments support added */
/* 5 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */
+/* 6 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 7 /* bucketsize, initval support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -136,11 +137,11 @@ static int
hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_net4 *h = set->data;
+ struct hash_net4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,10 +189,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
+
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_net4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
@@ -355,11 +362,13 @@ static struct ip_set_type hash_net_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_net_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_netiface.c b/kernel/net/netfilter/ipset/ip_set_hash_netiface.c
index 4202dd2..5baa852 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -27,7 +27,8 @@
/* 4 Comments support added */
/* 5 Forceadd support added */
/* 6 skbinfo support added */
-#define IPSET_TYPE_REV_MAX 7 /* interface wildcard support added */
+/* 7 interface wildcard support added */
+#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -40,7 +41,7 @@ MODULE_ALIAS("ip_set_hash:net,iface");
#define IP_SET_HASH_WITH_MULTI
#define IP_SET_HASH_WITH_NET0
-#define STRLCPY(a, b) strlcpy(a, b, IFNAMSIZ)
+#define STRSCPY(a, b) strscpy(a, b, IFNAMSIZ)
/* IPv4 variant */
@@ -182,11 +183,11 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
if (!eiface)
return -EINVAL;
- STRLCPY(e.iface, eiface);
+ STRSCPY(e.iface, eiface);
e.physdev = 1;
#endif
} else {
- STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
}
if (strlen(e.iface) == 0)
@@ -202,7 +203,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -226,7 +227,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
- nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
+ nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
@@ -260,7 +261,12 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netiface4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
@@ -395,11 +401,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (!eiface)
return -EINVAL;
- STRLCPY(e.iface, eiface);
+ STRSCPY(e.iface, eiface);
e.physdev = 1;
#endif
} else {
- STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
}
if (strlen(e.iface) == 0)
@@ -443,7 +449,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
ip6_netmask(&e.ip, e.cidr);
- nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
+ nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
@@ -471,11 +477,13 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_netiface_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_netnet.c b/kernel/net/netfilter/ipset/ip_set_hash_netnet.c
index da4ef91..8fbe649 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -22,7 +22,9 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
+/* 2 skbinfo support added */
+/* 3 bucketsize, initval support added */
+#define IPSET_TYPE_REV_MAX 4 /* bitmask support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -32,6 +34,8 @@ MODULE_ALIAS("ip_set_hash:net,net");
/* Type specific function prefix */
#define HTYPE hash_netnet
#define IP_SET_HASH_WITH_NETS
+#define IP_SET_HASH_WITH_NETMASK
+#define IP_SET_HASH_WITH_BITMASK
#define IPSET_NET_COUNT 2
/* IPv4 variants */
@@ -152,8 +156,8 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]);
ip4addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1]);
- e.ip[0] &= ip_set_netmask(e.cidr[0]);
- e.ip[1] &= ip_set_netmask(e.cidr[1]);
+ e.ip[0] &= (ip_set_netmask(e.cidr[0]) & h->bitmask.ip);
+ e.ip[1] &= (ip_set_netmask(e.cidr[1]) & h->bitmask.ip);
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -162,12 +166,12 @@ static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netnet4 *h = set->data;
+ struct hash_netnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -211,8 +215,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_IP2_TO])) {
- e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
- e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
+ e.ip[0] = htonl(ip & ntohl(h->bitmask.ip) & ip_set_hostmask(e.cidr[0]));
+ e.ip[1] = htonl(ip2_from & ntohl(h->bitmask.ip) & ip_set_hostmask(e.cidr[1]));
ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
@@ -255,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip[0] = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
do {
+ i++;
e.ip[1] = htonl(ip2);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netnet4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
@@ -389,6 +398,11 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
ip6_netmask(&e.ip[0], e.cidr[0]);
ip6_netmask(&e.ip[1], e.cidr[1]);
+ nf_inet_addr_mask_inplace(&e.ip[0], &h->bitmask);
+ nf_inet_addr_mask_inplace(&e.ip[1], &h->bitmask);
+ if (e.cidr[0] == HOST_MASK && ipv6_addr_any(&e.ip[0].in6))
+ return -EINVAL;
+
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -399,6 +413,7 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ const struct hash_netnet6 *h = set->data;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -438,6 +453,11 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
ip6_netmask(&e.ip[0], e.cidr[0]);
ip6_netmask(&e.ip[1], e.cidr[1]);
+ nf_inet_addr_mask_inplace(&e.ip[0], &h->bitmask);
+ nf_inet_addr_mask_inplace(&e.ip[1], &h->bitmask);
+ if (e.cidr[0] == HOST_MASK && ipv6_addr_any(&e.ip[0].in6))
+ return -IPSET_ERR_HASH_ELEM;
+
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
@@ -459,14 +479,18 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_netnet_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
+ [IPSET_ATTR_BITMASK] = { .type = NLA_NESTED },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_netport.c b/kernel/net/netfilter/ipset/ip_set_hash_netport.c
index 48ad1c4..8a2a7c5 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_netport.c
@@ -27,7 +27,8 @@
/* 4 Counters support added */
/* 5 Comments support added */
/* 6 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
+/* 7 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -154,11 +155,11 @@ static int
hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netport4 *h = set->data;
+ struct hash_netport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -246,8 +247,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -461,11 +466,13 @@ static struct ip_set_type hash_netport_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_netport_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_netportnet.c b/kernel/net/netfilter/ipset/ip_set_hash_netportnet.c
index 686cbbc..d5774a1 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/kernel/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -24,7 +24,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 0 Comments support added */
/* 1 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
+/* 2 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -36,6 +37,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
#define IP_SET_HASH_WITH_PROTO
#define IP_SET_HASH_WITH_NETS
#define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
/* IPv4 variant */
@@ -173,16 +175,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
+static u32
+hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
+{
+ if (from == 0 && to == UINT_MAX) {
+ *cidr = 0;
+ return to;
+ }
+ return ip_set_range_to_cidr(from, to, cidr);
+}
+
static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netportnet4 *h = set->data;
+ struct hash_netportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
int ret;
@@ -296,13 +308,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
do {
e.ip[0] = htonl(ip);
- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip[1] = htonl(ip2);
- ip2 = ip_set_range_to_cidr(ip2, ip2_to,
- &e.cidr[1]);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
+ ip2 = hash_netportnet4_range_to_cidr(ip2,
+ ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -559,11 +577,13 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
+ .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
.create = hash_netportnet_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
- [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_INITVAL] = { .type = NLA_U32 },
+ [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
diff --git a/kernel/net/netfilter/ipset/ip_set_list_set.c b/kernel/net/netfilter/ipset/ip_set_list_set.c
index fe862fb..cc2e5b9 100644
--- a/kernel/net/netfilter/ipset/ip_set_list_set.c
+++ b/kernel/net/netfilter/ipset/ip_set_list_set.c
@@ -62,7 +62,7 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
/* Don't lookup sub-counters at all */
opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
- opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
+ opt->cmdflags |= IPSET_FLAG_SKIP_COUNTER_UPDATE;
list_for_each_entry_rcu(e, &map->members, list) {
ret = ip_set_test(e->id, skb, par, opt);
if (ret <= 0)
@@ -429,9 +429,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -548,6 +545,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ timer_shutdown_sync(&map->gc);
+}
+
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
@@ -561,6 +567,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
+ .cancel_gc = list_set_cancel_gc,
};
static void
diff --git a/kernel/net/netfilter/ipset/pfxlen.c b/kernel/net/netfilter/ipset/pfxlen.c
index 59f6e72..1e264c7 100644
--- a/kernel/net/netfilter/ipset/pfxlen.c
+++ b/kernel/net/netfilter/ipset/pfxlen.c
@@ -1,6 +1,4 @@
#include <linux/version.h>
-#define IP_SET_COMPAT_HEADERS
-#include <linux/netfilter/ipset/ip_set_compat.h>
#include <linux/netfilter/ipset/pfxlen.h>
/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
diff --git a/kernel/net/sched/em_ipset.c b/kernel/net/sched/em_ipset.c
index 96fd4a3..5428f8d 100644
--- a/kernel/net/sched/em_ipset.c
+++ b/kernel/net/sched/em_ipset.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* net/sched/em_ipset.c ipset ematch
*
* Copyright (c) 2012 Florian Westphal <fw@strlen.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
*/
#include <linux/gfp.h>
@@ -87,7 +84,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
#else
#define ACPAR_FAMILY(f) acpar.family = f
#endif
- switch (tc_skb_protocol(skb)) {
+ switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
ACPAR_FAMILY(NFPROTO_IPV4);
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))