From 0d32c5c070f817229110f92d7b31df9a3e4eeec5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 24 Oct 2010 21:42:48 +0200 Subject: Fixes, cleanups, comments - More comments added to the code - ICMP and ICMPv6 support added to the hash:ip,port, hash:ip,port,ip and hash:ip,port,net types - hash:net and hash:ip,port,net types are reworked - hash:net,port type added - Wrong direction parameters fixed in hash:ip,port - Helps and manpage are updated - More tests added - Ugly macros are rewritten to functions in parse.c (Holger Eitzenberger) - resize related bug in hash types fixed (Holger Eitzenberger) - autoreconf patches by Jan Engelhardt applied - netlink patch minimalized: dumping can be initialized by a second parsing of the message (thanks to David and Patrick for the suggestion) - IPv4/IPv6 address attributes are introduced in order to fix the context (suggested by David) --- kernel/include/linux/netfilter/ip_set.h | 96 ++++++++++-- kernel/include/linux/netfilter/ip_set_bitmap.h | 2 + kernel/include/linux/netfilter/ip_set_chash.h | 186 ++++++++++++++---------- kernel/include/linux/netfilter/ip_set_getport.h | 62 +++++--- kernel/include/linux/netfilter/ip_set_hash.h | 6 +- kernel/include/linux/netfilter/ip_set_kernel.h | 7 - kernel/include/linux/netfilter/ip_set_list.h | 6 + kernel/include/linux/netfilter/ip_set_slist.h | 9 +- kernel/include/linux/netfilter/ip_set_timeout.h | 27 ++-- 9 files changed, 276 insertions(+), 125 deletions(-) (limited to 'kernel/include') diff --git a/kernel/include/linux/netfilter/ip_set.h b/kernel/include/linux/netfilter/ip_set.h index 1c41396..8abf8f8 100644 --- a/kernel/include/linux/netfilter/ip_set.h +++ b/kernel/include/linux/netfilter/ip_set.h @@ -52,7 +52,7 @@ enum { IPSET_ATTR_PROTOCOL, /* 1: Protocol version */ IPSET_ATTR_SETNAME, /* 2: Name of the set */ IPSET_ATTR_TYPENAME, /* 3: Typename */ - IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* rename/swap */ + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* Setname at 
rename/swap */ IPSET_ATTR_REVISION, /* 4: Settype revision */ IPSET_ATTR_FAMILY, /* 5: Settype family */ IPSET_ATTR_FLAGS, /* 6: Flags at command level */ @@ -77,7 +77,7 @@ enum { IPSET_ATTR_TIMEOUT, /* 6 */ IPSET_ATTR_PROTO, /* 7 */ IPSET_ATTR_CADT_FLAGS, /* 8 */ - IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ /* Reserve empty slots */ IPSET_ATTR_CADT_MAX = 16, /* Create-only specific attributes */ @@ -108,6 +108,14 @@ enum { }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) +/* IP specific attributes */ +enum { + IPSET_ATTR_IPADDR_IPV4 = IPSET_ATTR_UNSPEC + 1, + IPSET_ATTR_IPADDR_IPV6, + __IPSET_ATTR_IPADDR_MAX, +}; +#define IPSET_ATTR_IPADDR_MAX (__IPSET_ATTR_IPADDR_MAX - 1) + /* Error codes */ enum ipset_errno { IPSET_ERR_PRIVATE = 128, @@ -123,16 +131,20 @@ enum ipset_errno { IPSET_ERR_INVALID_FAMILY, IPSET_ERR_TIMEOUT, IPSET_ERR_REFERENCED, + IPSET_ERR_IPADDR_IPV4, + IPSET_ERR_IPADDR_IPV6, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 160, }; +/* Flags at command level */ enum ipset_cmd_flags { IPSET_FLAG_BIT_EXIST = 0, IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST), }; +/* Flags at CADT attribute level */ enum ipset_cadt_flags { IPSET_FLAG_BIT_BEFORE = 0, IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE), @@ -148,9 +160,6 @@ enum ipset_adt { IPSET_CADT_MAX, }; -#define IPSET_IPPROTO_ANY 255 -#define IPSET_IPPROTO_TCPUDP 254 - #ifdef __KERNEL__ #include #include @@ -196,7 +205,8 @@ enum ip_set_feature { IPSET_TYPE_IP2 = (1 << IPSET_TYPE_IP2_FLAG), IPSET_TYPE_NAME_FLAG = 4, IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), - /* Actually just a flag for dumping */ + /* Strictly speaking not a feature, but a flag for dumping: + * this settype must be dumped last */ IPSET_DUMP_LAST_FLAG = 7, IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), }; @@ -223,7 +233,7 @@ struct ip_set_type_variant { int (*uadt)(struct ip_set *set, struct nlattr *head, int len, enum ipset_adt adt, u32 *lineno, u32 flags); - 
/* Low level add/del/test entries */ + /* Low level add/del/test functions */ ipset_adtfn adt[IPSET_ADT_MAX]; /* When adding entries and set is full, try to resize the set */ @@ -241,7 +251,7 @@ struct ip_set_type_variant { struct netlink_callback *cb); /* Return true if "b" set is the same as "a" - * according to the set parameters */ + * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); }; @@ -285,7 +295,7 @@ struct ip_set { const struct ip_set_type *type; /* The type variant doing the real job */ const struct ip_set_type_variant *variant; - /* The actual INET family */ + /* The actual INET family of the set */ u8 family; /* The type specific data */ void *data; @@ -340,6 +350,7 @@ ip_set_free(void *members) kfree(members); } +/* Ignore IPSET_ERR_EXIST errors if asked to do so? */ static inline bool ip_set_eexist(int ret, u32 flags) { @@ -379,6 +390,52 @@ ip_set_get_n16(const struct nlattr *attr) return attr->nla_type & NLA_F_NET_BYTEORDER ? 
value : htons(value); } +static const struct nla_policy +ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] __read_mostly = { + [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, + [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, +}; + +static inline int +ip_set_get_ipaddr4(struct nlattr *attr[], int type, u32 *ipaddr) +{ + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1] = {}; + + if (!attr[type]) + return -IPSET_ERR_PROTOCOL; + + if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, + nla_data(attr[type]), nla_len(attr[type]), + ipaddr_policy)) + return -IPSET_ERR_PROTOCOL; + if (!tb[IPSET_ATTR_IPADDR_IPV4]) + return -IPSET_ERR_IPADDR_IPV4; + + *ipaddr = ip_set_get_n32(tb[IPSET_ATTR_IPADDR_IPV4]); + return 0; +} + +static inline int +ip_set_get_ipaddr6(struct nlattr *attr[], int type, union nf_inet_addr *ipaddr) +{ + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1] = {}; + + if (!attr[type]) + return -IPSET_ERR_PROTOCOL; + + if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, + nla_data(attr[type]), nla_len(attr[type]), + ipaddr_policy)) + return -IPSET_ERR_PROTOCOL; + if (!tb[IPSET_ATTR_IPADDR_IPV6]) + return -IPSET_ERR_IPADDR_IPV6; + + memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), + sizeof(struct in6_addr)); + return 0; +} + #define ipset_nest_start(skb, attr) nla_nest_start(skb, attr | NLA_F_NESTED) #define ipset_nest_end(skb, start) nla_nest_end(skb, start) @@ -388,6 +445,27 @@ ip_set_get_n16(const struct nlattr *attr) #define NLA_PUT_NET16(skb, type, value) \ NLA_PUT_BE16(skb, type | NLA_F_NET_BYTEORDER, value) +#define NLA_PUT_IPADDR4(skb, type, ipaddr) \ +do { \ + struct nlattr *__nested = ipset_nest_start(skb, type); \ + \ + if (!__nested) \ + goto nla_put_failure; \ + NLA_PUT_NET32(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); \ + ipset_nest_end(skb, __nested); \ +} while (0) + +#define NLA_PUT_IPADDR6(skb, type, ipaddrptr) \ +do { \ + struct nlattr *__nested = ipset_nest_start(skb, type); \ + \ + if (!__nested) \ + goto nla_put_failure; \ + NLA_PUT(skb, 
IPSET_ATTR_IPADDR_IPV6, \ + sizeof(struct in6_addr), ipaddrptr); \ + ipset_nest_end(skb, __nested); \ +} while (0) + /* Get address from skbuff */ static inline u32 ip4addr(const struct sk_buff *skb, bool src) diff --git a/kernel/include/linux/netfilter/ip_set_bitmap.h b/kernel/include/linux/netfilter/ip_set_bitmap.h index 0d067d0..f3bff2c 100644 --- a/kernel/include/linux/netfilter/ip_set_bitmap.h +++ b/kernel/include/linux/netfilter/ip_set_bitmap.h @@ -3,7 +3,9 @@ /* Bitmap type specific error codes */ enum { + /* The element is out of the range of the set */ IPSET_ERR_BITMAP_RANGE = IPSET_ERR_TYPE_SPECIFIC, + /* The range exceeds the size limit of the set type */ IPSET_ERR_BITMAP_RANGE_SIZE, }; diff --git a/kernel/include/linux/netfilter/ip_set_chash.h b/kernel/include/linux/netfilter/ip_set_chash.h index 5e615e4..6fd1d32 100644 --- a/kernel/include/linux/netfilter/ip_set_chash.h +++ b/kernel/include/linux/netfilter/ip_set_chash.h @@ -5,13 +5,11 @@ #include #include -#define CONCAT(a, b, c) a##b##c -#define TOKEN(a, b, c) CONCAT(a, b, c) - -/* Cache friendly hash with resizing when linear searching becomes too long. - * Internally jhash is used with the assumption that the size of the stored - * data is a multiple of sizeof(u32). If storage supports timeout, the - * timeout field must be the last one in the data structure. +/* Cacheline friendly hash with resizing when linear searching becomes too + * long. Internally jhash is used with the assumption that the size of the + * stored data is a multiple of sizeof(u32). If storage supports timeout, + * the timeout field must be the last one in the data structure - that field + * is ignored when computing the hash key. 
*/ /* Number of elements to store in an array block */ @@ -19,9 +17,10 @@ /* Number of arrays: max ARRAY_SIZE * CHAIN_LIMIT "long" chains */ #define CHASH_DEFAULT_CHAIN_LIMIT 3 +/* Book-keeping of the prefixes added to the set */ struct chash_nets { + u8 cidr; /* the different cidr values in the set */ u32 nets; /* number of elements per cidr */ - u8 cidr; /* the cidr values added to the set */ }; struct chash { @@ -37,14 +36,12 @@ struct chash { #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; /* netmask value for subnets to store */ #endif -#ifdef IP_SET_HASH_WITH_PROTO - u8 proto; /* default protocol for SET target */ -#endif #ifdef IP_SET_HASH_WITH_NETS - struct chash_nets nets[0]; /* book keeping of networks */ + struct chash_nets nets[0]; /* book-keeping of prefixes */ #endif }; +/* Compute htable_bits from the user input parameter hashsize */ static inline u8 htable_bits(u32 hashsize) { @@ -57,34 +54,56 @@ htable_bits(u32 hashsize) return bits; } +#ifdef IP_SET_HASH_WITH_NETS + +#define SET_HOST_MASK(family) (family == AF_INET ? 
32 : 128) + +/* Network cidr size book keeping when the hash stores different + * sized networks */ static inline void -add_cidr(struct chash_nets *nets, u8 host_mask, u8 cidr) +add_cidr(struct chash *h, u8 cidr, u8 host_mask) { u8 i; - pr_debug("add_cidr %u", cidr); - for (i = 0; i < host_mask - 1 && nets[i].cidr; i++) { + ++h->nets[cidr-1].nets; + + pr_debug("add_cidr added %u: %u", cidr, h->nets[cidr-1].nets); + + if (h->nets[cidr-1].nets > 1) + return; + + /* New cidr size */ + for (i = 0; i < host_mask && h->nets[i].cidr; i++) { /* Add in increasing prefix order, so larger cidr first */ - if (nets[i].cidr < cidr) - swap(nets[i].cidr, cidr); + if (h->nets[i].cidr < cidr) + swap(h->nets[i].cidr, cidr); } - if (i < host_mask - 1) - nets[i].cidr = cidr; + if (i < host_mask) + h->nets[i].cidr = cidr; } static inline void -del_cidr(struct chash_nets *nets, u8 host_mask, u8 cidr) +del_cidr(struct chash *h, u8 cidr, u8 host_mask) { u8 i; - pr_debug("del_cidr %u", cidr); - for (i = 0; i < host_mask - 2 && nets[i].cidr; i++) { - if (nets[i].cidr == cidr) - nets[i].cidr = cidr = nets[i+1].cidr; + --h->nets[cidr-1].nets; + + pr_debug("del_cidr deleted %u: %u", cidr, h->nets[cidr-1].nets); + + if (h->nets[cidr-1].nets != 0) + return; + + /* All entries with this cidr size deleted, so cleanup h->cidr[] */ + for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) { + if (h->nets[i].cidr == cidr) + h->nets[i].cidr = cidr = h->nets[i+1].cidr; } - nets[host_mask - 2].cidr = 0; + h->nets[i - 1].cidr = 0; } +#endif +/* Destroy the hashtable part of the set */ static void chash_destroy(struct slist *t, u8 htable_bits) { @@ -93,12 +112,13 @@ chash_destroy(struct slist *t, u8 htable_bits) for (i = 0; i < jhash_size(htable_bits); i++) slist_for_each_safe(n, tmp, &t[i]) - /* FIXME: slab cache */ + /* FIXME: use slab cache */ kfree(n); ip_set_free(t); } +/* Calculate the actual memory size of the set data */ static size_t chash_memsize(const struct chash *h, size_t dsize, u8 host_mask) 
{ @@ -106,7 +126,7 @@ chash_memsize(const struct chash *h, size_t dsize, u8 host_mask) u32 i; size_t memsize = sizeof(*h) #ifdef IP_SET_HASH_WITH_NETS - + sizeof(struct chash_nets) * (host_mask - 1) + + sizeof(struct chash_nets) * host_mask #endif + jhash_size(h->htable_bits) * sizeof(struct slist); @@ -118,6 +138,7 @@ chash_memsize(const struct chash *h, size_t dsize, u8 host_mask) return memsize; } +/* Flush a hash type of set: destroy all elements */ static void ip_set_hash_flush(struct ip_set *set) { @@ -133,11 +154,12 @@ ip_set_hash_flush(struct ip_set *set) } #ifdef IP_SET_HASH_WITH_NETS memset(h->nets, 0, sizeof(struct chash_nets) - * (set->family == AF_INET ? 31 : 127)); + * SET_HOST_MASK(set->family)); #endif h->elements = 0; } +/* Destroy a hash type of set */ static void ip_set_hash_destroy(struct ip_set *set) { @@ -152,12 +174,15 @@ ip_set_hash_destroy(struct ip_set *set) set->data = NULL; } -#define JHASH2(data, initval, htable_bits) \ -jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \ - & jhash_mask(htable_bits) +#define JHASH2(data, initval, htable_bits) \ +(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \ + & jhash_mask(htable_bits)) #endif /* _IP_SET_CHASH_H */ +#define CONCAT(a, b, c) a##b##c +#define TOKEN(a, b, c) CONCAT(a, b, c) + /* Type/family dependent function prototypes */ #define type_pf_data_equal TOKEN(TYPE, PF, _data_equal) @@ -208,10 +233,13 @@ jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \ /* Flavour without timeout */ +/* Get the ith element from the array block n */ #define chash_data(n, i) \ (struct type_pf_elem *)((char *)(n) + sizeof(struct slist) \ + (i)*sizeof(struct type_pf_elem)) +/* Add an element to the hash table when resizing the set: + * we spare the maintenance of the internal counters. 
*/ static int type_pf_chash_readd(struct chash *h, struct slist *t, u8 htable_bits, const struct type_pf_elem *value, gfp_t gfp_flags) @@ -240,7 +268,7 @@ type_pf_chash_readd(struct chash *h, struct slist *t, u8 htable_bits, prev->next = (struct slist *) tmp; data = chash_data(tmp, 0); } else { - /* Rehashing */ + /* Trigger rehashing */ return -EAGAIN; } found: @@ -248,13 +276,16 @@ found: return 0; } +/* Delete an element from the hash table: swap it with the last + * element in the hash bucket and free up the array if it was + * completely emptied */ static void type_pf_chash_del_elem(struct chash *h, struct slist *prev, struct slist *n, int i) { struct type_pf_elem *data = chash_data(n, i); struct slist *tmp; - int j; + int j; /* Index in array */ if (n->next != NULL) { for (prev = n, tmp = n->next; @@ -276,8 +307,7 @@ type_pf_chash_del_elem(struct chash *h, struct slist *prev, type_pf_data_swap(data, chash_data(tmp, j)); } #ifdef IP_SET_HASH_WITH_NETS - if (--h->nets[data->cidr-1].nets == 0) - del_cidr(h->nets, HOST_MASK, data->cidr); + del_cidr(h, data->cidr, HOST_MASK); #endif if (j == 0) { prev->next = NULL; @@ -288,6 +318,9 @@ type_pf_chash_del_elem(struct chash *h, struct slist *prev, h->elements--; } +/* Resize a hash: create a new hash table with doubling the hashsize + * and inserting the elements to it. Repeat until we succeed or + * fail due to memory pressures. 
*/ static int type_pf_resize(struct ip_set *set, gfp_t gfp_flags, bool retried) { @@ -299,7 +332,7 @@ type_pf_resize(struct ip_set *set, gfp_t gfp_flags, bool retried) int ret; retry: - ret = 0; + ret = i = 0; htable_bits++; if (!htable_bits) /* In case we have plenty of memory :-) */ @@ -310,8 +343,8 @@ retry: return -ENOMEM; write_lock_bh(&set->lock); - for (i = 0; i < jhash_size(h->htable_bits); i++) { next_slot: + for (; i < jhash_size(h->htable_bits); i++) { slist_for_each(n, &h->htable[i]) { for (j = 0; j < h->array_size; j++) { data = chash_data(n, j); @@ -344,6 +377,8 @@ next_slot: return 0; } +/* Add an element to a hash and update the internal counters when succeeded, + * otherwise report the proper error code. */ static int type_pf_chash_add(struct ip_set *set, void *value, gfp_t gfp_flags, u32 timeout) @@ -356,11 +391,7 @@ type_pf_chash_add(struct ip_set *set, void *value, int i = 0, j = 0; u32 hash; -#ifdef IP_SET_HASH_WITH_NETS - if (h->elements >= h->maxelem || h->nets[d->cidr-1].nets == UINT_MAX) -#else if (h->elements >= h->maxelem) -#endif return -IPSET_ERR_HASH_FULL; hash = JHASH2(value, h->initval, h->htable_bits); @@ -390,13 +421,13 @@ type_pf_chash_add(struct ip_set *set, void *value, found: type_pf_data_copy(data, d); #ifdef IP_SET_HASH_WITH_NETS - if (h->nets[d->cidr-1].nets++ == 0) - add_cidr(h->nets, HOST_MASK, d->cidr); + add_cidr(h, d->cidr, HOST_MASK); #endif h->elements++; return 0; } +/* Delete an element from the hash */ static int type_pf_chash_del(struct ip_set *set, void *value, gfp_t gfp_flags, u32 timeout) @@ -423,6 +454,9 @@ type_pf_chash_del(struct ip_set *set, void *value, } #ifdef IP_SET_HASH_WITH_NETS + +/* Special test function which takes into account the different network + * sizes added to the set */ static inline int type_pf_chash_test_cidrs(struct ip_set *set, struct type_pf_elem *d, @@ -433,11 +467,11 @@ type_pf_chash_test_cidrs(struct ip_set *set, const struct type_pf_elem *data; int i, j = 0; u32 hash; - u8 
host_mask = set->family == AF_INET ? 32 : 128; + u8 host_mask = SET_HOST_MASK(set->family); retry: pr_debug("test by nets"); - for (; j < host_mask - 1 && h->nets[j].cidr; j++) { + for (; j < host_mask && h->nets[j].cidr; j++) { type_pf_data_netmask(d, h->nets[j].cidr); hash = JHASH2(d, h->initval, h->htable_bits); slist_for_each(n, &h->htable[hash]) @@ -455,6 +489,7 @@ retry: } #endif +/* Test whether the element is added to the set */ static inline int type_pf_chash_test(struct ip_set *set, void *value, gfp_t gfp_flags, u32 timeout) @@ -465,10 +500,11 @@ type_pf_chash_test(struct ip_set *set, void *value, const struct type_pf_elem *data; int i; u32 hash; -#ifdef IP_SET_HASH_WITH_NETS - u8 host_mask = set->family == AF_INET ? 32 : 128; - if (d->cidr == host_mask) +#ifdef IP_SET_HASH_WITH_NETS + /* If we test an IP address and not a network address, + * try all possible network sizes */ + if (d->cidr == SET_HOST_MASK(set->family)) return type_pf_chash_test_cidrs(set, d, gfp_flags, timeout); #endif @@ -484,6 +520,7 @@ type_pf_chash_test(struct ip_set *set, void *value, return 0; } +/* Reply a HEADER request: fill out the header part of the set */ static int type_pf_head(struct ip_set *set, struct sk_buff *skb) { @@ -507,10 +544,6 @@ type_pf_head(struct ip_set *set, struct sk_buff *skb) #ifdef IP_SET_HASH_WITH_NETMASK if (h->netmask != HOST_MASK) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask); -#endif -#ifdef IP_SET_HASH_WITH_PROTO - if (h->proto != IPSET_IPPROTO_TCPUDP) - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, h->proto); #endif NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(atomic_read(&set->ref) - 1)); @@ -524,6 +557,7 @@ nla_put_failure: return -EFAULT; } +/* Reply a LIST/SAVE request: dump the elements of the specified set */ static int type_pf_list(struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) @@ -599,7 +633,8 @@ static const struct ip_set_type_variant type_pf_variant __read_mostly = { /* Flavour with timeout support */ #define 
chash_tdata(n, i) \ -(struct type_pf_elem *)((char *)(n) + sizeof(struct slist) + (i)*sizeof(struct type_pf_telem)) +(struct type_pf_elem *)((char *)(n) + sizeof(struct slist) \ + + (i)*sizeof(struct type_pf_telem)) static inline u32 type_pf_data_timeout(const struct type_pf_elem *data) @@ -666,7 +701,7 @@ type_pf_chash_treadd(struct chash *h, struct slist *t, u8 htable_bits, prev->next = (struct slist *) tmp; data = chash_tdata(tmp, 0); } else { - /* Rehashing */ + /* Trigger rehashing */ return -EAGAIN; } found: @@ -681,7 +716,7 @@ type_pf_chash_del_telem(struct chash *h, struct slist *prev, { struct type_pf_elem *d, *data = chash_tdata(n, i); struct slist *tmp; - int j; + int j; /* Index in array */ pr_debug("del %u", i); if (n->next != NULL) { @@ -706,8 +741,7 @@ type_pf_chash_del_telem(struct chash *h, struct slist *prev, type_pf_data_swap_timeout(data, d); } #ifdef IP_SET_HASH_WITH_NETS - if (--h->nets[data->cidr-1].nets == 0) - del_cidr(h->nets, HOST_MASK, data->cidr); + del_cidr(h, data->cidr, HOST_MASK); #endif if (j == 0) { prev->next = NULL; @@ -718,6 +752,7 @@ type_pf_chash_del_telem(struct chash *h, struct slist *prev, h->elements--; } +/* Delete expired elements from the hashtable */ static void type_pf_chash_expire(struct chash *h) { @@ -760,7 +795,7 @@ type_pf_tresize(struct ip_set *set, gfp_t gfp_flags, bool retried) } retry: - ret = 0; + ret = i = 0; htable_bits++; if (!htable_bits) /* In case we have plenty of memory :-) */ @@ -771,8 +806,8 @@ retry: return -ENOMEM; write_lock_bh(&set->lock); - for (i = 0; i < jhash_size(h->htable_bits); i++) { next_slot: + for (; i < jhash_size(h->htable_bits); i++) { slist_for_each(n, &h->htable[i]) { for (j = 0; j < h->array_size; j++) { data = chash_tdata(n, j); @@ -781,8 +816,8 @@ next_slot: goto next_slot; } ret = type_pf_chash_treadd(h, t, htable_bits, - data, gfp_flags, - type_pf_data_timeout(data)); + data, gfp_flags, + type_pf_data_timeout(data)); if (ret < 0) { write_unlock_bh(&set->lock); 
chash_destroy(t, htable_bits); @@ -821,11 +856,7 @@ type_pf_chash_tadd(struct ip_set *set, void *value, if (h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ type_pf_chash_expire(h); -#ifdef IP_SET_HASH_WITH_NETS - if (h->elements >= h->maxelem || h->nets[d->cidr-1].nets == UINT_MAX) -#else if (h->elements >= h->maxelem) -#endif return -IPSET_ERR_HASH_FULL; hash = JHASH2(d, h->initval, h->htable_bits); @@ -854,17 +885,14 @@ type_pf_chash_tadd(struct ip_set *set, void *value, return -EAGAIN; } found: - if (type_pf_data_isnull(data)) { + if (type_pf_data_isnull(data)) h->elements++; #ifdef IP_SET_HASH_WITH_NETS - } else { - if (--h->nets[data->cidr-1].nets == 0) - del_cidr(h->nets, HOST_MASK, data->cidr); - } - if (h->nets[d->cidr-1].nets++ == 0) { - add_cidr(h->nets, HOST_MASK, d->cidr); + else + del_cidr(h, data->cidr, HOST_MASK); + + add_cidr(h, d->cidr, HOST_MASK); #endif - } type_pf_data_copy(data, d); type_pf_data_timeout_set(data, timeout); return 0; @@ -908,10 +936,10 @@ type_pf_chash_ttest_cidrs(struct ip_set *set, struct slist *n; int i, j = 0; u32 hash; - u8 host_mask = set->family == AF_INET ? 32 : 128; + u8 host_mask = SET_HOST_MASK(set->family); retry: - for (; j < host_mask - 1 && h->nets[j].cidr; j++) { + for (; j < host_mask && h->nets[j].cidr; j++) { type_pf_data_netmask(d, h->nets[j].cidr); hash = JHASH2(d, h->initval, h->htable_bits); slist_for_each(n, &h->htable[hash]) @@ -938,10 +966,9 @@ type_pf_chash_ttest(struct ip_set *set, void *value, struct slist *n; int i; u32 hash; -#ifdef IP_SET_HASH_WITH_NETS - u8 host_mask = set->family == AF_INET ? 
32 : 128; - if (d->cidr == host_mask) +#ifdef IP_SET_HASH_WITH_NETS + if (d->cidr == SET_HOST_MASK(set->family)) return type_pf_chash_ttest_cidrs(set, d, gfp_flags, timeout); #endif @@ -1048,7 +1075,8 @@ type_pf_gc_init(struct ip_set *set) h->gc.function = type_pf_gc; h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; add_timer(&h->gc); - pr_debug("gc initialized, run in every %u", IPSET_GC_PERIOD(h->timeout)); + pr_debug("gc initialized, run in every %u", + IPSET_GC_PERIOD(h->timeout)); } #undef type_pf_data_equal diff --git a/kernel/include/linux/netfilter/ip_set_getport.h b/kernel/include/linux/netfilter/ip_set_getport.h index cf150d3..e4d469d 100644 --- a/kernel/include/linux/netfilter/ip_set_getport.h +++ b/kernel/include/linux/netfilter/ip_set_getport.h @@ -2,13 +2,14 @@ #define _IP_SET_GETPORT_H #ifdef __KERNEL__ +#include +#include #include #include #define IPSET_INVALID_PORT 65536 /* We must handle non-linear skbs */ - static inline bool get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, bool src, u16 *port, u8 *proto) @@ -38,13 +39,32 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, *port = src ? 
uh->source : uh->dest; break; } - default: - if (*proto == IPSET_IPPROTO_TCPUDP) + case IPPROTO_ICMP: { + struct icmphdr _icmph; + const struct icmphdr *ic; + + ic = skb_header_pointer(skb, protooff, sizeof(_icmph), &_icmph); + if (ic == NULL) + return false; + + *port = (ic->type << 8) | ic->code; + break; + } + case IPPROTO_ICMPV6: { + struct icmp6hdr _icmph; + const struct icmp6hdr *ic; + + ic = skb_header_pointer(skb, protooff, sizeof(_icmph), &_icmph); + if (ic == NULL) return false; + + *port = (ic->icmp6_type << 8) | ic->icmp6_code; + break; + } + default: break; } - if (*proto != IPSET_IPPROTO_TCPUDP) - *proto = protocol; + *proto = protocol; return true; } @@ -56,9 +76,6 @@ get_ip4_port(const struct sk_buff *skb, bool src, u16 *port, u8 *proto) unsigned int protooff = ip_hdrlen(skb); int protocol = iph->protocol; - if (!(*proto >= IPSET_IPPROTO_TCPUDP || *proto == protocol)) - return false; - /* See comments at tcp_match in ip_tables.c */ if (ntohs(iph->frag_off) & IP_OFFSET) return false; @@ -77,21 +94,35 @@ get_ip6_port(const struct sk_buff *skb, bool src, u16 *port, u8 *proto) if (protocol < 0 || fragoff) return false; - if (!(*proto >= IPSET_IPPROTO_TCPUDP || *proto == protocol)) - return false; - return get_port(skb, protocol, protooff, src, port, proto); } static inline bool get_ip_port(const struct sk_buff *skb, u8 pf, bool src, u16 *port) { - u8 proto = IPSET_IPPROTO_TCPUDP; - - if (pf == AF_INET) - return get_ip4_port(skb, src, port, &proto); - else - return get_ip6_port(skb, src, port, &proto); + bool ret; + u8 proto; + + switch (pf) { + case AF_INET: + ret = get_ip4_port(skb, src, port, &proto); + break; + case AF_INET6: + ret = get_ip6_port(skb, src, port, &proto); + break; + default: + return false; + } + if (!ret) + return ret; + /* Report success only for TCP and UDP */ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + return true; + default: + return false; + } } #endif /* __KERNEL__ */ diff --git a/kernel/include/linux/netfilter/ip_set_hash.h b/kernel/include/linux/netfilter/ip_set_hash.h 
index 4003af0..e149a2b 100644 --- a/kernel/include/linux/netfilter/ip_set_hash.h +++ b/kernel/include/linux/netfilter/ip_set_hash.h @@ -1,11 +1,15 @@ #ifndef __IP_SET_HASH_H #define __IP_SET_HASH_H -/* Bitmap type specific error codes */ +/* Hash type specific error codes */ enum { + /* Hash is full */ IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC, + /* Null-valued element */ IPSET_ERR_HASH_ELEM, + /* Invalid protocol */ IPSET_ERR_INVALID_PROTO, + /* Protocol missing but must be specified */ IPSET_ERR_MISSING_PROTO, }; diff --git a/kernel/include/linux/netfilter/ip_set_kernel.h b/kernel/include/linux/netfilter/ip_set_kernel.h index 0f04217..d770589 100644 --- a/kernel/include/linux/netfilter/ip_set_kernel.h +++ b/kernel/include/linux/netfilter/ip_set_kernel.h @@ -1,13 +1,6 @@ #ifndef _IP_SET_KERNEL_H #define _IP_SET_KERNEL_H -/* Copyright (C) 2003-2010 Jozsef Kadlecsik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - #ifdef __KERNEL__ #ifdef CONFIG_DEBUG_KERNEL diff --git a/kernel/include/linux/netfilter/ip_set_list.h b/kernel/include/linux/netfilter/ip_set_list.h index c40643e..9988570 100644 --- a/kernel/include/linux/netfilter/ip_set_list.h +++ b/kernel/include/linux/netfilter/ip_set_list.h @@ -3,11 +3,17 @@ /* List type specific error codes */ enum { + /* Set name to be added/deleted/tested does not exist. 
*/ IPSET_ERR_NAME = IPSET_ERR_TYPE_SPECIFIC, + /* list:set type is not permitted to add */ IPSET_ERR_LOOP, + /* Missing reference set */ IPSET_ERR_BEFORE, + /* Reference set does not exist */ IPSET_ERR_NAMEREF, + /* Set is full */ IPSET_ERR_LIST_FULL, + /* Reference set is not added to the set */ IPSET_ERR_REF_EXIST, }; diff --git a/kernel/include/linux/netfilter/ip_set_slist.h b/kernel/include/linux/netfilter/ip_set_slist.h index abc5afe..3e8d8b0 100644 --- a/kernel/include/linux/netfilter/ip_set_slist.h +++ b/kernel/include/linux/netfilter/ip_set_slist.h @@ -25,7 +25,8 @@ struct slist { pos = pos->next) #define slist_for_each_prev(prev, pos, head) \ - for (prev = head, pos = (head)->next; pos && ({ prefetch(pos->next); 1; }); \ + for (prev = head, pos = (head)->next; \ + pos && ({ prefetch(pos->next); 1; }); \ prev = pos, pos = pos->next) #define slist_for_each_safe(pos, n, head) \ @@ -46,7 +47,8 @@ struct slist { pos = pos->next) /** - * slist_for_each_entry_continue - iterate over a hlist continuing after current point + * slist_for_each_entry_continue - iterate over a hlist continuing + * after current point * @tpos: the type * to use as a loop cursor. * @pos: the &struct slist to use as a loop cursor. * @member: the name of the slist within the struct. @@ -58,7 +60,8 @@ struct slist { pos = pos->next) /** - * slist_for_each_entry_from - iterate over a hlist continuing from current point + * slist_for_each_entry_from - iterate over a hlist continuing + * from current point * @tpos: the type * to use as a loop cursor. * @pos: the &struct slist to use as a loop cursor. * @member: the name of the slist within the struct. diff --git a/kernel/include/linux/netfilter/ip_set_timeout.h b/kernel/include/linux/netfilter/ip_set_timeout.h index bf1cbf6..b917480 100644 --- a/kernel/include/linux/netfilter/ip_set_timeout.h +++ b/kernel/include/linux/netfilter/ip_set_timeout.h @@ -17,7 +17,7 @@ #define IPSET_GC_PERIOD(timeout) \ ((timeout/3) ? 
min_t(u32, (timeout)/3, IPSET_GC_TIME) : 1) -/* Set is defined without timeout support */ +/* Set is defined without timeout support: timeout value may be 0 */ #define IPSET_NO_TIMEOUT UINT_MAX #define with_timeout(timeout) ((timeout) != IPSET_NO_TIMEOUT) @@ -27,11 +27,14 @@ ip_set_timeout_uget(struct nlattr *tb) { unsigned int timeout = ip_set_get_h32(tb); + /* Userspace supplied TIMEOUT parameter: adjust crazy size */ return timeout == IPSET_NO_TIMEOUT ? IPSET_NO_TIMEOUT - 1 : timeout; } #ifdef IP_SET_BITMAP_TIMEOUT +/* Bitmap specific timeout constants and macros for the entries */ + /* Bitmap entry is unset */ #define IPSET_ELEM_UNSET 0 /* Bitmap entry is set with no timeout value */ @@ -63,6 +66,7 @@ ip_set_timeout_set(u32 timeout) t = timeout * HZ + jiffies; if (t == IPSET_ELEM_UNSET || t == IPSET_ELEM_PERMANENT) + /* Bingo! */ t++; return t; @@ -76,19 +80,23 @@ ip_set_timeout_get(unsigned long timeout) #else +/* Hash specific timeout constants and macros for the entries */ + /* Hash entry is set with no timeout value */ -#define IPSET_ELEM_UNSET 0 +#define IPSET_ELEM_PERMANENT 0 static inline bool ip_set_timeout_test(unsigned long timeout) { - return timeout == IPSET_ELEM_UNSET || time_after(timeout, jiffies); + return timeout == IPSET_ELEM_PERMANENT + || time_after(timeout, jiffies); } static inline bool ip_set_timeout_expired(unsigned long timeout) { - return timeout != IPSET_ELEM_UNSET && time_before(timeout, jiffies); + return timeout != IPSET_ELEM_PERMANENT + && time_before(timeout, jiffies); } static inline unsigned long @@ -97,10 +105,11 @@ ip_set_timeout_set(u32 timeout) unsigned long t; if (!timeout) - return IPSET_ELEM_UNSET; + return IPSET_ELEM_PERMANENT; t = timeout * HZ + jiffies; - if (t == IPSET_ELEM_UNSET) + if (t == IPSET_ELEM_PERMANENT) + /* Bingo! :-) */ t++; return t; @@ -109,10 +118,10 @@ ip_set_timeout_set(u32 timeout) static inline u32 ip_set_timeout_get(unsigned long timeout) { - return timeout == IPSET_ELEM_UNSET ? 
0 : (timeout - jiffies)/HZ; + return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ; } -#endif /* IP_SET_BITMAP_TIMEOUT */ +#endif /* ! IP_SET_BITMAP_TIMEOUT */ #endif /* __KERNEL__ */ -#endif /*_IP_SET_TIMEOUT_H */ +#endif /* _IP_SET_TIMEOUT_H */ -- cgit v1.2.3