From 3fd6b24ace319b139ec3c4e3031a5f05d21e304e Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 15 Jun 2010 13:30:55 +0200 Subject: ipset 5 in an almost ready state - milestone Reworked protocol and internal interfaces, missing set types added, backward compatibility verified, lots of tests added (and thanks to the tests, bugs fixed), even the manpage is rewritten ;-). Countless changes everywhere... The missing bits before announcing ipset 5: - net namespace support - new iptables/ip6tables extension library - iptables/ip6tables match and target tests (backward/forward compatibility) - tests on catching syntax errors --- kernel/Kbuild | 11 +- kernel/include/linux/netfilter/ip_set.h | 246 ++--- kernel/include/linux/netfilter/ip_set_bitmap.h | 6 +- kernel/include/linux/netfilter/ip_set_chash.h | 1096 +++++++++++++++++++++++ kernel/include/linux/netfilter/ip_set_getport.h | 25 +- kernel/include/linux/netfilter/ip_set_hash.h | 5 +- kernel/include/linux/netfilter/ip_set_jhash.h | 152 ++-- kernel/include/linux/netfilter/ip_set_kernel.h | 20 + kernel/include/linux/netfilter/ip_set_list.h | 21 + kernel/include/linux/netfilter/ip_set_slist.h | 86 ++ kernel/include/linux/netfilter/ip_set_timeout.h | 35 +- kernel/include/linux/netfilter/ipt_set.h | 21 - kernel/include/linux/netfilter/xt_set.h | 55 ++ kernel/ip_set.c | 824 +++++++++++------ kernel/ip_set_bitmap_ip.c | 173 ++-- kernel/ip_set_bitmap_ipmac.c | 486 ++++++---- kernel/ip_set_bitmap_port.c | 141 +-- kernel/ip_set_hash_ip.c | 539 ++++++----- kernel/ip_set_hash_ip_src.c | 473 ---------- kernel/ip_set_hash_ipport.c | 538 ++++++++--- kernel/ip_set_hash_ipportip.c | 580 ++++++++---- kernel/ip_set_hash_ipportnet.c | 696 +++++++++----- kernel/ip_set_hash_net.c | 556 +++++++++--- kernel/ip_set_iptreemap.c | 700 --------------- kernel/ip_set_list_set.c | 731 ++++++++++----- kernel/ip_set_tree_ip.c | 464 ---------- kernel/ipt_SET.c | 242 ----- kernel/ipt_set.c | 238 ----- kernel/xt_set.c | 356 ++++++++ 29 files 
changed, 5328 insertions(+), 4188 deletions(-) create mode 100644 kernel/include/linux/netfilter/ip_set_chash.h create mode 100644 kernel/include/linux/netfilter/ip_set_kernel.h create mode 100644 kernel/include/linux/netfilter/ip_set_list.h create mode 100644 kernel/include/linux/netfilter/ip_set_slist.h delete mode 100644 kernel/include/linux/netfilter/ipt_set.h create mode 100644 kernel/include/linux/netfilter/xt_set.h delete mode 100644 kernel/ip_set_hash_ip_src.c delete mode 100644 kernel/ip_set_iptreemap.c delete mode 100644 kernel/ip_set_tree_ip.c delete mode 100644 kernel/ipt_SET.c delete mode 100644 kernel/ipt_set.c create mode 100644 kernel/xt_set.c (limited to 'kernel') diff --git a/kernel/Kbuild b/kernel/Kbuild index c171711..9c7771a 100644 --- a/kernel/Kbuild +++ b/kernel/Kbuild @@ -1,14 +1,11 @@ EXTRA_CFLAGS := -I$(M)/include \ -DCONFIG_IP_SET_MAX=$(IP_SET_MAX) -obj-m += ip_set.o -#ipt_set.o ipt_SET.o +obj-m += ip_set.o xt_set.o obj-m += ip_set_bitmap_ip.o ip_set_bitmap_ipmac.o ip_set_bitmap_port.o -obj-m += ip_set_hash_ip.o -#obj-m += ip_set_iphash.o ip_set_nethash.o ip_set_ipporthash.o -#obj-m += ip_set_ipportiphash.o ip_set_ipportnethash.o -#obj-m += ip_set_iptree.o ip_set_iptreemap.o -#obj-m += ip_set_setlist.o +obj-m += ip_set_hash_ip.o ip_set_hash_ipport.o ip_set_hash_ipportip.o +obj-m += ip_set_hash_net.o ip_set_hash_ipportnet.o +obj-m += ip_set_list_set.o # It's for me... incdirs := $(M) $(M)/include/linux/netfilter diff --git a/kernel/include/linux/netfilter/ip_set.h b/kernel/include/linux/netfilter/ip_set.h index d0b47a0..e700503 100644 --- a/kernel/include/linux/netfilter/ip_set.h +++ b/kernel/include/linux/netfilter/ip_set.h @@ -11,14 +11,10 @@ * published by the Free Software Foundation. 
*/ -#if 1 -#define IP_SET_DEBUG -#endif - /* The protocol version */ #define IPSET_PROTOCOL 5 -/* The max length of strings: set and type identifiers */ +/* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 /* Message types and commands */ @@ -43,6 +39,7 @@ enum ipset_cmd { IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* Enter restore mode */ IPSET_CMD_HELP, /* Get help */ IPSET_CMD_VERSION, /* Get program version */ + IPSET_CMD_QUIT, /* Quit from interactive mode */ IPSET_CMD_MAX, @@ -58,6 +55,7 @@ enum { IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* rename/swap */ IPSET_ATTR_REVISION, /* Settype revision */ IPSET_ATTR_FAMILY, /* Settype family */ + IPSET_ATTR_FLAGS, /* Flags at command level */ IPSET_ATTR_DATA, /* Nested attributes */ IPSET_ATTR_ADT, /* Multiple data containers */ IPSET_ATTR_LINENO, /* Restore lineno */ @@ -77,8 +75,8 @@ enum { IPSET_ATTR_PORT_FROM = IPSET_ATTR_PORT, IPSET_ATTR_PORT_TO, IPSET_ATTR_TIMEOUT, - IPSET_ATTR_FLAGS, - /* IPSET_ATTR_LINENO */ + IPSET_ATTR_CADT_FLAGS, + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* Reserve empty slots */ IPSET_ATTR_CADT_MAX = 16, /* Create-only specific attributes */ @@ -123,15 +121,19 @@ enum ipset_errno { IPSET_ERR_INVALID_NETMASK, IPSET_ERR_INVALID_FAMILY, IPSET_ERR_TIMEOUT, + IPSET_ERR_REFERENCED, + /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 160, }; - -enum ipset_data_flags { + +enum ipset_cmd_flags { IPSET_FLAG_BIT_EXIST = 0, IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST), - - IPSET_FLAG_BIT_BEFORE = 2, +}; + +enum ipset_cadt_flags { + IPSET_FLAG_BIT_BEFORE = 0, IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE), }; @@ -140,35 +142,13 @@ enum ipset_adt { IPSET_ADD, IPSET_DEL, IPSET_TEST, - IPSET_CREATE, + IPSET_ADT_MAX, + IPSET_CREATE = IPSET_ADT_MAX, IPSET_CADT_MAX, }; -#ifndef __KERNEL__ -#ifdef IP_SET_DEBUG -#include -#include -#include -#define D(format, args...) 
do { \ - fprintf(stderr, "%s: %s: ", __FILE__, __FUNCTION__); \ - fprintf(stderr, format "\n" , ## args); \ -} while (0) -static inline void -dump_nla(struct nlattr *nla[], int maxlen) -{ - int i; - - for (i = 0; i < maxlen; i++) - D("nla[%u] does%s exist", i, !nla[i] ? " NOT" : ""); -} - -#else -#define D(format, args...) -#define dump_nla(nla, maxlen) -#endif -#endif /* !__KERNEL__ */ - #ifdef __KERNEL__ +#include #include #include #include @@ -176,19 +156,27 @@ dump_nla(struct nlattr *nla[], int maxlen) /* Sets are identified by an index in kernel space. Tweak with ip_set_id_t * and IPSET_INVALID_ID if you want to increase the max number of sets. */ -typedef uint16_t ip_set_id_t; +typedef u16 ip_set_id_t; #define IPSET_INVALID_ID 65535 +enum ip_set_dim { + IPSET_DIM_ZERO = 0, + IPSET_DIM_ONE, + IPSET_DIM_TWO, + IPSET_DIM_THREE, + /* Max dimension in elements. + * If changed, new revision of iptables match/target is required. + */ + IPSET_DIM_MAX = 6, +}; + /* Option flags for kernel operations */ enum ip_set_kopt { - /* Bit 0 is reserved */ - IPSET_SRC_FLAG = 1, - IPSET_SRC = (1 << IPSET_SRC_FLAG), - IPSET_DST_FLAG = 2, - IPSET_DST = (1 << IPSET_DST_FLAG), - IPSET_INV_FLAG = 3, - IPSET_INV = (1 << IPSET_INV_FLAG), + IPSET_INV_MATCH = (1 << IPSET_DIM_ZERO), + IPSET_DIM_ONE_SRC = (1 << IPSET_DIM_ONE), + IPSET_DIM_TWO_SRC = (1 << IPSET_DIM_TWO), + IPSET_DIM_THREE_SRC = (1 << IPSET_DIM_THREE), }; /* Set features */ @@ -203,72 +191,60 @@ enum ip_set_feature { IPSET_TYPE_IP2 = (1 << IPSET_TYPE_IP2_FLAG), IPSET_TYPE_NAME_FLAG = 4, IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), + /* Actually just a flag for dumping */ + IPSET_DUMP_LAST_FLAG = 7, + IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), }; +/* Calculate the bytes required to store the inclusive range of a-b */ static inline int -bitmap_bytes(uint32_t a, uint32_t b) +bitmap_bytes(u32 a, u32 b) { return 4 * ((((b - a + 8) / 8) + 3) / 4); } -#define ip_set_printk(format, args...) 
\ - do { \ - printk("%s: %s: ", __FILE__, __FUNCTION__); \ - printk(format "\n" , ## args); \ - } while (0) - -#if defined(IP_SET_DEBUG) -#define D(format, args...) \ - do { \ - printk("%s: %s (DBG): ", __FILE__, __FUNCTION__);\ - printk(format "\n" , ## args); \ - } while (0) - -static inline void -dump_nla(const struct nlattr * const nla[], int maxlen) -{ - int i; - - for (i = 0; i < maxlen; i++) - printk("nlattr[%u] does%s exist\n", i, nla[i] ? "" : " NOT"); -} -#else -#define D(format, args...) -#define dump_nla(nla, maxlen) -#endif - struct ip_set; +typedef int (*ipset_adtfn)(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout); + /* Set type, variant-specific part */ struct ip_set_type_variant { /* Kernelspace: test/add/del entries */ int (*kadt)(struct ip_set *set, const struct sk_buff * skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags); + enum ipset_adt adt, u8 pf, u8 dim, u8 flags); /* Userspace: test/add/del entries */ int (*uadt)(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t flags); + enum ipset_adt adt, u32 *lineno, u32 flags); + + /* Low level add/del/test entries */ + ipset_adtfn adt[IPSET_ADT_MAX]; /* When adding entries and set is full, try to resize the set */ - int (*resize)(struct ip_set *set, uint8_t retried); + int (*resize)(struct ip_set *set, gfp_t gfp_flags, bool retried); /* Destroy the set */ void (*destroy)(struct ip_set *set); /* Flush the elements */ void (*flush)(struct ip_set *set); - + /* Expire entries before listing */ + void (*expire)(struct ip_set *set); /* List set header data */ int (*head)(struct ip_set *set, struct sk_buff *skb); /* List elements */ int (*list)(struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb); + + /* Return true if "b" set is the same as "a" + * according to the set parameters */ + bool (*same_set)(const struct ip_set *a, const struct ip_set *b); }; /* Flags for the set type variants */ enum ip_set_type_flags { - 
IP_SET_FLAG_VMALLOC_BIT = 0, - IP_SET_FLAG_VMALLOC = (1 << IP_SET_FLAG_VMALLOC_BIT), - IP_SET_FLAG_TIMEOUT_BIT = 1, - IP_SET_FLAG_TIMEOUT = (1 << IP_SET_FLAG_TIMEOUT_BIT), + /* Set members created by kmalloc */ + IP_SET_FLAG_KMALLOC_BIT = 0, + IP_SET_FLAG_KMALLOC = (1 << IP_SET_FLAG_KMALLOC_BIT), }; /* The core set type structure */ @@ -278,17 +254,19 @@ struct ip_set_type { /* Typename */ char name[IPSET_MAXNAMELEN]; /* Protocol version */ - uint8_t protocol; + u8 protocol; /* Set features to control swapping */ - uint8_t features; + u8 features; + /* Set type dimension */ + u8 dimension; /* Supported family: may be AF_UNSPEC for both AF_INET/AF_INET6 */ - uint8_t family; + u8 family; /* Type revision */ - uint8_t revision; + u8 revision; /* Create set */ int (*create)(struct ip_set *set, - struct nlattr *head, int len, uint32_t flags); + struct nlattr *head, int len, u32 flags); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -310,86 +288,90 @@ struct ip_set { /* The type variant doing the real job */ const struct ip_set_type_variant *variant; /* The actual INET family */ - uint8_t family; + u8 family; /* Set type flags, filled/modified by create/resize */ - uint8_t flags; + u8 flags; /* The type specific data */ void *data; }; /* register and unregister set references */ -extern ip_set_id_t ip_set_get_byname(const char name[IPSET_MAXNAMELEN]); +extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set); extern void ip_set_put_byindex(ip_set_id_t index); +extern const char * ip_set_name_byindex(ip_set_id_t index); +extern ip_set_id_t ip_set_nfnl_get(const char *name); +extern ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index); +extern void ip_set_nfnl_put(ip_set_id_t index); /* API for iptables set match, and SET target */ extern int ip_set_add(ip_set_id_t id, const struct sk_buff *skb, - uint8_t family, const uint8_t *flags); + u8 family, u8 dim, u8 flags); extern int ip_set_del(ip_set_id_t id, const 
struct sk_buff *skb, - uint8_t family, const uint8_t *flags); + u8 family, u8 dim, u8 flags); extern int ip_set_test(ip_set_id_t id, const struct sk_buff *skb, - uint8_t family, const uint8_t *flags); + u8 family, u8 dim, u8 flags); /* Allocate members */ static inline void * -ip_set_alloc(size_t size, gfp_t gfp_mask, uint8_t *flags) +ip_set_alloc(size_t size, gfp_t gfp_mask, u8 *flags) { - void *members = kzalloc(size, gfp_mask); + void *members = kzalloc(size, gfp_mask | __GFP_NOWARN); if (members) { - *flags &= ~IP_SET_FLAG_VMALLOC; - D("allocated with kmalloc %p", members); + *flags |= IP_SET_FLAG_KMALLOC; + pr_debug("%p: allocated with kmalloc", members); return members; } members = __vmalloc(size, gfp_mask | __GFP_ZERO, PAGE_KERNEL); if (!members) return NULL; - *flags |= IP_SET_FLAG_VMALLOC; - D("allocated with vmalloc %p", members); + *flags &= ~IP_SET_FLAG_KMALLOC; + pr_debug("%p: allocated with vmalloc", members); return members; } static inline void -ip_set_free(void *members, uint8_t flags) +ip_set_free(void *members, u8 flags) { - D("free with %s %p", flags & IP_SET_FLAG_VMALLOC ? "vmalloc" : "kmalloc", - members); - if (flags & IP_SET_FLAG_VMALLOC) - vfree(members); - else + pr_debug("%p: free with %s", members, + flags & IP_SET_FLAG_KMALLOC ? "kmalloc" : "vmalloc"); + if (flags & IP_SET_FLAG_KMALLOC) kfree(members); + else + vfree(members); } /* Useful converters */ -static inline uint32_t +static inline u32 ip_set_get_h32(const struct nlattr *attr) { - uint32_t value = nla_get_u32(attr); + u32 value = nla_get_u32(attr); return attr->nla_type & NLA_F_NET_BYTEORDER ? ntohl(value) : value; } -static inline uint16_t +static inline u16 ip_set_get_h16(const struct nlattr *attr) { - uint16_t value = nla_get_u16(attr); + u16 value = nla_get_u16(attr); return attr->nla_type & NLA_F_NET_BYTEORDER ? 
ntohs(value) : value; } -static inline uint32_t +static inline u32 ip_set_get_n32(const struct nlattr *attr) { - uint32_t value = nla_get_u32(attr); + u32 value = nla_get_u32(attr); return attr->nla_type & NLA_F_NET_BYTEORDER ? value : htonl(value); } -static inline uint16_t +static inline u16 ip_set_get_n16(const struct nlattr *attr) { - uint16_t value = nla_get_u16(attr); + u16 value = nla_get_u16(attr); return attr->nla_type & NLA_F_NET_BYTEORDER ? value : htons(value); } @@ -404,31 +386,49 @@ ip_set_get_n16(const struct nlattr *attr) NLA_PUT_BE16(skb, type | NLA_F_NET_BYTEORDER, value) /* Get address from skbuff */ -static inline uint32_t -ip4addr(const struct sk_buff *skb, const uint8_t *flags) +static inline u32 +ip4addr(const struct sk_buff *skb, bool src) { - return flags[0] & IPSET_SRC ? ip_hdr(skb)->saddr - : ip_hdr(skb)->daddr; + return src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr; } static inline void -ip4addrptr(const struct sk_buff *skb, const uint8_t *flags, uint32_t *addr) +ip4addrptr(const struct sk_buff *skb, bool src, u32 *addr) { - *addr = flags[0] & IPSET_SRC ? ip_hdr(skb)->saddr - : ip_hdr(skb)->daddr; + *addr = src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr; } static inline void -ip6addrptr(const struct sk_buff *skb, const uint8_t *flags, - struct in6_addr *addr) +ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) { - memcpy(addr, flags[0] & IPSET_SRC ? &ipv6_hdr(skb)->saddr - : &ipv6_hdr(skb)->daddr, + memcpy(addr, src ? 
&ipv6_hdr(skb)->saddr : &ipv6_hdr(skb)->daddr, sizeof(*addr)); } -#define pack_ip_port(map, ip, port) \ - (port + ((ip - ((map)->first_ip)) << 16)) +/* Interface to iptables/ip6tables */ + +#define SO_IP_SET 83 + +union ip_set_name_index { + char name[IPSET_MAXNAMELEN]; + ip_set_id_t index; +}; + +#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */ +struct ip_set_req_get_set { + unsigned op; + unsigned version; + union ip_set_name_index set; +}; + +#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ +/* Uses ip_set_req_get_set */ + +#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ +struct ip_set_req_version { + unsigned op; + unsigned version; +}; #endif /* __KERNEL__ */ diff --git a/kernel/include/linux/netfilter/ip_set_bitmap.h b/kernel/include/linux/netfilter/ip_set_bitmap.h index 49d0f5c..0d067d0 100644 --- a/kernel/include/linux/netfilter/ip_set_bitmap.h +++ b/kernel/include/linux/netfilter/ip_set_bitmap.h @@ -12,10 +12,10 @@ enum { /* Common functions */ -static inline uint32_t -range_to_mask(uint32_t from, uint32_t to, uint8_t *bits) +static inline u32 +range_to_mask(u32 from, u32 to, u8 *bits) { - uint32_t mask = 0xFFFFFFFE; + u32 mask = 0xFFFFFFFE; *bits = 32; while (--(*bits) > 0 && mask && (to & mask) != from) diff --git a/kernel/include/linux/netfilter/ip_set_chash.h b/kernel/include/linux/netfilter/ip_set_chash.h new file mode 100644 index 0000000..0d77a5d --- /dev/null +++ b/kernel/include/linux/netfilter/ip_set_chash.h @@ -0,0 +1,1096 @@ +#ifndef _IP_SET_CHASH_H +#define _IP_SET_CHASH_H + +#include +#include +#include + +#define CONCAT(a, b, c) a##b##c +#define TOKEN(a, b, c) CONCAT(a, b, c) + +/* Cache friendly hash with resizing when linear searching becomes too long. + * Internally jhash is used with the assumption that the size of the stored + * data is a multiple of sizeof(u32). If storage supports timeout, the + * timeout field must be the last one in the data structure. 
+ */ + +/* Number of elements to store in an array block */ +#define CHASH_DEFAULT_ARRAY_SIZE 4 +/* Number of arrays: max ARRAY_SIZE * CHAIN_LIMIT "long" chains */ +#define CHASH_DEFAULT_CHAIN_LIMIT 3 + +struct chash_nets { + u32 nets; /* number of elements per cidr */ + u8 cidr; /* the cidr values added to the set */ +}; + +struct chash { + struct slist *htable; /* Hashtable of single linked lists */ + u32 maxelem; /* Max elements in the hash */ + u32 elements; /* Current element (vs timeout) */ + u32 initval; /* random jhash init value */ + u32 timeout; /* timeout value, if enabled */ + struct timer_list gc; /* garbage collection when timeout enabled */ + u8 htable_bits; /* size of hash table == 2^htable_bits */ + u8 array_size; /* number of elements in an array */ + u8 chain_limit; /* max number of arrays */ +#ifdef IP_SET_HASH_WITH_NETMASK + u8 netmask; /* netmask value for subnets to store */ +#endif +#ifdef IP_SET_HASH_WITH_NETS + struct chash_nets nets[0]; /* book keeping of networks */ +#endif +}; + +static inline u8 +htable_bits(u32 hashsize) +{ + /* Assume that hashsize == 2^htable_bits */ + u8 bits = fls(hashsize - 1); + if (jhash_size(bits) != hashsize) + /* Round up to the first 2^n value */ + bits = fls(hashsize); + + return bits; +} + +static inline void +add_cidr(struct chash_nets *nets, u8 host_mask, u8 cidr) +{ + u8 i; + + pr_debug("add_cidr %u", cidr); + for (i = 0; i < host_mask - 1 && nets[i].cidr; i++) { + /* Add in increasing prefix order, so larger cidr first */ + if (nets[i].cidr < cidr) + swap(nets[i].cidr, cidr); + } + if (i < host_mask - 1) + nets[i].cidr = cidr; +} + +static inline void +del_cidr(struct chash_nets *nets, u8 host_mask, u8 cidr) +{ + u8 i; + + pr_debug("del_cidr %u", cidr); + for (i = 0; i < host_mask - 2 && nets[i].cidr; i++) { + if (nets[i].cidr == cidr) + nets[i].cidr = cidr = nets[i+1].cidr; + } + nets[host_mask - 2].cidr = 0; +} + +static void +chash_destroy(struct slist *t, u8 htable_bits, u8 flags) +{ + struct 
slist *n, *tmp; + u32 i; + + for (i = 0; i < jhash_size(htable_bits); i++) + slist_for_each_safe(n, tmp, &t[i]) + /* FIXME: slab cache */ + kfree(n); + + ip_set_free(t, flags); +} + +static size_t +chash_memsize(const struct chash *h, size_t dsize, u8 host_mask) +{ + struct slist *n; + u32 i; + size_t memsize = sizeof(*h) +#ifdef IP_SET_HASH_WITH_NETS + + sizeof(struct chash_nets) * (host_mask - 1) +#endif + + jhash_size(h->htable_bits) * sizeof(struct slist); + + for (i = 0; i < jhash_size(h->htable_bits); i++) + slist_for_each(n, &h->htable[i]) + memsize += sizeof(struct slist) + + h->array_size * dsize; + + return memsize; +} + +static void +ip_set_hash_flush(struct ip_set *set) +{ + struct chash *h = set->data; + struct slist *n, *tmp; + u32 i; + + for (i = 0; i < jhash_size(h->htable_bits); i++) { + slist_for_each_safe(n, tmp, &h->htable[i]) + /* FIXME: slab cache */ + kfree(n); + h->htable[i].next = NULL; + } +#ifdef IP_SET_HASH_WITH_NETS + memset(h->nets, 0, sizeof(struct chash_nets) + * (set->family == AF_INET ? 
31 : 127)); +#endif + h->elements = 0; +} + +static void +ip_set_hash_destroy(struct ip_set *set) +{ + struct chash *h = set->data; + + if (with_timeout(h->timeout)) + del_timer_sync(&h->gc); + + chash_destroy(h->htable, h->htable_bits, set->flags); + kfree(h); + + set->data = NULL; +} + +#define JHASH2(data, initval, htable_bits) \ +jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \ + & jhash_mask(htable_bits) + +#endif /* _IP_SET_CHASH_H */ + +/* Type/family dependent function prototypes */ + +#define type_pf_data_equal TOKEN(TYPE, PF, _data_equal) +#define type_pf_data_isnull TOKEN(TYPE, PF, _data_isnull) +#define type_pf_data_copy TOKEN(TYPE, PF, _data_copy) +#define type_pf_data_swap TOKEN(TYPE, PF, _data_swap) +#define type_pf_data_zero_out TOKEN(TYPE, PF, _data_zero_out) +#define type_pf_data_netmask TOKEN(TYPE, PF, _data_netmask) +#define type_pf_data_list TOKEN(TYPE, PF, _data_list) +#define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist) + +#define type_pf_elem TOKEN(TYPE, PF, _elem) +#define type_pf_telem TOKEN(TYPE, PF, _telem) +#define type_pf_data_timeout TOKEN(TYPE, PF, _data_timeout) +#define type_pf_data_expired TOKEN(TYPE, PF, _data_expired) +#define type_pf_data_swap_timeout TOKEN(TYPE, PF, _data_swap_timeout) +#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set) + +#define type_pf_chash_readd TOKEN(TYPE, PF, _chash_readd) +#define type_pf_chash_del_elem TOKEN(TYPE, PF, _chash_del_elem) +#define type_pf_chash_add TOKEN(TYPE, PF, _chash_add) +#define type_pf_chash_del TOKEN(TYPE, PF, _chash_del) +#define type_pf_chash_test_cidrs TOKEN(TYPE, PF, _chash_test_cidrs) +#define type_pf_chash_test TOKEN(TYPE, PF, _chash_test) + +#define type_pf_chash_treadd TOKEN(TYPE, PF, _chash_treadd) +#define type_pf_chash_del_telem TOKEN(TYPE, PF, _chash_del_telem) +#define type_pf_chash_expire TOKEN(TYPE, PF, _chash_expire) +#define type_pf_chash_tadd TOKEN(TYPE, PF, _chash_tadd) +#define type_pf_chash_tdel TOKEN(TYPE, PF, 
_chash_tdel) +#define type_pf_chash_ttest_cidrs TOKEN(TYPE, PF, _chash_ttest_cidrs) +#define type_pf_chash_ttest TOKEN(TYPE, PF, _chash_ttest) + +#define type_pf_resize TOKEN(TYPE, PF, _resize) +#define type_pf_tresize TOKEN(TYPE, PF, _tresize) +#define type_pf_flush ip_set_hash_flush +#define type_pf_destroy ip_set_hash_destroy +#define type_pf_head TOKEN(TYPE, PF, _head) +#define type_pf_list TOKEN(TYPE, PF, _list) +#define type_pf_tlist TOKEN(TYPE, PF, _tlist) +#define type_pf_same_set TOKEN(TYPE, PF, _same_set) +#define type_pf_kadt TOKEN(TYPE, PF, _kadt) +#define type_pf_uadt TOKEN(TYPE, PF, _uadt) +#define type_pf_gc TOKEN(TYPE, PF, _gc) +#define type_pf_gc_init TOKEN(TYPE, PF, _gc_init) +#define type_pf_variant TOKEN(TYPE, PF, _variant) +#define type_pf_tvariant TOKEN(TYPE, PF, _tvariant) + +/* Flavour without timeout */ + +#define chash_data(n, i) \ +(struct type_pf_elem *)((char *)(n) + sizeof(struct slist) + (i)*sizeof(struct type_pf_elem)) + +static int +type_pf_chash_readd(struct chash *h, struct slist *t, u8 htable_bits, + const struct type_pf_elem *value, gfp_t gfp_flags) +{ + struct slist *n, *prev; + struct type_pf_elem *data; + void *tmp; + int i = 0, j = 0; + u32 hash = JHASH2(value, h->initval, htable_bits); + + slist_for_each_prev(prev, n, &t[hash]) { + for (i = 0; i < h->array_size; i++) { + data = chash_data(n, i); + if (type_pf_data_isnull(data)) { + tmp = n; + goto found; + } + } + j++; + } + if (j < h->chain_limit) { + tmp = kzalloc(h->array_size * sizeof(struct type_pf_elem) + + sizeof(struct slist), gfp_flags); + if (!tmp) + return -ENOMEM; + prev->next = (struct slist *) tmp; + data = chash_data(tmp, 0); + } else { + /* Rehashing */ + return -EAGAIN; + } +found: + type_pf_data_copy(data, value); + return 0; +} + +static void +type_pf_chash_del_elem(struct chash *h, struct slist *prev, + struct slist *n, int i) +{ + struct type_pf_elem *data = chash_data(n, i); + struct slist *tmp; + int j; + + if (n->next != NULL) { + for (prev = n, tmp 
= n->next; + tmp->next != NULL; + prev = tmp, tmp = tmp->next) + /* Find last array */; + j = 0; + } else { + /* Already at last array */ + tmp = n; + j = i; + } + /* Find last non-empty element */ + for (; j < h->array_size - 1; j++) + if (type_pf_data_isnull(chash_data(tmp, j + 1))) + break; + + if (!(tmp == n && i == j)) { + type_pf_data_swap(data, chash_data(tmp, j)); + } +#ifdef IP_SET_HASH_WITH_NETS + if (--h->nets[data->cidr-1].nets == 0) + del_cidr(h->nets, HOST_MASK, data->cidr); +#endif + if (j == 0) { + prev->next = NULL; + kfree(tmp); + } else + type_pf_data_zero_out(chash_data(tmp, j)); + + h->elements--; +} + +static int +type_pf_resize(struct ip_set *set, gfp_t gfp_flags, bool retried) +{ + struct chash *h = set->data; + u8 htable_bits = h->htable_bits; + struct slist *t, *n; + const struct type_pf_elem *data; + u32 i, j; + u8 oflags, flags; + int ret; + +retry: + ret = 0; + htable_bits++; + if (!htable_bits) + /* In case we have plenty of memory :-) */ + return -IPSET_ERR_HASH_FULL; + t = ip_set_alloc(jhash_size(htable_bits) * sizeof(struct slist), + gfp_flags, &flags); + if (!t) + return -ENOMEM; + + write_lock_bh(&set->lock); + flags = oflags = set->flags; + for (i = 0; i < jhash_size(h->htable_bits); i++) { +next_slot: + slist_for_each(n, &h->htable[i]) { + for (j = 0; j < h->array_size; j++) { + data = chash_data(n, j); + if (type_pf_data_isnull(data)) { + i++; + goto next_slot; + } + ret = type_pf_chash_readd(h, t, htable_bits, + data, gfp_flags); + if (ret < 0) { + write_unlock_bh(&set->lock); + chash_destroy(t, htable_bits, flags); + if (ret == -EAGAIN) + goto retry; + return ret; + } + } + } + } + + n = h->htable; + i = h->htable_bits; + + h->htable = t; + h->htable_bits = htable_bits; + set->flags = flags; + write_unlock_bh(&set->lock); + + chash_destroy(n, i, oflags); + + return 0; +} + +static int +type_pf_chash_add(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) +{ + struct chash *h = set->data; + const struct 
type_pf_elem *d = value; + struct slist *n, *prev, *t = h->htable; + struct type_pf_elem *data; + void *tmp; + int i = 0, j = 0; + u32 hash; + +#ifdef IP_SET_HASH_WITH_NETS + if (h->elements >= h->maxelem || h->nets[d->cidr-1].nets == UINT_MAX) +#else + if (h->elements >= h->maxelem) +#endif + return -IPSET_ERR_HASH_FULL; + + hash = JHASH2(value, h->initval, h->htable_bits); + slist_for_each_prev(prev, n, &t[hash]) { + for (i = 0; i < h->array_size; i++) { + data = chash_data(n, i); + if (type_pf_data_isnull(data)) { + tmp = n; + goto found; + } + if (type_pf_data_equal(data, d)) + return -IPSET_ERR_EXIST; + } + j++; + } + if (j < h->chain_limit) { + tmp = kzalloc(h->array_size * sizeof(struct type_pf_elem) + + sizeof(struct slist), gfp_flags); + if (!tmp) + return -ENOMEM; + prev->next = (struct slist *) tmp; + data = chash_data(tmp, 0); + } else { + /* Rehashing */ + return -EAGAIN; + } +found: + type_pf_data_copy(data, d); +#ifdef IP_SET_HASH_WITH_NETS + if (h->nets[d->cidr-1].nets++ == 0) + add_cidr(h->nets, HOST_MASK, d->cidr); +#endif + h->elements++; + return 0; +} + +static int +type_pf_chash_del(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) +{ + struct chash *h = set->data; + const struct type_pf_elem *d = value; + struct slist *n, *prev; + int i; + struct type_pf_elem *data; + u32 hash = JHASH2(value, h->initval, h->htable_bits); + + slist_for_each_prev(prev, n, &h->htable[hash]) + for (i = 0; i < h->array_size; i++) { + data = chash_data(n, i); + if (type_pf_data_isnull(data)) + return -IPSET_ERR_EXIST; + if (type_pf_data_equal(data, d)) { + type_pf_chash_del_elem(h, prev, n, i); + return 0; + } + } + + return -IPSET_ERR_EXIST; +} + +#ifdef IP_SET_HASH_WITH_NETS +static inline int +type_pf_chash_test_cidrs(struct ip_set *set, + struct type_pf_elem *d, + gfp_t gfp_flags, u32 timeout) +{ + struct chash *h = set->data; + struct slist *n; + const struct type_pf_elem *data; + int i, j = 0; + u32 hash; + u8 host_mask = set->family == AF_INET 
? 32 : 128; + +retry: + pr_debug("test by nets"); + for (; j < host_mask - 1 && h->nets[j].cidr; j++) { + type_pf_data_netmask(d, h->nets[j].cidr); + hash = JHASH2(d, h->initval, h->htable_bits); + slist_for_each(n, &h->htable[hash]) + for (i = 0; i < h->array_size; i++) { + data = chash_data(n, i); + if (type_pf_data_isnull(data)) { + j++; + goto retry; + } + if (type_pf_data_equal(data, d)) + return 1; + } + } + return 0; +} +#endif + +static inline int +type_pf_chash_test(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) +{ + struct chash *h = set->data; + struct type_pf_elem *d = value; + struct slist *n; + const struct type_pf_elem *data; + int i; + u32 hash; +#ifdef IP_SET_HASH_WITH_NETS + u8 host_mask = set->family == AF_INET ? 32 : 128; + + if (d->cidr == host_mask) + return type_pf_chash_test_cidrs(set, d, gfp_flags, timeout); +#endif + + hash = JHASH2(d, h->initval, h->htable_bits); + slist_for_each(n, &h->htable[hash]) + for (i = 0; i < h->array_size; i++) { + data = chash_data(n, i); + if (type_pf_data_isnull(data)) + return 0; + if (type_pf_data_equal(data, d)) + return 1; + } + return 0; +} + +static int +type_pf_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct chash *h = set->data; + struct nlattr *nested; + size_t memsize; + + read_lock_bh(&set->lock); + memsize = chash_memsize(h, with_timeout(h->timeout) + ? sizeof(struct type_pf_telem) + : sizeof(struct type_pf_elem), + set->family == AF_INET ? 
32 : 128); + read_unlock_bh(&set->lock); + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE, + htonl(jhash_size(h->htable_bits))); + NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)); +#ifdef IP_SET_HASH_WITH_NETMASK + if (h->netmask != HOST_MASK) + NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask); +#endif + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)); + if (with_timeout(h->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EFAULT; +} + +static int +type_pf_list(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct chash *h = set->data; + struct nlattr *atd, *nested; + struct slist *n; + const struct type_pf_elem *data; + u32 first = cb->args[2]; + int i; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EFAULT; + pr_debug("list hash set %s", set->name); + for (; cb->args[2] < jhash_size(h->htable_bits); cb->args[2]++) { + slist_for_each(n, &h->htable[cb->args[2]]) { + for (i = 0; i < h->array_size; i++) { + data = chash_data(n, i); + if (type_pf_data_isnull(data)) + break; + pr_debug("list hash %lu slist %p i %u", + cb->args[2], n, i); + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (cb->args[2] == first) { + nla_nest_cancel(skb, atd); + return -EFAULT; + } else + goto nla_put_failure; + } + if (type_pf_data_list(skb, data)) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + } + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return 0; +} + +static int +type_pf_kadt(struct ip_set *set, const struct sk_buff * skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags); +static int 
+type_pf_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags); + +static const struct ip_set_type_variant type_pf_variant __read_mostly = { + .kadt = type_pf_kadt, + .uadt = type_pf_uadt, + .adt = { + [IPSET_ADD] = type_pf_chash_add, + [IPSET_DEL] = type_pf_chash_del, + [IPSET_TEST] = type_pf_chash_test, + }, + .destroy = type_pf_destroy, + .flush = type_pf_flush, + .head = type_pf_head, + .list = type_pf_list, + .resize = type_pf_resize, + .same_set = type_pf_same_set, +}; + +/* Flavour with timeout support */ + +#define chash_tdata(n, i) \ +(struct type_pf_elem *)((char *)(n) + sizeof(struct slist) + (i)*sizeof(struct type_pf_telem)) + +static inline u32 +type_pf_data_timeout(const struct type_pf_elem *data) +{ + const struct type_pf_telem *tdata = + (const struct type_pf_telem *) data; + + return tdata->timeout; +} + +static inline bool +type_pf_data_expired(const struct type_pf_elem *data) +{ + const struct type_pf_telem *tdata = + (const struct type_pf_telem *) data; + + return ip_set_timeout_expired(tdata->timeout); +} + +static inline void +type_pf_data_swap_timeout(struct type_pf_elem *src, + struct type_pf_elem *dst) +{ + struct type_pf_telem *x = (struct type_pf_telem *) src; + struct type_pf_telem *y = (struct type_pf_telem *) dst; + + swap(x->timeout, y->timeout); +} + +static inline void +type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout) +{ + struct type_pf_telem *tdata = (struct type_pf_telem *) data; + + tdata->timeout = ip_set_timeout_set(timeout); +} + +static int +type_pf_chash_treadd(struct chash *h, struct slist *t, u8 htable_bits, + const struct type_pf_elem *value, + gfp_t gfp_flags, u32 timeout) +{ + struct slist *n, *prev; + struct type_pf_elem *data; + void *tmp; + int i = 0, j = 0; + u32 hash = JHASH2(value, h->initval, htable_bits); + + slist_for_each_prev(prev, n, &t[hash]) { + for (i = 0; i < h->array_size; i++) { + data = chash_tdata(n, i); + if (type_pf_data_isnull(data)) 
{ + tmp = n; + goto found; + } + } + j++; + } + if (j < h->chain_limit) { + tmp = kzalloc(h->array_size * sizeof(struct type_pf_telem) + + sizeof(struct slist), gfp_flags); + if (!tmp) + return -ENOMEM; + prev->next = (struct slist *) tmp; + data = chash_tdata(tmp, 0); + } else { + /* Rehashing */ + return -EAGAIN; + } +found: + type_pf_data_copy(data, value); + type_pf_data_timeout_set(data, timeout); + return 0; +} + +static void +type_pf_chash_del_telem(struct chash *h, struct slist *prev, + struct slist *n, int i) +{ + struct type_pf_elem *d, *data = chash_tdata(n, i); + struct slist *tmp; + int j; + + pr_debug("del %u", i); + if (n->next != NULL) { + for (prev = n, tmp = n->next; + tmp->next != NULL; + prev = tmp, tmp = tmp->next) + /* Find last array */; + j = 0; + } else { + /* Already at last array */ + tmp = n; + j = i; + } + /* Find last non-empty element */ + for (; j < h->array_size - 1; j++) + if (type_pf_data_isnull(chash_tdata(tmp, j + 1))) + break; + + d = chash_tdata(tmp, j); + if (!(tmp == n && i == j)) { + type_pf_data_swap(data, d); + type_pf_data_swap_timeout(data, d); + } +#ifdef IP_SET_HASH_WITH_NETS + if (--h->nets[data->cidr-1].nets == 0) + del_cidr(h->nets, HOST_MASK, data->cidr); +#endif + if (j == 0) { + prev->next = NULL; + kfree(tmp); + } else + type_pf_data_zero_out(d); + + h->elements--; +} + +static void +type_pf_chash_expire(struct chash *h) +{ + struct slist *n, *prev; + struct type_pf_elem *data; + u32 i; + int j; + + for (i = 0; i < jhash_size(h->htable_bits); i++) + slist_for_each_prev(prev, n, &h->htable[i]) + for (j = 0; j < h->array_size; j++) { + data = chash_tdata(n, j); + if (type_pf_data_isnull(data)) + break; + if (type_pf_data_expired(data)) { + pr_debug("expire %u/%u", i, j); + type_pf_chash_del_telem(h, prev, n, j); + } + } +} + +static int +type_pf_tresize(struct ip_set *set, gfp_t gfp_flags, bool retried) +{ + struct chash *h = set->data; + u8 htable_bits = h->htable_bits; + struct slist *t, *n; + const struct 
type_pf_elem *data; + u32 i, j; + u8 oflags, flags; + int ret; + + /* Try to cleanup once */ + if (!retried) { + i = h->elements; + write_lock_bh(&set->lock); + type_pf_chash_expire(set->data); + write_unlock_bh(&set->lock); + if (h->elements < i) + return 0; + } + +retry: + ret = 0; + htable_bits++; + if (!htable_bits) + /* In case we have plenty of memory :-) */ + return -IPSET_ERR_HASH_FULL; + t = ip_set_alloc(jhash_size(htable_bits) * sizeof(struct slist), + gfp_flags, &flags); + if (!t) + return -ENOMEM; + + write_lock_bh(&set->lock); + flags = oflags = set->flags; + for (i = 0; i < jhash_size(h->htable_bits); i++) { +next_slot: + slist_for_each(n, &h->htable[i]) { + for (j = 0; j < h->array_size; j++) { + data = chash_tdata(n, j); + if (type_pf_data_isnull(data)) { + i++; + goto next_slot; + } + ret = type_pf_chash_treadd(h, t, htable_bits, + data, gfp_flags, + type_pf_data_timeout(data)); + if (ret < 0) { + write_unlock_bh(&set->lock); + chash_destroy(t, htable_bits, flags); + if (ret == -EAGAIN) + goto retry; + return ret; + } + } + } + } + + n = h->htable; + i = h->htable_bits; + + h->htable = t; + h->htable_bits = htable_bits; + set->flags = flags; + write_unlock_bh(&set->lock); + + chash_destroy(n, i, oflags); + + return 0; +} + +static int +type_pf_chash_tadd(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) +{ + struct chash *h = set->data; + const struct type_pf_elem *d = value; + struct slist *n, *prev, *t = h->htable; + struct type_pf_elem *data; + void *tmp; + int i = 0, j = 0; + u32 hash; + + if (h->elements >= h->maxelem) + /* FIXME: when set is full, we slow down here */ + type_pf_chash_expire(h); +#ifdef IP_SET_HASH_WITH_NETS + if (h->elements >= h->maxelem || h->nets[d->cidr-1].nets == UINT_MAX) +#else + if (h->elements >= h->maxelem) +#endif + return -IPSET_ERR_HASH_FULL; + + hash = JHASH2(d, h->initval, h->htable_bits); + slist_for_each_prev(prev, n, &t[hash]) { + for (i = 0; i < h->array_size; i++) { + data = chash_tdata(n, 
i); + if (type_pf_data_isnull(data) + || type_pf_data_expired(data)) { + tmp = n; + goto found; + } + if (type_pf_data_equal(data, d)) + return -IPSET_ERR_EXIST; + } + j++; + } + if (j < h->chain_limit) { + tmp = kzalloc(h->array_size * sizeof(struct type_pf_telem) + + sizeof(struct slist), gfp_flags); + if (!tmp) + return -ENOMEM; + prev->next = (struct slist *) tmp; + data = chash_tdata(tmp, 0); + } else { + /* Rehashing */ + return -EAGAIN; + } +found: + if (type_pf_data_isnull(data)) { + h->elements++; +#ifdef IP_SET_HASH_WITH_NETS + } else { + if (--h->nets[data->cidr-1].nets == 0) + del_cidr(h->nets, HOST_MASK, data->cidr); + } + if (h->nets[d->cidr-1].nets++ == 0) { + add_cidr(h->nets, HOST_MASK, d->cidr); +#endif + } + type_pf_data_copy(data, d); + type_pf_data_timeout_set(data, timeout); + return 0; +} + +static int +type_pf_chash_tdel(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) +{ + struct chash *h = set->data; + const struct type_pf_elem *d = value; + struct slist *n, *prev; + int i, ret = 0; + struct type_pf_elem *data; + u32 hash = JHASH2(value, h->initval, h->htable_bits); + + slist_for_each_prev(prev, n, &h->htable[hash]) + for (i = 0; i < h->array_size; i++) { + data = chash_tdata(n, i); + if (type_pf_data_isnull(data)) + return -IPSET_ERR_EXIST; + if (type_pf_data_equal(data, d)) { + if (type_pf_data_expired(data)) + ret = -IPSET_ERR_EXIST; + type_pf_chash_del_telem(h, prev, n, i); + return ret; + } + } + + return -IPSET_ERR_EXIST; +} + +#ifdef IP_SET_HASH_WITH_NETS +static inline int +type_pf_chash_ttest_cidrs(struct ip_set *set, + struct type_pf_elem *d, + gfp_t gfp_flags, u32 timeout) +{ + struct chash *h = set->data; + struct type_pf_elem *data; + struct slist *n; + int i, j = 0; + u32 hash; + u8 host_mask = set->family == AF_INET ? 
32 : 128; + +retry: + for (; j < host_mask - 1 && h->nets[j].cidr; j++) { + type_pf_data_netmask(d, h->nets[j].cidr); + hash = JHASH2(d, h->initval, h->htable_bits); + slist_for_each(n, &h->htable[hash]) + for (i = 0; i < h->array_size; i++) { + data = chash_tdata(n, i); + if (type_pf_data_isnull(data)) { + j++; + goto retry; + } + if (type_pf_data_equal(data, d)) + return !type_pf_data_expired(data); + } + } + return 0; +} +#endif + +static inline int +type_pf_chash_ttest(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) +{ + struct chash *h = set->data; + struct type_pf_elem *data, *d = value; + struct slist *n; + int i; + u32 hash; +#ifdef IP_SET_HASH_WITH_NETS + u8 host_mask = set->family == AF_INET ? 32 : 128; + + if (d->cidr == host_mask) + return type_pf_chash_ttest_cidrs(set, d, gfp_flags, + timeout); +#endif + hash = JHASH2(d, h->initval, h->htable_bits); + slist_for_each(n, &h->htable[hash]) + for (i = 0; i < h->array_size; i++) { + data = chash_tdata(n, i); + if (type_pf_data_isnull(data)) + return 0; + if (type_pf_data_equal(data, d)) + return !type_pf_data_expired(data); + } + return 0; +} + +static int +type_pf_tlist(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct chash *h = set->data; + struct nlattr *atd, *nested; + struct slist *n; + const struct type_pf_elem *data; + u32 first = cb->args[2]; + int i; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EFAULT; + for (; cb->args[2] < jhash_size(h->htable_bits); cb->args[2]++) { + slist_for_each(n, &h->htable[cb->args[2]]) { + for (i = 0; i < h->array_size; i++) { + data = chash_tdata(n, i); + pr_debug("list %p %u", n, i); + if (type_pf_data_isnull(data)) + break; + if (type_pf_data_expired(data)) + continue; + pr_debug("do list %p %u", n, i); + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (cb->args[2] == first) { + nla_nest_cancel(skb, atd); + return -EFAULT; + } else + goto nla_put_failure; + } + 
if (type_pf_data_tlist(skb, data)) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + } + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return 0; +} + +static const struct ip_set_type_variant type_pf_tvariant __read_mostly = { + .kadt = type_pf_kadt, + .uadt = type_pf_uadt, + .adt = { + [IPSET_ADD] = type_pf_chash_tadd, + [IPSET_DEL] = type_pf_chash_tdel, + [IPSET_TEST] = type_pf_chash_ttest, + }, + .destroy = type_pf_destroy, + .flush = type_pf_flush, + .head = type_pf_head, + .list = type_pf_tlist, + .resize = type_pf_tresize, + .same_set = type_pf_same_set, +}; + +static void +type_pf_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct chash *h = set->data; + + pr_debug("called"); + write_lock_bh(&set->lock); + type_pf_chash_expire(h); + write_unlock_bh(&set->lock); + + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); +} + +static inline void +type_pf_gc_init(struct ip_set *set) +{ + struct chash *h = set->data; + + init_timer(&h->gc); + h->gc.data = (unsigned long) set; + h->gc.function = type_pf_gc; + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); + pr_debug("gc initialized, run in every %u", IPSET_GC_PERIOD(h->timeout)); +} + +#undef type_pf_data_equal +#undef type_pf_data_isnull +#undef type_pf_data_copy +#undef type_pf_data_swap +#undef type_pf_data_zero_out +#undef type_pf_data_list +#undef type_pf_data_tlist + +#undef type_pf_elem +#undef type_pf_telem +#undef type_pf_data_timeout +#undef type_pf_data_expired +#undef type_pf_data_swap_timeout +#undef type_pf_data_netmask +#undef type_pf_data_timeout_set + +#undef type_pf_chash_readd +#undef type_pf_chash_del_elem +#undef type_pf_chash_add +#undef type_pf_chash_del +#undef type_pf_chash_test_cidrs +#undef type_pf_chash_test + +#undef type_pf_chash_treadd +#undef 
type_pf_chash_del_telem +#undef type_pf_chash_expire +#undef type_pf_chash_tadd +#undef type_pf_chash_tdel +#undef type_pf_chash_ttest_cidrs +#undef type_pf_chash_ttest + +#undef type_pf_resize +#undef type_pf_tresize +#undef type_pf_flush +#undef type_pf_destroy +#undef type_pf_head +#undef type_pf_list +#undef type_pf_tlist +#undef type_pf_same_set +#undef type_pf_kadt +#undef type_pf_uadt +#undef type_pf_gc +#undef type_pf_gc_init +#undef type_pf_variant +#undef type_pf_tvariant diff --git a/kernel/include/linux/netfilter/ip_set_getport.h b/kernel/include/linux/netfilter/ip_set_getport.h index 855f12a..ffa89f1 100644 --- a/kernel/include/linux/netfilter/ip_set_getport.h +++ b/kernel/include/linux/netfilter/ip_set_getport.h @@ -8,8 +8,8 @@ #define IPSET_INVALID_PORT 65536 /* We must handle non-linear skbs */ -static uint32_t -get_port(uint8_t pf, const struct sk_buff *skb, const uint8_t *flags) +static bool +get_port(u8 pf, const struct sk_buff *skb, bool src, u16 *port) { unsigned short protocol; unsigned int protoff; @@ -30,19 +30,19 @@ get_port(uint8_t pf, const struct sk_buff *skb, const uint8_t *flags) protohdr = ipv6_find_hdr(skb, &protoff, -1, &frag_off); if (protohdr < 0) - return IPSET_INVALID_PORT; + return false; protocol = protohdr; fragoff = frag_off; break; } default: - return IPSET_INVALID_PORT; + return false; } /* See comments at tcp_match in ip_tables.c */ if (fragoff) - return IPSET_INVALID_PORT; + return false; switch (protocol) { case IPPROTO_TCP: { @@ -52,9 +52,10 @@ get_port(uint8_t pf, const struct sk_buff *skb, const uint8_t *flags) th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) /* No choice either */ - return IPSET_INVALID_PORT; + return false; - return flags[0] & IPSET_SRC ? th->source : th->dest; + *port = src ? 
th->source : th->dest; + break; } case IPPROTO_UDP: { struct udphdr _udph; @@ -63,14 +64,16 @@ get_port(uint8_t pf, const struct sk_buff *skb, const uint8_t *flags) uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph); if (uh == NULL) /* No choice either */ - return IPSET_INVALID_PORT; + return false; - return flags[0] & IPSET_SRC ? uh->source : uh->dest; + *port = src ? uh->source : uh->dest; + break; } default: - return IPSET_INVALID_PORT; + return false; } + return true; } -#endif /* __KERNEL__ */ +#endif /* __KERNEL__ */ #endif /*_IP_SET_GETPORT_H*/ diff --git a/kernel/include/linux/netfilter/ip_set_hash.h b/kernel/include/linux/netfilter/ip_set_hash.h index dd183b7..c1a6964 100644 --- a/kernel/include/linux/netfilter/ip_set_hash.h +++ b/kernel/include/linux/netfilter/ip_set_hash.h @@ -9,12 +9,11 @@ enum { #ifdef __KERNEL__ -#define initval_t uint32_t - #define IPSET_DEFAULT_HASHSIZE 1024 +#define IPSET_MIMINAL_HASHSIZE 64 #define IPSET_DEFAULT_MAXELEM 65536 #define IPSET_DEFAULT_PROBES 4 -#define IPSET_DEFAULT_RESIZE 50 +#define IPSET_DEFAULT_RESIZE 100 #endif /* __KERNEL__ */ diff --git a/kernel/include/linux/netfilter/ip_set_jhash.h b/kernel/include/linux/netfilter/ip_set_jhash.h index 90bfcc3..d5e0d6d 100644 --- a/kernel/include/linux/netfilter/ip_set_jhash.h +++ b/kernel/include/linux/netfilter/ip_set_jhash.h @@ -1,7 +1,6 @@ #ifndef _LINUX_JHASH_H #define _LINUX_JHASH_H - -/* jhash.h: Jenkins hash support. +/* jhash.c: Jenkins hash support. * * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) * @@ -17,141 +16,106 @@ * if SELF_TEST is defined. You can use this free for any purpose. It's in * the public domain. It has no warranty. * - * Copyright (C) 2009 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) * * I've modified Bob's hash to be useful in the Linux kernel, and - * any bugs present are my fault. Jozsef + * any bugs present are my fault. 
The generic jhash is left out intentionally. + * Jozsef */ - -#define __rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) - -/* __jhash_mix - mix 3 32-bit values reversibly. */ -#define __jhash_mix(a,b,c) \ -{ \ - a -= c; a ^= __rot(c, 4); c += b; \ - b -= a; b ^= __rot(a, 6); a += c; \ - c -= b; c ^= __rot(b, 8); b += a; \ - a -= c; a ^= __rot(c,16); c += b; \ - b -= a; b ^= __rot(a,19); a += c; \ - c -= b; c ^= __rot(b, 4); b += a; \ +#ifdef __KERNEL__ +#include + +/* Best hash sizes are of power of two */ +#define jhash_size(n) ((u32)1<<(n)) +/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ +#define jhash_mask(n) (jhash_size(n)-1) + +/* __jhash_rot - rotate 32 bit */ +#define __jhash_rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* __jhash_mix -- mix 3 32-bit values reversibly. */ +#define __jhash_mix(a,b,c) \ +{ \ + a -= c; a ^= __jhash_rot(c, 4); c += b; \ + b -= a; b ^= __jhash_rot(a, 6); a += c; \ + c -= b; c ^= __jhash_rot(b, 8); b += a; \ + a -= c; a ^= __jhash_rot(c,16); c += b; \ + b -= a; b ^= __jhash_rot(a,19); a += c; \ + c -= b; c ^= __jhash_rot(b, 4); b += a; \ } /* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ -#define __jhash_final(a,b,c) \ -{ \ - c ^= b; c -= __rot(b,14); \ - a ^= c; a -= __rot(c,11); \ - b ^= a; b -= __rot(a,25); \ - c ^= b; c -= __rot(b,16); \ - a ^= c; a -= __rot(c,4); \ - b ^= a; b -= __rot(a,14); \ - c ^= b; c -= __rot(b,24); \ +#define __jhash_final(a,b,c) \ +{ \ + c ^= b; c -= __jhash_rot(b,14); \ + a ^= c; a -= __jhash_rot(c,11); \ + b ^= a; b -= __jhash_rot(a,25); \ + c ^= b; c -= __jhash_rot(b,16); \ + a ^= c; a -= __jhash_rot(c,4); \ + b ^= a; b -= __jhash_rot(a,14); \ + c ^= b; c -= __jhash_rot(b,24); \ } -/* An arbitrary value */ -#define JHASH_RANDOM_PARAM 0xdeadbeef - -/* The most generic version, hashes an arbitrary sequence - * of bytes. No alignment or length assumptions are made about - * the input key. The result depends on endianness. 
- */ -static inline u32 jhash(const void *key, u32 length, u32 initval) -{ - u32 a,b,c; - const u8 *k = key; - - /* Set up the internal state */ - a = b = c = JHASH_RANDOM_PARAM + length + initval; - - /* all but the last block: affect some 32 bits of (a,b,c) */ - while (length > 12) { - a += (k[0] + ((u32)k[1]<<8) + ((u32)k[2]<<16) + ((u32)k[3]<<24)); - b += (k[4] + ((u32)k[5]<<8) + ((u32)k[6]<<16) + ((u32)k[7]<<24)); - c += (k[8] + ((u32)k[9]<<8) + ((u32)k[10]<<16) + ((u32)k[11]<<24)); - __jhash_mix(a, b, c); - length -= 12; - k += 12; - } - - /* last block: affect all 32 bits of (c) */ - /* all the case statements fall through */ - switch (length) { - case 12: c += (u32)k[11]<<24; - case 11: c += (u32)k[10]<<16; - case 10: c += (u32)k[9]<<8; - case 9 : c += k[8]; - case 8 : b += (u32)k[7]<<24; - case 7 : b += (u32)k[6]<<16; - case 6 : b += (u32)k[5]<<8; - case 5 : b += k[4]; - case 4 : a += (u32)k[3]<<24; - case 3 : a += (u32)k[2]<<16; - case 2 : a += (u32)k[1]<<8; - case 1 : a += k[0]; - __jhash_final(a, b, c); - case 0 : - break; - } - - return c; -} +#define JHASH_INITVAL 0xdeadbeef -/* A special optimized version that handles 1 or more of u32s. - * The length parameter here is the number of u32s in the key. +/* jhash2 - hash an array of u32's + * @k: the key which must be an array of u32's + * @length: the number of u32's in the key + * @initval: the previous hash, or an arbitray value + * + * Returns the hash value of the key. 
*/ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) { u32 a, b, c; /* Set up the internal state */ - a = b = c = JHASH_RANDOM_PARAM + (length<<2) + initval; + a = b = c = JHASH_INITVAL + (length<<2) + initval; - /* handle most of the key */ + /* Handle most of the key */ while (length > 3) { a += k[0]; b += k[1]; c += k[2]; - __jhash_mix(a, b, c); + __jhash_mix(a,b,c); length -= 3; k += 3; } - - /* handle the last 3 u32's */ - /* all the case statements fall through */ - switch (length) { + + /* Handle the last 3 u32's: all the case statements fall through */ + switch(length) { case 3: c += k[2]; case 2: b += k[1]; case 1: a += k[0]; - __jhash_final(a, b, c); - case 0: /* case 0: nothing left to add */ + __jhash_final(a,b,c); + case 0: /* Nothing left to add */ break; } return c; } -/* A special ultra-optimized versions that knows they are hashing exactly - * 3, 2 or 1 word(s). - */ +/* jhash_3words - hash exactly 3, 2 or 1 word(s) */ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) { - a += JHASH_RANDOM_PARAM + initval; - b += JHASH_RANDOM_PARAM + initval; - c += JHASH_RANDOM_PARAM + initval; - - __jhash_final(a, b, c); + a += JHASH_INITVAL; + b += JHASH_INITVAL; + c += initval; + __jhash_final(a,b,c); + return c; } - static inline u32 jhash_2words(u32 a, u32 b, u32 initval) { - return jhash_3words(0, a, b, initval); + return jhash_3words(a, b, 0, initval); } static inline u32 jhash_1word(u32 a, u32 initval) { - return jhash_3words(0, 0, a, initval); + return jhash_3words(a, 0, 0, initval); } +#endif /* __KERNEL__ */ + #endif /* _LINUX_JHASH_H */ diff --git a/kernel/include/linux/netfilter/ip_set_kernel.h b/kernel/include/linux/netfilter/ip_set_kernel.h new file mode 100644 index 0000000..d6e033b --- /dev/null +++ b/kernel/include/linux/netfilter/ip_set_kernel.h @@ -0,0 +1,20 @@ +#ifndef _IP_SET_KERNEL_H +#define _IP_SET_KERNEL_H + +/* Copyright (C) 2003-2010 Jozsef Kadlecsik + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +/* Complete debug messages */ +#define pr_fmt(fmt) "%s %s[%i]: " fmt "\n", __FILE__, __func__, __LINE__ + +#include + +#endif /* __KERNEL__ */ + +#endif /*_IP_SET_H */ diff --git a/kernel/include/linux/netfilter/ip_set_list.h b/kernel/include/linux/netfilter/ip_set_list.h new file mode 100644 index 0000000..c40643e --- /dev/null +++ b/kernel/include/linux/netfilter/ip_set_list.h @@ -0,0 +1,21 @@ +#ifndef __IP_SET_LIST_H +#define __IP_SET_LIST_H + +/* List type specific error codes */ +enum { + IPSET_ERR_NAME = IPSET_ERR_TYPE_SPECIFIC, + IPSET_ERR_LOOP, + IPSET_ERR_BEFORE, + IPSET_ERR_NAMEREF, + IPSET_ERR_LIST_FULL, + IPSET_ERR_REF_EXIST, +}; + +#ifdef __KERNEL__ + +#define IP_SET_LIST_DEFAULT_SIZE 8 +#define IP_SET_LIST_MIN_SIZE 4 + +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_LIST_H */ diff --git a/kernel/include/linux/netfilter/ip_set_slist.h b/kernel/include/linux/netfilter/ip_set_slist.h new file mode 100644 index 0000000..abc5afe --- /dev/null +++ b/kernel/include/linux/netfilter/ip_set_slist.h @@ -0,0 +1,86 @@ +#ifndef _IP_SET_SLIST_H +#define _IP_SET_SLIST_H + +#include +#include +#include + +/* + * Single linked lists with a single pointer. + * Mostly useful for hash tables where the two pointer list head + * and list node is too wasteful. 
+ */ + +struct slist { + struct slist *next; +}; + +#define SLIST(name) struct slist name = { .next = NULL } +#define INIT_SLIST(ptr) ((ptr)->next = NULL) + +#define slist_entry(ptr, type, member) container_of(ptr,type,member) + +#define slist_for_each(pos, head) \ + for (pos = (head)->next; pos && ({ prefetch(pos->next); 1; }); \ + pos = pos->next) + +#define slist_for_each_prev(prev, pos, head) \ + for (prev = head, pos = (head)->next; pos && ({ prefetch(pos->next); 1; }); \ + prev = pos, pos = pos->next) + +#define slist_for_each_safe(pos, n, head) \ + for (pos = (head)->next; pos && ({ n = pos->next; 1; }); \ + pos = n) + +/** + * slist_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct slist to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the slist within the struct. + */ +#define slist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->next; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = slist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * slist_for_each_entry_continue - iterate over a hlist continuing after current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct slist to use as a loop cursor. + * @member: the name of the slist within the struct. + */ +#define slist_for_each_entry_continue(tpos, pos, member) \ + for (pos = (pos)->next; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = slist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * slist_for_each_entry_from - iterate over a hlist continuing from current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct slist to use as a loop cursor. + * @member: the name of the slist within the struct. 
+ */ +#define slist_for_each_entry_from(tpos, pos, member) \ + for (; pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = slist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * slist_for_each_entry_safe - iterate over list of given type safe against + * removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct slist to use as a loop cursor. + * @n: another &struct slist to use as temporary storage + * @head: the head for your list. + * @member: the name of the slist within the struct. + */ +#define slist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->next; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = slist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +#endif /* _IP_SET_SLIST_H */ diff --git a/kernel/include/linux/netfilter/ip_set_timeout.h b/kernel/include/linux/netfilter/ip_set_timeout.h index da18875..bf1cbf6 100644 --- a/kernel/include/linux/netfilter/ip_set_timeout.h +++ b/kernel/include/linux/netfilter/ip_set_timeout.h @@ -10,21 +10,33 @@ #ifdef __KERNEL__ -/* How often should the gc be run at a minimum */ +/* How often should the gc be run by default */ #define IPSET_GC_TIME (3 * 60) /* Timeout period depending on the timeout value of the given set */ #define IPSET_GC_PERIOD(timeout) \ - max_t(uint32_t, (timeout)/10, IPSET_GC_TIME) + ((timeout/3) ? min_t(u32, (timeout)/3, IPSET_GC_TIME) : 1) -/* How much msec to sleep before retrying to destroy gc timer */ -#define IPSET_DESTROY_TIMER_SLEEP 10 +/* Set is defined without timeout support */ +#define IPSET_NO_TIMEOUT UINT_MAX -/* Timing out etries: unset and permanent */ +#define with_timeout(timeout) ((timeout) != IPSET_NO_TIMEOUT) + +static inline unsigned int +ip_set_timeout_uget(struct nlattr *tb) +{ + unsigned int timeout = ip_set_get_h32(tb); + + return timeout == IPSET_NO_TIMEOUT ? 
IPSET_NO_TIMEOUT - 1 : timeout; +} + +#ifdef IP_SET_BITMAP_TIMEOUT + +/* Bitmap entry is unset */ #define IPSET_ELEM_UNSET 0 +/* Bitmap entry is set with no timeout value */ #define IPSET_ELEM_PERMANENT UINT_MAX/2 -#ifdef IP_SET_BITMAP_TIMEOUT static inline bool ip_set_timeout_test(unsigned long timeout) { @@ -42,7 +54,7 @@ ip_set_timeout_expired(unsigned long timeout) } static inline unsigned long -ip_set_timeout_set(uint32_t timeout) +ip_set_timeout_set(u32 timeout) { unsigned long t; @@ -56,7 +68,7 @@ ip_set_timeout_set(uint32_t timeout) return t; } -static inline uint32_t +static inline u32 ip_set_timeout_get(unsigned long timeout) { return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ; @@ -64,6 +76,9 @@ ip_set_timeout_get(unsigned long timeout) #else +/* Hash entry is set with no timeout value */ +#define IPSET_ELEM_UNSET 0 + static inline bool ip_set_timeout_test(unsigned long timeout) { @@ -77,7 +92,7 @@ ip_set_timeout_expired(unsigned long timeout) } static inline unsigned long -ip_set_timeout_set(uint32_t timeout) +ip_set_timeout_set(u32 timeout) { unsigned long t; @@ -91,7 +106,7 @@ ip_set_timeout_set(uint32_t timeout) return t; } -static inline uint32_t +static inline u32 ip_set_timeout_get(unsigned long timeout) { return timeout == IPSET_ELEM_UNSET ? 
0 : (timeout - jiffies)/HZ; diff --git a/kernel/include/linux/netfilter/ipt_set.h b/kernel/include/linux/netfilter/ipt_set.h deleted file mode 100644 index 2a18b93..0000000 --- a/kernel/include/linux/netfilter/ipt_set.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _IPT_SET_H -#define _IPT_SET_H - -#include - -struct ipt_set_info { - ip_set_id_t index; - u_int32_t flags[IP_SET_MAX_BINDINGS + 1]; -}; - -/* match info */ -struct ipt_set_info_match { - struct ipt_set_info match_set; -}; - -struct ipt_set_info_target { - struct ipt_set_info add_set; - struct ipt_set_info del_set; -}; - -#endif /*_IPT_SET_H*/ diff --git a/kernel/include/linux/netfilter/xt_set.h b/kernel/include/linux/netfilter/xt_set.h new file mode 100644 index 0000000..949fa59 --- /dev/null +++ b/kernel/include/linux/netfilter/xt_set.h @@ -0,0 +1,55 @@ +#ifndef _XT_SET_H +#define _XT_SET_H + +#include + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + +/* + * Option flags for kernel operations (xt_set_info_v0) + */ +#define IPSET_SRC 0x01 /* Source match/add */ +#define IPSET_DST 0x02 /* Destination match/add */ +#define IPSET_MATCH_INV 0x04 /* Inverse matching */ + +struct xt_set_info_v0 { + ip_set_id_t index; + union { + u_int32_t flags[IPSET_DIM_MAX + 1]; + struct { + u_int32_t __flags[IPSET_DIM_MAX]; + u_int8_t dim; + u_int8_t flags; + } compat; + } u; +}; + +/* match and target infos */ +struct xt_set_info_match_v0 { + struct xt_set_info_v0 match_set; +}; + +struct xt_set_info_target_v0 { + struct xt_set_info_v0 add_set; + struct xt_set_info_v0 del_set; +}; + +/* Revision 1: current interface to netfilter/iptables */ + +struct xt_set_info { + ip_set_id_t index; + u_int8_t dim; + u_int8_t flags; +}; + +/* match and target infos */ +struct xt_set_info_match { + struct xt_set_info match_set; +}; + +struct xt_set_info_target { + struct xt_set_info add_set; + struct xt_set_info del_set; +}; + +#endif /*_XT_SET_H*/ diff --git a/kernel/ip_set.c b/kernel/ip_set.c index 
3af8fce..5bf331e 100644 --- a/kernel/ip_set.c +++ b/kernel/ip_set.c @@ -9,25 +9,26 @@ /* Kernel module for IP set management */ +#include #include #include #include -#include #include #include #include #include +#include #include #include #include #include -#include -static struct list_head ip_set_type_list; /* all registered sets */ +static struct list_head ip_set_type_list; /* all registered set types */ +static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ + static struct ip_set **ip_set_list; /* all individual sets */ -static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_lists */ -static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; +static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ #define STREQ(a,b) (strncmp(a,b,IPSET_MAXNAMELEN) == 0) @@ -43,31 +44,146 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* * The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is - * serialized by ip_set_type_list_lock/ip_set_type_list_unlock. + * serialized by ip_set_type_mutex. */ static inline void -ip_set_type_list_lock(void) +ip_set_type_lock(void) { mutex_lock(&ip_set_type_mutex); } static inline void -ip_set_type_list_unlock(void) +ip_set_type_unlock(void) { mutex_unlock(&ip_set_type_mutex); } +/* Register and deregister settype */ + +static inline struct ip_set_type * +find_set_type(const char *name, u8 family, u8 revision) +{ + struct ip_set_type *type; + + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name) + && (type->family == family || type->family == AF_UNSPEC) + && type->revision == revision) + return type; + return NULL; +} + +/* Find a set type so that rcu_read_lock() is called by the function. + * If we succeeded, the RCU lock is NOT released and the caller + * must release it later. 
+ */ +static struct ip_set_type * +find_set_type_rcu(const char *name, u8 family, u8 revision) +{ + struct ip_set_type *type; + + rcu_read_lock(); + type = find_set_type(name, family, revision); + if (type == NULL) + rcu_read_unlock(); + + return type; +} + +/* Find a given set type by name and family together + * with the supported minimal and maximum revisions. + */ +static bool +find_set_type_minmax(const char *name, u8 family, + u8 *min, u8 *max) +{ + struct ip_set_type *type; + bool ret = false; + + *min = *max = 0; + rcu_read_lock(); + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name) + && (type->family == family || type->family == AF_UNSPEC)) { + ret = true; + if (type->revision < *min) + *min = type->revision; + else if (type->revision > *max) + *max = type->revision; + } + rcu_read_unlock(); + + return ret; +} + +#define family_name(f) ((f) == AF_INET ? "inet" : \ + (f) == AF_INET6 ? "inet6" : "any") + +/* Register a set type structure. The type is identified by + * the unique triple of name, family and revision. + */ +int +ip_set_type_register(struct ip_set_type *type) +{ + int ret = 0; + + if (type->protocol != IPSET_PROTOCOL) { + pr_warning("ip_set type %s, family %s, revision %u uses " + "wrong protocol version %u (want %u)\n", + type->name, family_name(type->family), + type->revision, type->protocol, IPSET_PROTOCOL); + return -EINVAL; + } + + ip_set_type_lock(); + if (find_set_type(type->name, type->family, type->revision)) { + /* Duplicate! */ + pr_warning("ip_set type %s, family %s, revision %u " + "already registered!\n", type->name, + family_name(type->family), type->revision); + ret = -EINVAL; + goto unlock; + } + list_add_rcu(&type->list, &ip_set_type_list); + pr_debug("type %s, family %s, revision %u registered.", + type->name, family_name(type->family), type->revision); +unlock: + ip_set_type_unlock(); + return ret; +} +EXPORT_SYMBOL(ip_set_type_register); + +/* Unregister a set type. 
There's a small race with ip_set_create */ +void +ip_set_type_unregister(struct ip_set_type *type) +{ + ip_set_type_lock(); + if (!find_set_type(type->name, type->family, type->revision)) { + pr_warning("ip_set type %s, family %s, revision %u " + "not registered\n", type->name, + family_name(type->family), type->revision); + goto unlock; + } + list_del_rcu(&type->list); + pr_debug("type %s, family %s, revision %u unregistered.", + type->name, family_name(type->family), type->revision); +unlock: + ip_set_type_unlock(); + + synchronize_rcu(); +} +EXPORT_SYMBOL(ip_set_type_unregister); + /* * Creating/destroying/renaming/swapping affect the existence and - * integrity of a set. All of these can be executed from userspace only - * and serialized by nfnl_lock/nfnl_unlock indirectly from nfnetlink. + * the properties of a set. All of these can be executed from userspace + * only and serialized by the nfnl mutex indirectly from nfnetlink. * * Sets are identified by their index in ip_set_list and the index * is used by the external references (set/SET netfilter modules). * - * The set behind an index may change by swapping. - * + * The set behind an index may change by swapping only, from userspace. */ static inline void @@ -82,264 +198,306 @@ __ip_set_put(ip_set_id_t index) atomic_dec(&ip_set_list[index]->ref); } -/* Add, del and test set entries from kernel */ +/* + * Add, del and test set entries from kernel. + * + * The set behind the index must exist and must be referenced + * so it can't be destroyed (or changed) under our foot. 
+ */ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, - uint8_t family, const uint8_t *flags) + u8 family, u8 dim, u8 flags) { - struct ip_set *set; + struct ip_set *set = ip_set_list[index]; int ret = 0; - rcu_read_lock(); - set = rcu_dereference(ip_set_list[index]); - D("set %s, index %u", set->name, index); + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u", set->name, index); + + if (dim < set->type->dimension + || !(family == set->family || set->family == AF_UNSPEC)) + return 0; read_lock_bh(&set->lock); - ret = set->variant->kadt(set, skb, IPSET_TEST, family, flags); + ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags); read_unlock_bh(&set->lock); if (ret == -EAGAIN) { /* Type requests element to be re-added */ + pr_debug("element must be competed, ADD is triggered"); write_lock_bh(&set->lock); - set->variant->kadt(set, skb, IPSET_ADD, family, flags); + set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); write_unlock_bh(&set->lock); ret = 1; } - rcu_read_unlock(); - + /* Convert error codes to nomatch */ return (ret < 0 ? 
0 : ret); } +EXPORT_SYMBOL(ip_set_test); int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, - uint8_t family, const uint8_t *flags) + u8 family, u8 dim, u8 flags) { - struct ip_set *set; + struct ip_set *set = ip_set_list[index]; int ret = 0, retried = 0; -retry: - rcu_read_lock(); - set = rcu_dereference(ip_set_list[index]); - D("set %s, index %u", set->name, index); + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u", set->name, index); + if (dim < set->type->dimension + || !(family == set->family || set->family == AF_UNSPEC)) + return 0; + +retry: write_lock_bh(&set->lock); - ret = set->variant->kadt(set, skb, IPSET_ADD, family, flags); + ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); write_unlock_bh(&set->lock); - rcu_read_unlock(); /* Retry function must be called without holding any lock */ if (ret == -EAGAIN && set->variant->resize - && (ret = set->variant->resize(set, retried++)) == 0) + && (ret = set->variant->resize(set, GFP_ATOMIC, retried++)) == 0) goto retry; return ret; } +EXPORT_SYMBOL(ip_set_add); int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, - uint8_t family, const uint8_t *flags) + u8 family, u8 dim, u8 flags) { - struct ip_set *set; + struct ip_set *set = ip_set_list[index]; int ret = 0; - rcu_read_lock(); - set = rcu_dereference(ip_set_list[index]); - D("set %s, index %u", set->name, index); + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u", set->name, index); + + if (dim < set->type->dimension + || !(family == set->family || set->family == AF_UNSPEC)) + return 0; write_lock_bh(&set->lock); - ret = set->variant->kadt(set, skb, IPSET_DEL, family, flags); + ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags); write_unlock_bh(&set->lock); - - rcu_read_unlock(); return ret; } +EXPORT_SYMBOL(ip_set_del); -/* Register and deregister settype */ - -#define family_name(f) ((f) == AF_INET ? "inet" : \ - (f) == AF_INET6 ? 
"inet6" : "any") - -static inline struct ip_set_type * -find_set_type(const char *name, uint8_t family, uint8_t revision) +/* + * Find set by name, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex must already be activated. + */ +ip_set_id_t +ip_set_get_byname(const char *name, struct ip_set **set) { - struct ip_set_type *type; - - list_for_each_entry(type, &ip_set_type_list, list) - if (STREQ(type->name, name) - && (type->family == family || type->family == AF_UNSPEC) - && type->revision == revision) - return type; - return NULL; -} + ip_set_id_t i, index = IPSET_INVALID_ID; + struct ip_set *s; -int -ip_set_type_register(struct ip_set_type *type) -{ - int ret = 0; - - if (type->protocol != IPSET_PROTOCOL) { - printk("set type %s, family %s, revision %u uses " - "wrong protocol version %u (want %u)\n", - type->name, family_name(type->family), type->revision, - type->protocol, IPSET_PROTOCOL); - return -EINVAL; + for (i = 0; i < ip_set_max; i++) { + s = ip_set_list[i]; + if (s != NULL && STREQ(s->name, name)) { + __ip_set_get(i); + index = i; + *set = s; + } } - ip_set_type_list_lock(); - if (find_set_type(type->name, type->family, type->revision)) { - /* Duplicate! */ - printk("type %s, family %s, revision %u already registered!\n", - type->name, family_name(type->family), type->revision); - ret = -EINVAL; - goto unlock; - } - list_add(&type->list, &ip_set_type_list); - D("type %s, family %s, revision %u registered.", - type->name, family_name(type->family), type->revision); -unlock: - ip_set_type_list_unlock(); - return ret; + return index; } +EXPORT_SYMBOL(ip_set_get_byname); +/* + * If the given set pointer points to a valid set, decrement + * reference count by 1. The caller shall not assume the index + * to be valid, after calling this function. + * + * The nfnl mutex must already be activated. 
+ */ void -ip_set_type_unregister(struct ip_set_type *type) +ip_set_put_byindex(ip_set_id_t index) { - ip_set_type_list_lock(); - if (!find_set_type(type->name, type->family, type->revision)) { - printk("type %s, family %s, revision %u not registered\n", - type->name, family_name(type->family), type->revision); - goto unlock; + if (ip_set_list[index] != NULL) { + BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + __ip_set_put(index); } - list_del(&type->list); - D("type %s, family %s, revision %u unregistered.", - type->name, family_name(type->family), type->revision); -unlock: - ip_set_type_list_unlock(); } +EXPORT_SYMBOL(ip_set_put_byindex); + +/* + * Get the name of a set behind a set index. + * We assume the set is referenced, so it does exist and + * can't be destroyed. The set cannot be renamed due to + * the referencing either. + * + * The nfnl mutex must already be activated. + */ +const char * +ip_set_name_byindex(ip_set_id_t index) +{ + struct ip_set *set = ip_set_list[index]; + + BUG_ON(set == NULL); + BUG_ON(atomic_read(&set->ref) == 0); + + /* Referenced, so it's safe */ + return set->name; +} +EXPORT_SYMBOL(ip_set_name_byindex); -/* Get/put a set with referencing */ +/* + * Routines to call by external subsystems, which do not + * call nfnl_lock for us. + */ /* * Find set by name, reference it once. The reference makes sure the - * thing pointed to, does not go away under our feet. Drop the reference - * later, using ip_set_put*(). + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex is used in the function. 
*/ ip_set_id_t -ip_set_get_byname(const char *name) +ip_set_nfnl_get(const char *name) { - ip_set_id_t i, index = IPSET_INVALID_ID; - - nfnl_lock(); - for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) - if (STREQ(ip_set_list[i]->name, name)) { - __ip_set_get(i); - index = i; - } + struct ip_set *s; + ip_set_id_t index; + + nfnl_lock(); + index = ip_set_get_byname(name, &s); + nfnl_unlock(); + + return index; +} +EXPORT_SYMBOL(ip_set_nfnl_get); + +/* + * Find set by index, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex is used in the function. + */ +ip_set_id_t +ip_set_nfnl_get_byindex(ip_set_id_t index) +{ + if (index > ip_set_max) + return IPSET_INVALID_ID; + + nfnl_lock(); + if (ip_set_list[index]) + __ip_set_get(index); + else + index = IPSET_INVALID_ID; nfnl_unlock(); return index; } +EXPORT_SYMBOL(ip_set_nfnl_get_byindex); /* * If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. + * + * The nfnl mutex is used in the function. */ void -ip_set_put_byindex(ip_set_id_t index) +ip_set_nfnl_put(ip_set_id_t index) { nfnl_lock(); - if (ip_set_list[index]) + if (ip_set_list[index] != NULL) { + BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); __ip_set_put(index); + } nfnl_unlock(); } +EXPORT_SYMBOL(ip_set_nfnl_put); -static ip_set_id_t -find_set_id(const char *name) +/* + * Communication protocol with userspace over netlink. + * + * We already locked by nfnl_lock. 
+ */ + +static inline bool +protocol_failed(const struct nlattr * const tb[]) { - ip_set_id_t i, index = IPSET_INVALID_ID; - - for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { - if (ip_set_list[i] != NULL - && STREQ(ip_set_list[i]->name, name)) - index = i; - } - return index; + return !tb[IPSET_ATTR_PROTOCOL] + || nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; } -static ip_set_id_t -find_set_id_rcu(const char *name) +static inline u32 +flag_exist(const struct nlmsghdr *nlh) { - ip_set_id_t i, index = IPSET_INVALID_ID; - struct ip_set *set; - - for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { - set = rcu_dereference(ip_set_list[i]); - if (set != NULL && STREQ(set->name, name)) - index = i; - } - return index; + return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST; } -static struct ip_set * -find_set(const char *name) +static inline bool +flag_nested(const struct nlattr *nla) { - ip_set_id_t index = find_set_id(name); - - return index == IPSET_INVALID_ID ? 
NULL : ip_set_list[index]; + return nla->nla_type & NLA_F_NESTED; } -/* Communication protocol with userspace over netlink */ +static struct nlmsghdr * +start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, + enum ipset_cmd cmd) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + sizeof(*nfmsg), flags); + if (nlh == NULL) + return NULL; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + return nlh; +} /* Create a set */ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, - [IPSET_ATTR_SETNAME] = { .type = NLA_STRING, - .len = IPSET_MAXNAMELEN }, - [IPSET_ATTR_TYPENAME] = { .type = NLA_STRING, - .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN -1 }, + [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1}, [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, }; -static inline bool -protocol_failed(const struct nlattr * const tb[]) -{ - return !tb[IPSET_ATTR_PROTOCOL] - || nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; -} - -static inline uint32_t -flag_exist(const struct nlmsghdr *nlh) -{ - return nlh->nlmsg_flags & NLM_F_EXCL ? 
0 : IPSET_FLAG_EXIST; -} - -static inline bool -flag_nested(const struct nlattr *nla) +static ip_set_id_t +find_set_id(const char *name) { - return nla->nla_type & NLA_F_NESTED; + ip_set_id_t i, index = IPSET_INVALID_ID; + struct ip_set *set; + + for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { + set = ip_set_list[i]; + if (set != NULL && STREQ(set->name, name)) + index = i; + } + return index; } -static struct ip_set_type * -find_set_type_lock(const char *name, uint8_t family, uint8_t revision) +static inline struct ip_set * +find_set(const char *name) { - struct ip_set_type *type; - - ip_set_type_list_lock(); - type = find_set_type(name, family, revision); - if (type == NULL) - ip_set_type_list_unlock(); + ip_set_id_t index = find_set_id(name); - return type; + return index == IPSET_INVALID_ID ? NULL : ip_set_list[index]; } static int @@ -364,30 +522,10 @@ find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) return 0; } -static struct nlmsghdr * -start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, - enum ipset_cmd cmd) -{ - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - - nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), - sizeof(*nfmsg), flags); - if (nlh == NULL) - return NULL; - - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_INET; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - return nlh; -} - static inline void load_type_module(const char *typename) { - D("try to load ip_set_%s", typename); + pr_debug("try to load ip_set_%s", typename); request_module("ip_set_%s", typename); } @@ -399,8 +537,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *clash; ip_set_id_t index = IPSET_INVALID_ID; const char *name, *typename; - uint8_t family, revision; - uint32_t flags = flag_exist(nlh); + u8 family, revision; + u32 flags = flag_exist(nlh); int ret = 0, len; if (unlikely(protocol_failed(attr) @@ -416,8 +554,8 @@ ip_set_create(struct sock *ctnl, struct 
sk_buff *skb, typename = nla_data(attr[IPSET_ATTR_TYPENAME]); family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); revision = nla_get_u8(attr[IPSET_ATTR_REVISION]); - D("setname: %s, typename: %s, family: %s, revision: %u", - name, typename, family_name(family), revision); + pr_debug("setname: %s, typename: %s, family: %s, revision: %u", + name, typename, family_name(family), revision); /* * First, and without any locks, allocate and initialize @@ -429,6 +567,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, rwlock_init(&set->lock); strncpy(set->name, name, IPSET_MAXNAMELEN); atomic_set(&set->ref, 0); + set->family = family; /* * Next, check that we know the type, and take @@ -438,31 +577,32 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * After referencing the type, we try to create the type * specific part of the set without holding any locks. */ - set->type = find_set_type_lock(typename, family, revision); + set->type = find_set_type_rcu(typename, family, revision); if (set->type == NULL) { /* Try loading the module */ load_type_module(typename); - set->type = find_set_type_lock(typename, family, revision); + set->type = find_set_type_rcu(typename, family, revision); if (set->type == NULL) { - printk("Can't find type %s, family %s, revision %u:" - " set '%s' not created", - typename, family_name(family), revision, name); + pr_warning("Can't find ip_set type %s, family %s, " + "revision %u: set '%s' not created", + typename, family_name(family), revision, + name); ret = -IPSET_ERR_FIND_TYPE; goto out; } } if (!try_module_get(set->type->me)) { - ip_set_type_list_unlock(); + rcu_read_unlock(); ret = -EFAULT; goto out; } - ip_set_type_list_unlock(); + rcu_read_unlock(); /* * Without holding any locks, create private part. */ len = attr[IPSET_ATTR_DATA] ? nla_len(attr[IPSET_ATTR_DATA]) : 0; - D("data len: %u", len); + pr_debug("data len: %u", len); ret = set->type->create(set, attr[IPSET_ATTR_DATA] ? 
nla_data(attr[IPSET_ATTR_DATA]) : NULL, len, flags); @@ -482,7 +622,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, && (flags & IPSET_FLAG_EXIST) && STREQ(set->type->name, clash->type->name) && set->type->family == clash->type->family - && set->type->revision == clash->type->revision) + && set->type->revision == clash->type->revision + && set->variant->same_set(set, clash)) ret = 0; goto cleanup; } @@ -490,7 +631,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* * Finally! Add our shiny new set to the list, and be done. */ - D("create: '%s' created with index %u!", set->name, index); + pr_debug("create: '%s' created with index %u!", set->name, index); ip_set_list[index] = set; return ret; @@ -509,8 +650,8 @@ out: static const struct nla_policy ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, - [IPSET_ATTR_SETNAME] = { .type = NLA_STRING, - .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, }; static inline void @@ -518,7 +659,7 @@ ip_set_destroy_set(ip_set_id_t index) { struct ip_set *set = ip_set_list[index]; - D("set: %s", set->name); + pr_debug("set: %s", set->name); ip_set_list[index] = NULL; /* Must call it without holding any lock */ @@ -565,7 +706,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, static inline void ip_set_flush_set(struct ip_set *set) { - D("set: %s", set->name); + pr_debug("set: %s", set->name); write_lock_bh(&set->lock); set->variant->flush(set); @@ -602,10 +743,10 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, static const struct nla_policy ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, - [IPSET_ATTR_SETNAME] = { .type = NLA_STRING, - .len = IPSET_MAXNAMELEN }, - [IPSET_ATTR_SETNAME2] = { .type = NLA_STRING, - .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, 
+ [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, }; static int @@ -625,6 +766,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -EEXIST; + if (atomic_read(&set->ref) != 0) + return -IPSET_ERR_REFERENCED; name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < ip_set_max; i++) { @@ -638,7 +781,13 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, } /* Swap two sets so that name/index points to the other. - * References are also swapped. */ + * References and set names are also swapped. + * + * We are protected by the nfnl mutex and references are + * manipulated only by holding the mutex. The kernel interfaces + * do not hold the mutex but the pointer settings are atomic + * so the ip_set_list always contains valid pointers to the sets. + */ static int ip_set_swap(struct sock *ctnl, struct sk_buff *skb, @@ -648,7 +797,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - uint32_t from_ref; + u32 from_ref; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL @@ -673,7 +822,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, && from->type->family == to->type->family)) return -IPSET_ERR_TYPE_MISMATCH; - /* No magic here: ref munging protected by the mutex */ + /* No magic here: ref munging protected by the nfnl_lock */ strncpy(from_name, from->name, IPSET_MAXNAMELEN); from_ref = atomic_read(&from->ref); @@ -682,20 +831,29 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, strncpy(to->name, from_name, IPSET_MAXNAMELEN); atomic_set(&to->ref, from_ref); - rcu_assign_pointer(ip_set_list[from_id], to); - rcu_assign_pointer(ip_set_list[to_id], from); - synchronize_rcu(); + ip_set_list[from_id] = to; + ip_set_list[to_id] = from; + + /* Avoid possible race between ongoing slow add/del in kernel space + * and next destroy command. 
*/ + synchronize_net(); return 0; } /* List/save set data */ +#define DUMP_ALL 0L +#define DUMP_ONE 1L +#define DUMP_LAST 2L + static int ip_set_dump_done(struct netlink_callback *cb) { - if (cb->args[2]) + if (cb->args[2]) { + pr_debug("release set %s", ip_set_list[cb->args[1]]->name); __ip_set_put((ip_set_id_t) cb->args[1]); + } return 0; } @@ -705,9 +863,9 @@ dump_attrs(struct nlmsghdr *nlh) struct nlattr *attr; int rem; - D("dump nlmsg"); + pr_debug("dump nlmsg"); nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) { - D("type: %u, len %u", nla_type(attr), attr->nla_len); + pr_debug("type: %u, len %u", nla_type(attr), attr->nla_len); } } @@ -720,22 +878,32 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; int ret = 0; - max = cb->args[0] ? cb->args[1] + 1 : ip_set_max; - rcu_read_lock(); + if (cb->args[1] >= ip_set_max) + goto out; + + pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); + max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; - set = rcu_dereference(ip_set_list[index]); + set = ip_set_list[index]; if (set == NULL) { - if (cb->args[0]) { + if (cb->args[0] == DUMP_ONE) { ret = -EEXIST; - goto unlock; + goto out; } continue; } - D("List set: %s", set->name); + /* When dumping all sets, we must dump "sorted" + * so that lists (unions of sets) are dumped last. 
+ */ + if (cb->args[0] != DUMP_ONE + && !((cb->args[0] == DUMP_ALL) + ^ (set->type->features & IPSET_DUMP_LAST))) + continue; + pr_debug("List set: %s", set->name); if (!cb->args[2]) { /* Start listing: make sure set won't be destroyed */ - D("reference set"); + pr_debug("reference set"); __ip_set_get(index); } nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, @@ -753,7 +921,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME, set->type->name); NLA_PUT_U8(skb, IPSET_ATTR_FAMILY, - set->type->family); + set->family); NLA_PUT_U8(skb, IPSET_ATTR_REVISION, set->type->revision); ret = set->variant->head(set, skb); @@ -764,28 +932,35 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) read_lock_bh(&set->lock); ret = set->variant->list(set, skb, cb); read_unlock_bh(&set->lock); - if (!cb->args[2]) + if (!cb->args[2]) { /* Set is done, proceed with next one */ - cb->args[1]++; + if (cb->args[0] == DUMP_ONE) + cb->args[1] = IPSET_INVALID_ID; + else + cb->args[1]++; + } goto release_refcount; } } - goto unlock; + goto out; nla_put_failure: ret = -EFAULT; release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { - D("release set"); + pr_debug("release set %s", ip_set_list[index]->name); __ip_set_put(index); } -unlock: - rcu_read_unlock(); + /* If we dump all sets, continue with dumping last ones */ + if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2]) + cb->args[0] = DUMP_LAST; + +out: if (nlh) { nlmsg_end(skb, nlh); - D("nlmsg_len: %u", nlh->nlmsg_len); + pr_debug("nlmsg_len: %u", nlh->nlmsg_len); dump_attrs(nlh); } @@ -807,23 +982,18 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, ip_set_dump_start, ip_set_dump_done); - rcu_read_lock(); - index = find_set_id_rcu(nla_data(attr[IPSET_ATTR_SETNAME])); - if (index == IPSET_INVALID_ID) { - rcu_read_unlock(); + index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (index == 
IPSET_INVALID_ID) return -EEXIST; - } - rcu_read_unlock(); - /* cb->args[0] : 1 => dump single set, - * : 0 => dump all sets + /* cb->args[0] : dump single set/all sets * [1] : set index * [..]: type specific */ return netlink_dump_init(ctnl, skb, nlh, ip_set_dump_start, ip_set_dump_done, - 2, 1, index); + 2, DUMP_ONE, index); } /* Add, del and test */ @@ -831,8 +1001,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, - [IPSET_ATTR_SETNAME] = { .type = NLA_STRING, - .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, [IPSET_ATTR_ADT] = { .type = NLA_NESTED }, @@ -842,11 +1012,11 @@ static int call_ad(struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[], struct ip_set *set, const struct nlattr *nla, - enum ipset_adt adt, uint32_t flags) + enum ipset_adt adt, u32 flags) { struct nlattr *head = nla_data(nla); int ret, len = nla_len(nla), retried = 0; - uint32_t lineno = 0; + u32 lineno = 0; bool eexist = flags & IPSET_FLAG_EXIST; do { @@ -856,13 +1026,13 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, write_unlock_bh(&set->lock); } while (ret == -EAGAIN && set->variant->resize - && (ret = set->variant->resize(set, retried++)) == 0); + && (ret = set->variant->resize(set, GFP_KERNEL, retried++)) == 0); if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) return 0; if (lineno && attr[IPSET_ATTR_LINENO]) { /* Error in restore/batch mode: send back lineno */ - uint32_t *errline = nla_data(attr[IPSET_ATTR_LINENO]); + u32 *errline = nla_data(attr[IPSET_ATTR_LINENO]); *errline = lineno; } @@ -877,7 +1047,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, { struct ip_set *set; const struct nlattr *nla; - uint32_t flags = flag_exist(nlh); + u32 flags = flag_exist(nlh); 
int ret = 0; if (unlikely(protocol_failed(attr) @@ -921,7 +1091,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, { struct ip_set *set; const struct nlattr *nla; - uint32_t flags = flag_exist(nlh); + u32 flags = flag_exist(nlh); int ret = 0; if (unlikely(protocol_failed(attr) @@ -1022,7 +1192,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name); NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name); - NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->type->family); + NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family); NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision); nlmsg_end(skb2, nlh2); @@ -1044,34 +1214,11 @@ nlmsg_failure: static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, - [IPSET_ATTR_TYPENAME] = { .type = NLA_STRING, - .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, }; -static bool -find_set_type_minmax(const char *name, uint8_t family, - uint8_t *min, uint8_t *max) -{ - struct ip_set_type *type; - bool ret = false; - - *min = *max = 0; - ip_set_type_list_lock(); - list_for_each_entry(type, &ip_set_type_list, list) - if (STREQ(type->name, name) - && (type->family == family || type->family == AF_UNSPEC)) { - ret = true; - if (type->revision < *min) - *min = type->revision; - else if (type->revision > *max) - *max = type->revision; - } - ip_set_type_list_unlock(); - - return ret; -} - static int ip_set_type(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1079,7 +1226,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, { struct sk_buff *skb2; struct nlmsghdr *nlh2; - uint8_t family, min, max; + u8 family, min, max; const char *typename; int ret = 0; @@ -1094,7 +1241,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, 
/* Try to load in the type module */ load_type_module(typename); if (!find_set_type_minmax(typename, family, &min, &max)) { - D("can't find: %s, family: %u", typename, family); + pr_debug("can't find: %s, family: %u", typename, family); return -EEXIST; } } @@ -1114,7 +1261,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min); nlmsg_end(skb2, nlh2); - D("Send TYPE, nlmsg_len: %u", nlh2->nlmsg_len); + pr_debug("Send TYPE, nlmsg_len: %u", nlh2->nlmsg_len); ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); if (ret < 0) return -EFAULT; @@ -1171,7 +1318,8 @@ nlmsg_failure: return -EFAULT; } -static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { +static const struct nfnl_callback __read_mostly +ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { [IPSET_CMD_CREATE] = { .call = ip_set_create, .attr_count = IPSET_ATTR_CMD_MAX, @@ -1246,6 +1394,106 @@ static struct nfnetlink_subsystem ip_set_netlink_subsys = { .cb = ip_set_netlink_subsys_cb, }; +/* Interface to iptables/ip6tables */ + +static int +ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len) +{ + unsigned *op; + void *data; + int copylen = *len, ret = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (optval != SO_IP_SET) + return -EBADF; + if (*len < sizeof(unsigned)) { + return -EINVAL; + } + data = vmalloc(*len); + if (!data) + return -ENOMEM; + if (copy_from_user(data, user, *len) != 0) { + ret = -EFAULT; + goto done; + } + op = (unsigned *) data; + + if (*op < IP_SET_OP_VERSION) { + /* Check the version at the beginning of operations */ + struct ip_set_req_version *req_version = data; + if (req_version->version != IPSET_PROTOCOL) { + ret = -EPROTO; + goto done; + } + } + + switch (*op) { + case IP_SET_OP_VERSION: { + struct ip_set_req_version *req_version = data; + + if (*len != sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + + req_version->version = IPSET_PROTOCOL; + ret = 
copy_to_user(user, req_version, + sizeof(struct ip_set_req_version)); + goto done; + } + case IP_SET_OP_GET_BYNAME: { + struct ip_set_req_get_set *req_get = data; + + if (*len != sizeof(struct ip_set_req_get_set)) { + ret = -EINVAL; + goto done; + } + req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; + nfnl_lock(); + req_get->set.index = find_set_id(req_get->set.name); + nfnl_unlock(); + goto copy; + } + case IP_SET_OP_GET_BYINDEX: { + struct ip_set_req_get_set *req_get = data; + + if (*len != sizeof(struct ip_set_req_get_set) + || req_get->set.index >= ip_set_max) { + ret = -EINVAL; + goto done; + } + nfnl_lock(); + strncpy(req_get->set.name, + ip_set_list[req_get->set.index] + ? ip_set_list[req_get->set.index]->name : "", + IPSET_MAXNAMELEN); + nfnl_unlock(); + goto copy; + } + default: + ret = -EBADMSG; + goto done; + } /* end of switch(op) */ + + copy: + ret = copy_to_user(user, data, copylen); + + done: + vfree(data); + if (ret > 0) + ret = 0; + return ret; +} + +static struct nf_sockopt_ops so_set = { + .pf = PF_INET, + .get_optmin = SO_IP_SET, + .get_optmax = SO_IP_SET + 1, + .get = &ip_set_sockfn_get, + .owner = THIS_MODULE, +}; + static int __init ip_set_init(void) { @@ -1258,7 +1506,7 @@ ip_set_init(void) ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); if (!ip_set_list) { - printk(KERN_ERR "Unable to create ip_set_list\n"); + pr_err("ip_set: Unable to create ip_set_list"); return -ENOMEM; } @@ -1266,12 +1514,19 @@ ip_set_init(void) ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { - printk("ip_set_init: cannot register with nfnetlink.\n"); + pr_err("ip_set: cannot register with nfnetlink."); + kfree(ip_set_list); + return ret; + } + ret = nf_register_sockopt(&so_set); + if (ret != 0) { + pr_err("SO_SET registry failed: %d", ret); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); kfree(ip_set_list); return ret; } - printk("ip_set with protocol version %u loaded\n", IPSET_PROTOCOL); + pr_notice("ip_set with 
protocol version %u loaded", IPSET_PROTOCOL); return 0; } @@ -1279,20 +1534,11 @@ static void __exit ip_set_fini(void) { /* There can't be any existing set */ + nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); kfree(ip_set_list); - D("these are the famous last words"); + pr_debug("these are the famous last words"); } -EXPORT_SYMBOL(ip_set_type_register); -EXPORT_SYMBOL(ip_set_type_unregister); - -EXPORT_SYMBOL(ip_set_get_byname); -EXPORT_SYMBOL(ip_set_put_byindex); - -EXPORT_SYMBOL(ip_set_add); -EXPORT_SYMBOL(ip_set_del); -EXPORT_SYMBOL(ip_set_test); - module_init(ip_set_init); module_exit(ip_set_fini); diff --git a/kernel/ip_set_bitmap_ip.c b/kernel/ip_set_bitmap_ip.c index ccb5473..66d3979 100644 --- a/kernel/ip_set_bitmap_ip.c +++ b/kernel/ip_set_bitmap_ip.c @@ -9,6 +9,7 @@ /* Kernel module implementing an IP set type: the bitmap:ip type */ +#include #include #include #include @@ -17,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -38,28 +38,28 @@ MODULE_ALIAS("ip_set_bitmap:ip"); struct bitmap_ip { void *members; /* the set members */ - uint32_t first_ip; /* host byte order, included in range */ - uint32_t last_ip; /* host byte order, included in range */ - uint32_t elements; /* number of max elements in the set */ - uint32_t hosts; /* number of hosts in a subnet */ + u32 first_ip; /* host byte order, included in range */ + u32 last_ip; /* host byte order, included in range */ + u32 elements; /* number of max elements in the set */ + u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ - uint8_t netmask; /* subnet netmask */ + u8 netmask; /* subnet netmask */ }; -static inline uint32_t -ip_to_id(const struct bitmap_ip *map, uint32_t ip) +static inline u32 +ip_to_id(const struct bitmap_ip *map, u32 ip) { return ((ip & HOSTMASK(map->netmask)) - map->first_ip)/map->hosts; } static inline int -bitmap_ip_test(const struct bitmap_ip *map, uint32_t id) +bitmap_ip_test(const 
struct bitmap_ip *map, u32 id) { return !!test_bit(id, map->members); } static inline int -bitmap_ip_add(struct bitmap_ip *map, uint32_t id) +bitmap_ip_add(struct bitmap_ip *map, u32 id) { if (test_and_set_bit(id, map->members)) return -IPSET_ERR_EXIST; @@ -68,7 +68,7 @@ bitmap_ip_add(struct bitmap_ip *map, uint32_t id) } static inline int -bitmap_ip_del(struct bitmap_ip *map, uint32_t id) +bitmap_ip_del(struct bitmap_ip *map, u32 id) { if (!test_and_clear_bit(id, map->members)) return -IPSET_ERR_EXIST; @@ -78,14 +78,12 @@ bitmap_ip_del(struct bitmap_ip *map, uint32_t id) static int bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags) + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { struct bitmap_ip *map = set->data; - uint32_t ip = ntohl(ip4addr(skb, flags)); + u32 ip; - if (pf != AF_INET) - return -EINVAL; - + ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -113,12 +111,12 @@ bitmap_ip_adt_policy[IPSET_ATTR_ADT_MAX+1] __read_mostly = { static int bitmap_ip_uadt(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t flags) + enum ipset_adt adt, u32 *lineno, u32 flags) { struct bitmap_ip *map = set->data; struct nlattr *tb[IPSET_ATTR_ADT_MAX]; bool eexist = flags & IPSET_FLAG_EXIST; - uint32_t ip, ip_to, id; + u32 ip, ip_to, id; int ret = 0; if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, @@ -133,6 +131,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *head, int len, if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; + /* Set was defined without timeout support, + * don't ignore attribute silently */ if (tb[IPSET_ATTR_TIMEOUT]) return -IPSET_ERR_TIMEOUT; @@ -147,10 +147,11 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *head, int len, return -IPSET_ERR_BITMAP_RANGE; } } else if (tb[IPSET_ATTR_CIDR]) { - uint8_t cidr = 
nla_get_u8(tb[IPSET_ATTR_CIDR]); + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr > 32) return -IPSET_ERR_INVALID_CIDR; + ip &= HOSTMASK(cidr); ip_to = ip | ~HOSTMASK(cidr); } else ip_to = ip; @@ -168,7 +169,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *head, int len, *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); return ret; } - }; + } return ret; } @@ -196,11 +197,6 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) { const struct bitmap_ip *map = set->data; struct nlattr *nested; - uint32_t id, elements; - - for (id = 0, elements = 0; id < map->elements; id++) - if (bitmap_ip_test(map, id)) - elements++; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) @@ -209,10 +205,10 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_NET32(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); if (map->netmask != 32) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); - NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(atomic_read(&set->ref) - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)); ipset_nest_end(skb, nested); return 0; @@ -226,7 +222,7 @@ bitmap_ip_list(struct ip_set *set, { const struct bitmap_ip *map = set->data; struct nlattr *atd, *nested; - uint32_t id, first = cb->args[2]; + u32 id, first = cb->args[2]; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) @@ -245,8 +241,6 @@ bitmap_ip_list(struct ip_set *set, } NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip + id * map->hosts)); - if (map->netmask != 32) - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, map->netmask); ipset_nest_end(skb, nested); } ipset_nest_end(skb, atd); @@ -260,6 +254,17 @@ nla_put_failure: return 0; } +static bool +bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) +{ + struct bitmap_ip *x = a->data; + struct bitmap_ip *y = b->data; + + return x->first_ip == y->first_ip + && x->last_ip 
== y->last_ip + && x->netmask == y->netmask; +} + static const struct ip_set_type_variant bitmap_ip __read_mostly = { .kadt = bitmap_ip_kadt, .uadt = bitmap_ip_uadt, @@ -267,34 +272,35 @@ static const struct ip_set_type_variant bitmap_ip __read_mostly = { .flush = bitmap_ip_flush, .head = bitmap_ip_head, .list = bitmap_ip_list, + .same_set = bitmap_ip_same_set, }; /* Timeout variant */ struct bitmap_ip_timeout { void *members; /* the set members */ - uint32_t first_ip; /* host byte order, included in range */ - uint32_t last_ip; /* host byte order, included in range */ - uint32_t elements; /* number of max elements in the set */ - uint32_t hosts; /* number of hosts in a subnet */ + u32 first_ip; /* host byte order, included in range */ + u32 last_ip; /* host byte order, included in range */ + u32 elements; /* number of max elements in the set */ + u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ - uint8_t netmask; /* subnet netmask */ + u8 netmask; /* subnet netmask */ - uint32_t timeout; /* timeout parameter */ + u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; static inline bool -bitmap_ip_timeout_test(const struct bitmap_ip_timeout *map, uint32_t id) +bitmap_ip_timeout_test(const struct bitmap_ip_timeout *map, u32 id) { unsigned long *table = map->members; return ip_set_timeout_test(table[id]); } -static int +static inline int bitmap_ip_timeout_add(struct bitmap_ip_timeout *map, - uint32_t id, uint32_t timeout) + u32 id, u32 timeout) { unsigned long *table = map->members; @@ -306,8 +312,8 @@ bitmap_ip_timeout_add(struct bitmap_ip_timeout *map, return 0; } -static int -bitmap_ip_timeout_del(struct bitmap_ip_timeout *map, uint32_t id) +static inline int +bitmap_ip_timeout_del(struct bitmap_ip_timeout *map, u32 id) { unsigned long *table = map->members; int ret = -IPSET_ERR_EXIST; @@ -321,14 +327,12 @@ bitmap_ip_timeout_del(struct bitmap_ip_timeout *map, uint32_t id) static int 
bitmap_ip_timeout_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags) + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { struct bitmap_ip_timeout *map = set->data; - uint32_t ip = ntohl(ip4addr(skb, flags)); - - if (pf != AF_INET) - return -EINVAL; + u32 ip; + ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -348,12 +352,12 @@ bitmap_ip_timeout_kadt(struct ip_set *set, const struct sk_buff *skb, static int bitmap_ip_timeout_uadt(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t flags) + enum ipset_adt adt, u32 *lineno, u32 flags) { struct bitmap_ip_timeout *map = set->data; struct nlattr *tb[IPSET_ATTR_ADT_MAX]; bool eexist = flags & IPSET_FLAG_EXIST; - uint32_t ip, ip_to, id, timeout = map->timeout; + u32 ip, ip_to, id, timeout = map->timeout; int ret = 0; if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, @@ -380,10 +384,11 @@ bitmap_ip_timeout_uadt(struct ip_set *set, struct nlattr *head, int len, return -IPSET_ERR_BITMAP_RANGE; } } else if (tb[IPSET_ATTR_CIDR]) { - uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr > 32) return -IPSET_ERR_INVALID_CIDR; + ip &= HOSTMASK(cidr); ip_to = ip | ~HOSTMASK(cidr); } else ip_to = ip; @@ -391,9 +396,8 @@ bitmap_ip_timeout_uadt(struct ip_set *set, struct nlattr *head, int len, if (ip_to > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; - if (tb[IPSET_ATTR_TIMEOUT]) { - timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); - } + if (tb[IPSET_ATTR_TIMEOUT]) + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); for (; !before(ip_to, ip); ip += map->hosts) { id = ip_to_id((const struct bitmap_ip *)map, ip); @@ -414,11 +418,8 @@ static void bitmap_ip_timeout_destroy(struct ip_set *set) { struct bitmap_ip_timeout *map = set->data; - - /* gc might be running: del_timer_sync can't be used */ - while 
(!del_timer(&map->gc)) - msleep(IPSET_DESTROY_TIMER_SLEEP); - + + del_timer_sync(&map->gc); ip_set_free(map->members, set->flags); kfree(map); @@ -438,11 +439,6 @@ bitmap_ip_timeout_head(struct ip_set *set, struct sk_buff *skb) { const struct bitmap_ip_timeout *map = set->data; struct nlattr *nested; - uint32_t id, elements; - - for (id = 0, elements = 0; id < map->elements; id++) - if (bitmap_ip_timeout_test(map, id)) - elements++; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) @@ -452,10 +448,10 @@ bitmap_ip_timeout_head(struct ip_set *set, struct sk_buff *skb) if (map->netmask != 32) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT , htonl(map->timeout)); - NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(atomic_read(&set->ref) - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)); ipset_nest_end(skb, nested); return 0; @@ -469,7 +465,7 @@ bitmap_ip_timeout_list(struct ip_set *set, { const struct bitmap_ip_timeout *map = set->data; struct nlattr *adt, *nested; - uint32_t id, first = cb->args[2]; + u32 id, first = cb->args[2]; unsigned long *table = map->members; adt = ipset_nest_start(skb, IPSET_ATTR_ADT); @@ -489,8 +485,6 @@ bitmap_ip_timeout_list(struct ip_set *set, } NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip + id * map->hosts)); - if (map->netmask != 32) - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, map->netmask); NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get(table[id]))); ipset_nest_end(skb, nested); @@ -508,6 +502,18 @@ nla_put_failure: return 0; } +static bool +bitmap_ip_timeout_same_set(const struct ip_set *a, const struct ip_set *b) +{ + struct bitmap_ip_timeout *x = a->data; + struct bitmap_ip_timeout *y = b->data; + + return x->first_ip == y->first_ip + && x->last_ip == y->last_ip + && x->netmask == y->netmask + && x->timeout == 
y->timeout; +} + static const struct ip_set_type_variant bitmap_ip_timeout __read_mostly = { .kadt = bitmap_ip_timeout_kadt, .uadt = bitmap_ip_timeout_uadt, @@ -515,15 +521,16 @@ static const struct ip_set_type_variant bitmap_ip_timeout __read_mostly = { .flush = bitmap_ip_timeout_flush, .head = bitmap_ip_timeout_head, .list = bitmap_ip_timeout_list, + .same_set = bitmap_ip_timeout_same_set, }; static void -bitmap_ip_timeout_gc(unsigned long ul_set) +bitmap_ip_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct bitmap_ip_timeout *map = set->data; unsigned long *table = map->members; - uint32_t id; + u32 id; /* We run parallel with other readers (test element) * but adding/deleting new entries is locked out */ @@ -544,7 +551,7 @@ bitmap_ip_gc_init(struct ip_set *set) init_timer(&map->gc); map->gc.data = (unsigned long) set; - map->gc.function = bitmap_ip_timeout_gc; + map->gc.function = bitmap_ip_gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; add_timer(&map->gc); } @@ -562,8 +569,8 @@ bitmap_ip_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { static bool init_map_ip(struct ip_set *set, struct bitmap_ip *map, - uint32_t first_ip, uint32_t last_ip, - uint32_t elements, uint32_t hosts, uint8_t netmask) + u32 first_ip, u32 last_ip, + u32 elements, u32 hosts, u8 netmask) { map->members = ip_set_alloc(map->memsize, GFP_KERNEL, &set->flags); if (!map->members) @@ -582,11 +589,11 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, static int bitmap_ip_create(struct ip_set *set, struct nlattr *head, int len, - uint32_t flags) + u32 flags) { struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; - uint32_t first_ip, last_ip, hosts, elements; - uint8_t netmask = 32; + u32 first_ip, last_ip, hosts, elements; + u8 netmask = 32; if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, bitmap_ip_create_policy)) @@ -600,13 +607,13 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *head, int len, if (tb[IPSET_ATTR_IP_TO]) { last_ip = 
ip_set_get_h32(tb[IPSET_ATTR_IP_TO]); if (first_ip > last_ip) { - uint32_t tmp = first_ip; + u32 tmp = first_ip; first_ip = last_ip; last_ip = tmp; } } else if (tb[IPSET_ATTR_CIDR]) { - uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr >= 32) return -IPSET_ERR_INVALID_CIDR; @@ -628,8 +635,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *head, int len, hosts = 1; elements = last_ip - first_ip + 1; } else { - uint8_t mask_bits; - uint32_t mask; + u8 mask_bits; + u32 mask; mask = range_to_mask(first_ip, last_ip, &mask_bits); @@ -637,14 +644,14 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *head, int len, || netmask <= mask_bits) return -IPSET_ERR_BITMAP_RANGE; - D("mask_bits %u, netmask %u", mask_bits, netmask); + pr_debug("mask_bits %u, netmask %u", mask_bits, netmask); hosts = 2 << (32 - netmask - 1); elements = 2 << (netmask - mask_bits - 1); } if (elements > IPSET_BITMAP_MAX_RANGE + 1) { return -IPSET_ERR_BITMAP_RANGE_SIZE; } - D("hosts %u, elements %u", hosts, elements); + pr_debug("hosts %u, elements %u", hosts, elements); if (tb[IPSET_ATTR_TIMEOUT]) { struct bitmap_ip_timeout *map; @@ -662,8 +669,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *head, int len, return -ENOMEM; } - map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); - set->flags |= IP_SET_FLAG_TIMEOUT; + map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->variant = &bitmap_ip_timeout; bitmap_ip_gc_init(set); @@ -692,6 +698,7 @@ static struct ip_set_type bitmap_ip_type = { .name = "bitmap:ip", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP, + .dimension = IPSET_DIM_ONE, .family = AF_INET, .revision = 0, .create = bitmap_ip_create, diff --git a/kernel/ip_set_bitmap_ipmac.c b/kernel/ip_set_bitmap_ipmac.c index 45335dd..d036862 100644 --- a/kernel/ip_set_bitmap_ipmac.c +++ b/kernel/ip_set_bitmap_ipmac.c @@ -10,8 +10,10 @@ /* Kernel module implementing an IP set type: the bitmap:ip,mac type */ +#include 
#include #include +#include #include #include #include @@ -19,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -40,119 +41,235 @@ enum { MAC_UNSET, /* element is set, without MAC */ }; -/* Member element without and with timeout */ +/* Type structure */ +struct bitmap_ipmac { + void *members; /* the set members */ + u32 first_ip; /* host byte order, included in range */ + u32 last_ip; /* host byte order, included in range */ + u32 timeout; /* timeout value */ + struct timer_list gc; /* garbage collector */ + size_t dsize; /* size of element */ +}; struct ipmac { + u32 id; /* id in array */ + unsigned char *ether; /* ethernet address */ +}; + +/* Member element without and with timeout */ + +struct ipmac_elem { unsigned char ether[ETH_ALEN]; unsigned char match; }; -struct ipmac_timeout { +struct ipmac_telem { unsigned char ether[ETH_ALEN]; unsigned char match; unsigned long timeout; }; -struct bitmap_ipmac { - void *members; /* the set members */ - uint32_t first_ip; /* host byte order, included in range */ - uint32_t last_ip; /* host byte order, included in range */ - uint32_t timeout; /* timeout value */ - struct timer_list gc; /* garbage collector */ - size_t elem_size; /* size of element */ -}; - static inline void * -bitmap_ipmac_elem(const struct bitmap_ipmac *map, uint32_t id) +bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id) { - return (void *)((char *)map->members + id * map->elem_size); + return (void *)((char *)map->members + id * map->dsize); } static inline bool -bitmap_timeout(const struct bitmap_ipmac *map, uint32_t id) +bitmap_timeout(const struct bitmap_ipmac *map, u32 id) { - const struct ipmac_timeout *elem = bitmap_ipmac_elem(map, id); + const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); return ip_set_timeout_test(elem->timeout); } static inline bool -bitmap_expired(const struct bitmap_ipmac *map, uint32_t id) +bitmap_expired(const struct bitmap_ipmac *map, u32 id) { - const struct ipmac_timeout 
*elem = bitmap_ipmac_elem(map, id); + const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); return ip_set_timeout_expired(elem->timeout); } static inline int -bitmap_ipmac_exist(const struct ipmac *elem, bool with_timeout) +bitmap_ipmac_exist(const struct ipmac_telem *elem) { - const struct ipmac_timeout *e = (const struct ipmac_timeout *) elem; - return elem->match == MAC_UNSET || (elem->match == MAC_FILLED - && !(with_timeout && ip_set_timeout_expired(e->timeout))); + && !ip_set_timeout_expired(elem->timeout)); } -static inline int -bitmap_ipmac_test(const struct bitmap_ipmac *map, bool with_timeout, - uint32_t id, const unsigned char *ether) +/* Base variant */ + +static int +bitmap_ipmac_test(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) { - const struct ipmac *elem = bitmap_ipmac_elem(map, id); + const struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); switch (elem->match) { case MAC_UNSET: /* Trigger kernel to fill out the ethernet address */ return -EAGAIN; case MAC_FILLED: - return (ether == NULL - || memcmp(ether, elem->ether, ETH_ALEN) == 0) - && (!with_timeout || bitmap_timeout(map, id)); + return data->ether == NULL + || compare_ether_addr(data->ether, elem->ether) == 0; } return 0; } static int -bitmap_ipmac_add(struct bitmap_ipmac *map, bool with_timeout, - uint32_t id, const unsigned char *ether, - uint32_t timeout) +bitmap_ipmac_add(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) { - struct ipmac *elem = bitmap_ipmac_elem(map, id); - struct ipmac_timeout *e = (struct ipmac_timeout *) elem; + struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); switch (elem->match) { case MAC_UNSET: - if (!ether) + if (!data->ether) /* Already added without ethernet address */ return -IPSET_ERR_EXIST; /* Fill the MAC address and activate the timer */ - 
memcpy(elem->ether, ether, ETH_ALEN); + memcpy(elem->ether, data->ether, ETH_ALEN); elem->match = MAC_FILLED; - if (with_timeout) { - if (timeout == map->timeout) - /* Timeout was not specified, get stored one */ - timeout = e->timeout; - e->timeout = ip_set_timeout_set(timeout); + break; + case MAC_FILLED: + return -IPSET_ERR_EXIST; + case MAC_EMPTY: + if (data->ether) { + memcpy(elem->ether, data->ether, ETH_ALEN); + elem->match = MAC_FILLED; + } else + elem->match = MAC_UNSET; + } + + return 0; +} + +static int +bitmap_ipmac_del(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) +{ + struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); + + if (elem->match == MAC_EMPTY) + return -IPSET_ERR_EXIST; + + elem->match = MAC_EMPTY; + + return 0; +} + +static int +bitmap_ipmac_list(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ipmac *map = set->data; + const struct ipmac_elem *elem; + struct nlattr *atd, *nested; + u32 id, first = cb->args[2]; + u32 last = map->last_ip - map->first_ip; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EFAULT; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + elem = bitmap_ipmac_elem(map, id); + if (elem->match == MAC_EMPTY) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EFAULT; + } else + goto nla_put_failure; } + NLA_PUT_NET32(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id)); + if (elem->match == MAC_FILLED) + NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN, + elem->ether); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return 0; +} + +/* Timeout variant */ + +static int +bitmap_ipmac_ttest(struct ip_set *set, 
void *value, + gfp_t gfp_flags, u32 timeout) +{ + const struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); + + switch (elem->match) { + case MAC_UNSET: + /* Trigger kernel to fill out the ethernet address */ + return -EAGAIN; + case MAC_FILLED: + return (data->ether == NULL + || compare_ether_addr(data->ether, elem->ether) == 0) + && !bitmap_expired(map, data->id); + } + return 0; +} + +static int +bitmap_ipmac_tadd(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) +{ + struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); + + switch (elem->match) { + case MAC_UNSET: + if (!data->ether) + /* Already added without ethernet address */ + return -IPSET_ERR_EXIST; + /* Fill the MAC address and activate the timer */ + memcpy(elem->ether, data->ether, ETH_ALEN); + elem->match = MAC_FILLED; + if (timeout == map->timeout) + /* Timeout was not specified, get stored one */ + timeout = elem->timeout; + elem->timeout = ip_set_timeout_set(timeout); break; case MAC_FILLED: - if (!(with_timeout && bitmap_expired(map, id))) + if (bitmap_expired(map, data->id)) return -IPSET_ERR_EXIST; /* Fall through */ case MAC_EMPTY: - if (ether) { - memcpy(elem->ether, ether, ETH_ALEN); + if (data->ether) { + memcpy(elem->ether, data->ether, ETH_ALEN); elem->match = MAC_FILLED; } else elem->match = MAC_UNSET; - if (with_timeout) { - /* If MAC is unset yet, we store plain timeout - * because the timer is not activated yet - * and we can reuse it later when MAC is filled out, - * possibly by the kernel */ - e->timeout = ether ? ip_set_timeout_set(timeout) - : timeout; - } + /* If MAC is unset yet, we store plain timeout + * because the timer is not activated yet + * and we can reuse it later when MAC is filled out, + * possibly by the kernel */ + elem->timeout = data->ether ? 
ip_set_timeout_set(timeout) + : timeout; break; } @@ -160,13 +277,14 @@ bitmap_ipmac_add(struct bitmap_ipmac *map, bool with_timeout, } static int -bitmap_ipmac_del(struct bitmap_ipmac *map, bool with_timeout, - uint32_t id) +bitmap_ipmac_tdel(struct ip_set *set, void *value, + gfp_t gfp_flags, u32 timeout) { - struct ipmac *elem = bitmap_ipmac_elem(map, id); + struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); - if (elem->match == MAC_EMPTY - || (with_timeout && bitmap_expired(map, id))) + if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id)) return -IPSET_ERR_EXIST; elem->match = MAC_EMPTY; @@ -174,39 +292,75 @@ bitmap_ipmac_del(struct bitmap_ipmac *map, bool with_timeout, return 0; } +static int +bitmap_ipmac_tlist(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ipmac *map = set->data; + const struct ipmac_telem *elem; + struct nlattr *atd, *nested; + u32 id, first = cb->args[2]; + u32 timeout, last = map->last_ip - map->first_ip; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EFAULT; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + elem = bitmap_ipmac_elem(map, id); + if (!bitmap_ipmac_exist(elem)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EFAULT; + } else + goto nla_put_failure; + } + NLA_PUT_NET32(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id)); + if (elem->match == MAC_FILLED) + NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN, + elem->ether); + timeout = elem->match == MAC_UNSET ? 
elem->timeout + : ip_set_timeout_get(elem->timeout); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout)); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return 0; +} + static int bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags) + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { struct bitmap_ipmac *map = set->data; - uint32_t ip = ntohl(ip4addr(skb, flags)); - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; - - if (pf != AF_INET) - return -EINVAL; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct ipmac data; - if (ip < map->first_ip || ip > map->last_ip) + data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); + if (data.id < map->first_ip || data.id > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; + /* Backward compatibility: we don't check the second flag */ if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - ip -= map->first_ip; - - switch (adt) { - case IPSET_TEST: - return bitmap_ipmac_test(map, with_timeout, - ip, eth_hdr(skb)->h_source); - case IPSET_ADD: - return bitmap_ipmac_add(map, with_timeout, - ip, eth_hdr(skb)->h_source, - map->timeout); - case IPSET_DEL: - return bitmap_ipmac_del(map, with_timeout, ip); - default: - return -EINVAL; - } + data.id -= map->first_ip; + data.ether = eth_hdr(skb)->h_source; + + return adtfn(set, &data, GFP_ATOMIC, map->timeout); } static const struct nla_policy @@ -218,14 +372,14 @@ bitmap_ipmac_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { static int bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t flags) + enum ipset_adt adt, u32 *lineno, u32 flags) { struct bitmap_ipmac *map = set->data; struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + ipset_adtfn adtfn = 
set->variant->adt[adt]; bool eexist = flags & IPSET_FLAG_EXIST; - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; - uint32_t ip, timeout = map->timeout; - unsigned char *ether = NULL; + struct ipmac data; + u32 timeout = map->timeout; int ret = 0; if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, @@ -233,35 +387,31 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *head, int len, return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_IP]) - ip = ip_set_get_h32(tb[IPSET_ATTR_IP]); + data.id = ip_set_get_h32(tb[IPSET_ATTR_IP]); else return -IPSET_ERR_PROTOCOL; - if (ip < map->first_ip || ip > map->last_ip) + if (data.id < map->first_ip || data.id > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; if (tb[IPSET_ATTR_ETHER]) - ether = nla_data(tb[IPSET_ATTR_ETHER]); + data.ether = nla_data(tb[IPSET_ATTR_ETHER]); + else + data.ether = NULL; if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout) + if (!with_timeout(map->timeout)) return -IPSET_ERR_TIMEOUT; - timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - ip -= map->first_ip; - - if (adt == IPSET_TEST) - return bitmap_ipmac_test(map, with_timeout, ip, ether); + data.id -= map->first_ip; - ret = adt == IPSET_ADD ? 
bitmap_ipmac_add(map, with_timeout, - ip, ether, timeout) - : bitmap_ipmac_del(map, with_timeout, ip); + ret = adtfn(set, &data, GFP_KERNEL, timeout); if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - return ret; } return ret; } @@ -271,10 +421,8 @@ bitmap_ipmac_destroy(struct ip_set *set) { struct bitmap_ipmac *map = set->data; - /* gc might be running: del_timer_sync can't be used */ - if (set->flags & IP_SET_FLAG_TIMEOUT) - while (!del_timer(&map->gc)) - msleep(IPSET_DESTROY_TIMER_SLEEP); + if (with_timeout(map->timeout)) + del_timer_sync(&map->gc); ip_set_free(map->members, set->flags); kfree(map); @@ -288,7 +436,7 @@ bitmap_ipmac_flush(struct ip_set *set) struct bitmap_ipmac *map = set->data; memset(map->members, 0, - (map->last_ip - map->first_ip + 1) * map->elem_size); + (map->last_ip - map->first_ip + 1) * map->dsize); } static int @@ -296,28 +444,18 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) { const struct bitmap_ipmac *map = set->data; struct nlattr *nested; - const struct ipmac *elem; - uint32_t id, elements = 0, last = map->last_ip - map->first_ip; - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; - - for (id = 0; id <= last; id++) { - elem = bitmap_ipmac_elem(map, id); - if (bitmap_ipmac_exist(elem, with_timeout)) - elements++; - } nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip)); NLA_PUT_NET32(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); - NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(atomic_read(&set->ref) - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, - htonl((map->last_ip - map->first_ip + 1) - * map->elem_size)); - if (with_timeout) + htonl(sizeof(*map) + + (map->last_ip - map->first_ip + 1) * map->dsize)); + if (with_timeout(map->timeout)) NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); 
ipset_nest_end(skb, nested); @@ -326,77 +464,54 @@ nla_put_failure: return -EFAULT; } -static int -bitmap_ipmac_list(struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) +static bool +bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) { - const struct bitmap_ipmac *map = set->data; - const struct ipmac *elem; - struct nlattr *atd, *nested; - uint32_t id, first = cb->args[2]; - uint32_t last = map->last_ip - map->first_ip; - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; - - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!atd) - return -EFAULT; - for (; cb->args[2] <= last; cb->args[2]++) { - id = cb->args[2]; - elem = bitmap_ipmac_elem(map, id); - if (!bitmap_ipmac_exist(elem, with_timeout)) - continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (id == first) { - nla_nest_cancel(skb, atd); - return -EFAULT; - } else - goto nla_put_failure; - } - NLA_PUT_NET32(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id)); - if (elem->match == MAC_FILLED) - NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN, - elem->ether); - if (with_timeout) { - const struct ipmac_timeout *e = - (const struct ipmac_timeout *)elem; - uint32_t timeout = e->match == MAC_UNSET ? 
e->timeout - : ip_set_timeout_get(e->timeout); + struct bitmap_ipmac *x = a->data; + struct bitmap_ipmac *y = b->data; - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(timeout)); - } - ipset_nest_end(skb, nested); - } - ipset_nest_end(skb, atd); - /* Set listing finished */ - cb->args[2] = 0; - - return 0; - -nla_put_failure: - nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); - return 0; + return x->first_ip == y->first_ip + && x->last_ip == y->last_ip + && x->timeout == y->timeout; } const struct ip_set_type_variant bitmap_ipmac __read_mostly = { .kadt = bitmap_ipmac_kadt, .uadt = bitmap_ipmac_uadt, + .adt = { + [IPSET_ADD] = bitmap_ipmac_add, + [IPSET_DEL] = bitmap_ipmac_del, + [IPSET_TEST] = bitmap_ipmac_test, + }, .destroy = bitmap_ipmac_destroy, .flush = bitmap_ipmac_flush, .head = bitmap_ipmac_head, .list = bitmap_ipmac_list, + .same_set = bitmap_ipmac_same_set, +}; + +const struct ip_set_type_variant bitmap_tipmac __read_mostly = { + .kadt = bitmap_ipmac_kadt, + .uadt = bitmap_ipmac_uadt, + .adt = { + [IPSET_ADD] = bitmap_ipmac_tadd, + [IPSET_DEL] = bitmap_ipmac_tdel, + [IPSET_TEST] = bitmap_ipmac_ttest, + }, + .destroy = bitmap_ipmac_destroy, + .flush = bitmap_ipmac_flush, + .head = bitmap_ipmac_head, + .list = bitmap_ipmac_tlist, + .same_set = bitmap_ipmac_same_set, }; static void -bitmap_ipmac_timeout_gc(unsigned long ul_set) +bitmap_ipmac_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct bitmap_ipmac *map = set->data; - struct ipmac_timeout *elem; - uint32_t id, last = map->last_ip - map->first_ip; + struct ipmac_telem *elem; + u32 id, last = map->last_ip - map->first_ip; /* We run parallel with other readers (test element) * but adding/deleting new entries is locked out */ @@ -414,13 +529,13 @@ bitmap_ipmac_timeout_gc(unsigned long ul_set) } static inline void -bitmap_ipmac_timeout_gc_init(struct ip_set *set) +bitmap_ipmac_gc_init(struct ip_set *set) { struct bitmap_ipmac *map = set->data; init_timer(&map->gc); 
map->gc.data = (unsigned long) set; - map->gc.function = bitmap_ipmac_timeout_gc; + map->gc.function = bitmap_ipmac_gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; add_timer(&map->gc); } @@ -436,14 +551,15 @@ bitmap_ipmac_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, - uint32_t first_ip, uint32_t last_ip) + u32 first_ip, u32 last_ip) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * map->elem_size, + map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize, GFP_KERNEL, &set->flags); if (!map->members) return false; map->first_ip = first_ip; map->last_ip = last_ip; + map->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = AF_INET; @@ -453,10 +569,10 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *head, int len, - uint32_t flags) + u32 flags) { struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; - uint32_t first_ip, last_ip, elements; + u32 first_ip, last_ip, elements; struct bitmap_ipmac *map; if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, @@ -471,13 +587,13 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *head, int len, if (tb[IPSET_ATTR_IP_TO]) { last_ip = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]); if (first_ip > last_ip) { - uint32_t tmp = first_ip; + u32 tmp = first_ip; first_ip = last_ip; last_ip = tmp; } } else if (tb[IPSET_ATTR_CIDR]) { - uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr >= 32) return -IPSET_ERR_INVALID_CIDR; @@ -490,31 +606,32 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *head, int len, if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; - set->variant = &bitmap_ipmac; - map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) return -ENOMEM; if (tb[IPSET_ATTR_TIMEOUT]) { - map->elem_size = sizeof(struct ipmac_timeout); + map->dsize = sizeof(struct 
ipmac_telem); if (!init_map_ipmac(set, map, first_ip, last_ip)) { kfree(map); return -ENOMEM; } - map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); - set->flags |= IP_SET_FLAG_TIMEOUT; + map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - bitmap_ipmac_timeout_gc_init(set); + set->variant = &bitmap_tipmac; + + bitmap_ipmac_gc_init(set); } else { - map->elem_size = sizeof(struct ipmac); + map->dsize = sizeof(struct ipmac_elem); if (!init_map_ipmac(set, map, first_ip, last_ip)) { kfree(map); return -ENOMEM; } + set->variant = &bitmap_ipmac; + } return 0; } @@ -522,7 +639,8 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *head, int len, struct ip_set_type bitmap_ipmac_type = { .name = "bitmap:ip,mac", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP, + .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, + .dimension = IPSET_DIM_TWO, .family = AF_INET, .revision = 0, .create = bitmap_ipmac_create, diff --git a/kernel/ip_set_bitmap_port.c b/kernel/ip_set_bitmap_port.c index 3afd031..f3e498a 100644 --- a/kernel/ip_set_bitmap_port.c +++ b/kernel/ip_set_bitmap_port.c @@ -7,6 +7,7 @@ /* Kernel module implementing an IP set type: the bitmap:port type */ +#include #include #include #include @@ -17,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -38,19 +38,19 @@ MODULE_ALIAS("ip_set_bitmap:port"); struct bitmap_port { void *members; /* the set members */ - uint16_t first_port; /* host byte order, included in range */ - uint16_t last_port; /* host byte order, included in range */ + u16 first_port; /* host byte order, included in range */ + u16 last_port; /* host byte order, included in range */ size_t memsize; /* members size */ }; static inline int -bitmap_port_test(const struct bitmap_port *map, uint16_t id) +bitmap_port_test(const struct bitmap_port *map, u16 id) { return !!test_bit(id, map->members); } static inline int -bitmap_port_add(struct bitmap_port *map, uint16_t id) +bitmap_port_add(struct bitmap_port *map, u16 id) 
{ if (test_and_set_bit(id, map->members)) return -IPSET_ERR_EXIST; @@ -59,7 +59,7 @@ bitmap_port_add(struct bitmap_port *map, uint16_t id) } static int -bitmap_port_del(struct bitmap_port *map, uint16_t id) +bitmap_port_del(struct bitmap_port *map, u16 id) { if (!test_and_clear_bit(id, map->members)) return -IPSET_ERR_EXIST; @@ -69,13 +69,13 @@ bitmap_port_del(struct bitmap_port *map, uint16_t id) static int bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags) + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { struct bitmap_port *map = set->data; - uint32_t port = get_port(pf, skb, flags); - - if (port == IPSET_INVALID_PORT) - return 0; + u16 port; + + if (!get_port(pf, skb, flags & IPSET_DIM_ONE_SRC, &port)) + return -EINVAL; port = ntohs(port); @@ -105,13 +105,13 @@ bitmap_port_adt_policy[IPSET_ATTR_ADT_MAX+1] __read_mostly = { static int bitmap_port_uadt(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t flags) + enum ipset_adt adt, u32 *lineno, u32 flags) { struct bitmap_port *map = set->data; struct nlattr *tb[IPSET_ATTR_ADT_MAX]; bool eexist = flags & IPSET_FLAG_EXIST; - uint32_t port; - uint16_t id, port_to; + u32 port; /* wraparound */ + u16 id, port_to; int ret = 0; if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, @@ -183,22 +183,16 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb) { struct bitmap_port *map = set->data; struct nlattr *nested; - uint32_t id; - uint16_t elements, last = map->last_port - map->first_port; - - for (id = 0, elements = 0; id <= last; id++) - if (test_bit(id, map->members)) - elements++; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); - NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, 
htonl(atomic_read(&set->ref) - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)); ipset_nest_end(skb, nested); return 0; @@ -212,8 +206,8 @@ bitmap_port_list(struct ip_set *set, { struct bitmap_port *map = set->data; struct nlattr *atd, *nested; - uint16_t id, first = cb->args[2]; - uint16_t last = map->last_port - map->first_port; + u16 id, first = cb->args[2]; + u16 last = map->last_port - map->first_port; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) @@ -246,6 +240,16 @@ nla_put_failure: return 0; } +static bool +bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) +{ + struct bitmap_port *x = a->data; + struct bitmap_port *y = b->data; + + return x->first_port == y->first_port + && x->last_port == y->last_port; +} + const struct ip_set_type_variant bitmap_port __read_mostly = { .kadt = bitmap_port_kadt, .uadt = bitmap_port_uadt, @@ -253,22 +257,23 @@ const struct ip_set_type_variant bitmap_port __read_mostly = { .flush = bitmap_port_flush, .head = bitmap_port_head, .list = bitmap_port_list, + .same_set = bitmap_port_same_set, }; /* Timeout variant */ struct bitmap_port_timeout { void *members; /* the set members */ - uint16_t first_port; /* host byte order, included in range */ - uint16_t last_port; /* host byte order, included in range */ + u16 first_port; /* host byte order, included in range */ + u16 last_port; /* host byte order, included in range */ size_t memsize; /* members size */ - uint32_t timeout; /* timeout parameter */ + u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; static inline bool -bitmap_port_timeout_test(const struct bitmap_port_timeout *map, uint16_t id) +bitmap_port_timeout_test(const struct bitmap_port_timeout *map, u16 id) { unsigned long *timeout = map->members; @@ -277,7 +282,7 @@ bitmap_port_timeout_test(const struct bitmap_port_timeout *map, uint16_t id) static int 
bitmap_port_timeout_add(const struct bitmap_port_timeout *map, - uint16_t id, uint32_t timeout) + u16 id, u32 timeout) { unsigned long *table = map->members; @@ -291,7 +296,7 @@ bitmap_port_timeout_add(const struct bitmap_port_timeout *map, static int bitmap_port_timeout_del(const struct bitmap_port_timeout *map, - uint16_t id) + u16 id) { unsigned long *table = map->members; int ret = -IPSET_ERR_EXIST; @@ -305,13 +310,13 @@ bitmap_port_timeout_del(const struct bitmap_port_timeout *map, static int bitmap_port_timeout_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags) + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { struct bitmap_port_timeout *map = set->data; - uint32_t port = get_port(pf, skb, flags); - - if (port == IPSET_INVALID_PORT) - return 0; + u16 port; + + if (!get_port(pf, skb, flags & IPSET_DIM_ONE_SRC, &port)) + return -EINVAL; port = ntohs(port); @@ -334,13 +339,13 @@ bitmap_port_timeout_kadt(struct ip_set *set, const struct sk_buff *skb, static int bitmap_port_timeout_uadt(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t flags) + enum ipset_adt adt, u32 *lineno, u32 flags) { const struct bitmap_port_timeout *map = set->data; struct nlattr *tb[IPSET_ATTR_ADT_MAX]; bool eexist = flags & IPSET_FLAG_EXIST; - uint16_t port_to, id; - uint32_t port, timeout = map->timeout; + u16 id, port_to; + u32 port, timeout = map->timeout; /* wraparound */ int ret = 0; if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, @@ -372,7 +377,7 @@ bitmap_port_timeout_uadt(struct ip_set *set, struct nlattr *head, int len, return -IPSET_ERR_BITMAP_RANGE; if (tb[IPSET_ATTR_TIMEOUT]) - timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); for (; port <= port_to; port++) { id = port - map->first_port; @@ -394,10 +399,7 @@ bitmap_port_timeout_destroy(struct ip_set *set) { struct bitmap_port_timeout *map = set->data; - /* gc might 
be running: del_timer_sync can't be used */ - while (!del_timer(&map->gc)) - msleep(IPSET_DESTROY_TIMER_SLEEP); - + del_timer_sync(&map->gc); ip_set_free(map->members, set->flags); kfree(map); @@ -417,23 +419,17 @@ bitmap_port_timeout_head(struct ip_set *set, struct sk_buff *skb) { struct bitmap_port_timeout *map = set->data; struct nlattr *nested; - uint32_t id; - uint16_t elements, last = map->last_port - map->first_port; - - for (id = 0, elements = 0; id <= last; id++) - if (bitmap_port_timeout_test(map, id)) - elements++; - + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT , htonl(map->timeout)); - NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(atomic_read(&set->ref) - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)); ipset_nest_end(skb, nested); return 0; @@ -447,8 +443,8 @@ bitmap_port_timeout_list(struct ip_set *set, { struct bitmap_port_timeout *map = set->data; struct nlattr *adt, *nested; - uint16_t id, first = cb->args[2]; - uint16_t last = map->last_port - map->first_port; + u16 id, first = cb->args[2]; + u16 last = map->last_port - map->first_port; unsigned long *table = map->members; adt = ipset_nest_start(skb, IPSET_ATTR_ADT); @@ -485,6 +481,17 @@ nla_put_failure: return 0; } +static bool +bitmap_port_timeout_same_set(const struct ip_set *a, const struct ip_set *b) +{ + struct bitmap_port_timeout *x = a->data; + struct bitmap_port_timeout *y = b->data; + + return x->first_port == y->first_port + && x->last_port == y->last_port + && x->timeout == y->timeout; +} + const struct ip_set_type_variant bitmap_port_timeout __read_mostly = { .kadt = bitmap_port_timeout_kadt, .uadt = bitmap_port_timeout_uadt, @@ 
-492,15 +499,17 @@ const struct ip_set_type_variant bitmap_port_timeout __read_mostly = { .flush = bitmap_port_timeout_flush, .head = bitmap_port_timeout_head, .list = bitmap_port_timeout_list, + .same_set = bitmap_port_timeout_same_set, }; static void -bitmap_port_timeout_gc(unsigned long ul_set) +bitmap_port_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct bitmap_port_timeout *map = set->data; unsigned long *table = map->members; - uint16_t id, last = map->last_port - map->first_port; + u32 id; /* wraparound */ + u16 last = map->last_port - map->first_port; /* We run parallel with other readers (test element) * but adding/deleting new entries is locked out */ @@ -515,13 +524,13 @@ bitmap_port_timeout_gc(unsigned long ul_set) } static inline void -bitmap_port_timeout_gc_init(struct ip_set *set) +bitmap_port_gc_init(struct ip_set *set) { struct bitmap_port_timeout *map = set->data; init_timer(&map->gc); map->gc.data = (unsigned long) set; - map->gc.function = bitmap_port_timeout_gc; + map->gc.function = bitmap_port_gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; add_timer(&map->gc); } @@ -537,7 +546,7 @@ bitmap_port_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { static bool init_map_port(struct ip_set *set, struct bitmap_port *map, - uint16_t first_port, uint16_t last_port) + u16 first_port, u16 last_port) { map->members = ip_set_alloc(map->memsize, GFP_KERNEL, &set->flags); if (!map->members) @@ -553,10 +562,10 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, static int bitmap_port_create(struct ip_set *set, struct nlattr *head, int len, - uint32_t flags) + u32 flags) { struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; - uint16_t first_port, last_port; + u16 first_port, last_port; if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, bitmap_port_create_policy)) @@ -570,7 +579,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *head, int len, if (tb[IPSET_ATTR_PORT_TO]) { last_port = 
ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (first_port > last_port) { - uint16_t tmp = first_port; + u16 tmp = first_port; first_port = last_port; last_port = tmp; @@ -594,11 +603,10 @@ bitmap_port_create(struct ip_set *set, struct nlattr *head, int len, return -ENOMEM; } - map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); - set->flags |= IP_SET_FLAG_TIMEOUT; + map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->variant = &bitmap_port_timeout; - bitmap_port_timeout_gc_init(set); + bitmap_port_gc_init(set); } else { struct bitmap_port *map; @@ -607,7 +615,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *head, int len, return -ENOMEM; map->memsize = bitmap_bytes(0, last_port - first_port); - D("memsize: %zu", map->memsize); + pr_debug("memsize: %zu", map->memsize); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; @@ -622,6 +630,7 @@ struct ip_set_type bitmap_port_type = { .name = "bitmap:port", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_PORT, + .dimension = IPSET_DIM_ONE, .family = AF_UNSPEC, .revision = 0, .create = bitmap_port_create, diff --git a/kernel/ip_set_hash_ip.c b/kernel/ip_set_hash_ip.c index d99c99b..e5ce6a4 100644 --- a/kernel/ip_set_hash_ip.c +++ b/kernel/ip_set_hash_ip.c @@ -7,6 +7,8 @@ /* Kernel module implementing an IP set type: the hash:ip type */ +#include +#include #include #include #include @@ -19,6 +21,7 @@ #include #include #include +#include #include #include @@ -30,213 +33,125 @@ MODULE_AUTHOR("Jozsef Kadlecsik "); MODULE_DESCRIPTION("hash:ip type of IP sets"); MODULE_ALIAS("ip_set_hash:ip"); -/* Member elements without timeout */ -struct ip4_elem { - uint32_t ip; -}; +/* Type specific function prefix */ +#define TYPE hash_ip -struct ip6_elem { - union nf_inet_addr ip; -}; +static bool +hash_ip_same_set(const struct ip_set *a, const struct ip_set *b); -/* Member elements with timeout support */ -struct ip4_elem_timeout { - uint32_t ip; - unsigned long timeout; -}; 
+#define hash_ip4_same_set hash_ip_same_set +#define hash_ip6_same_set hash_ip_same_set -struct ip6_elem_timeout { - union nf_inet_addr ip; - unsigned long timeout; -}; +/* The type variant functions: IPv4 */ -/* The hash:ip type structure */ -struct hash_ip { - void *members; /* the set members */ - uint32_t hashsize; /* hash size */ - uint32_t maxelem; /* max number of elements/hashsize */ - uint8_t probes; /* max number of probes */ - uint8_t resize; /* resize factor in percent */ - uint8_t netmask; /* netmask */ - uint32_t timeout; /* timeout value */ - uint32_t elements; /* number of elements */ - struct timer_list gc; /* garbage collector */ - size_t elem_size; /* size of element */ - initval_t initval[0]; /* initvals for jhash_1word */ +/* Member elements without timeout */ +struct hash_ip4_elem { + u32 ip; }; -static inline void * -hash_ip_elem(const struct hash_ip *map, uint32_t id) -{ - return (void *)((char *)map->members + id * map->elem_size); -} - -static inline unsigned long -get_ip4_elem_timeout(const struct ip4_elem *elem) -{ - return ((const struct ip4_elem_timeout *)elem)->timeout; -} - -static inline unsigned long -get_ip6_elem_timeout(const struct ip6_elem *elem) -{ - return ((const struct ip6_elem_timeout *)elem)->timeout; -} - -static inline uint32_t -ip4_hash(struct ip4_elem *elem, initval_t initval, uint32_t hashsize) -{ - return jhash_1word(elem->ip, initval) % hashsize; -} - -static inline uint32_t -ip6_hash(struct ip6_elem *elem, initval_t initval, uint32_t hashsize) -{ - return jhash2((u32 *)&elem->ip, 4, initval) % hashsize; -} +/* Member elements with timeout support */ +struct hash_ip4_telem { + u32 ip; + unsigned long timeout; +}; static inline bool -ip4_cmp(struct ip4_elem *ip1, struct ip4_elem *ip2) +hash_ip4_data_equal(const struct hash_ip4_elem *ip1, + const struct hash_ip4_elem *ip2) { return ip1->ip == ip2->ip; } static inline bool -ip6_cmp(struct ip6_elem *ip1, struct ip6_elem *ip2) -{ - return ipv6_addr_cmp(&ip1->ip.in6, 
&ip2->ip.in6); -} - -static inline bool -ip4_null(struct ip4_elem *elem) +hash_ip4_data_isnull(const struct hash_ip4_elem *elem) { return elem->ip == 0; } -static inline bool -ip6_null(struct ip6_elem *elem) -{ - return ipv6_addr_any(&elem->ip.in6); -} - static inline void -ip4_cpy(struct ip4_elem *dst, const struct ip4_elem *src) +hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src) { dst->ip = src->ip; } static inline void -ip6_cpy(struct ip6_elem *dst, const struct ip6_elem *src) +hash_ip4_data_swap(struct hash_ip4_elem *dst, struct hash_ip4_elem *src) { - ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); + swap(dst->ip, src->ip); } -/* Zero valued IP addresses (network order) cannot be stored */ +/* Zero valued IP addresses cannot be stored */ static inline void -ip4_zero_out(struct ip4_elem *elem) +hash_ip4_data_zero_out(struct hash_ip4_elem *elem) { elem->ip = 0; } -static inline void -ip6_zero_out(struct ip6_elem *elem) +static inline bool +hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data) { - ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0); -} + NLA_PUT_NET32(skb, IPSET_ATTR_IP, data->ip); + return 0; -static inline void -ip6_netmask(union nf_inet_addr *ip, uint8_t prefix) -{ - ip->ip6[0] &= NETMASK6(prefix)[0]; - ip->ip6[1] &= NETMASK6(prefix)[1]; - ip->ip6[2] &= NETMASK6(prefix)[2]; - ip->ip6[3] &= NETMASK6(prefix)[3]; +nla_put_failure: + return 1; } -/* The type variant functions: generic ones */ - -static void -hash_ip_destroy(struct ip_set *set) +static inline bool +hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data) { - struct hash_ip *map = set->data; + const struct hash_ip4_telem *tdata = + (const struct hash_ip4_telem *)data; - /* gc might be running: del_timer_sync can't be used */ - if (set->flags & IP_SET_FLAG_TIMEOUT) - while (!del_timer(&map->gc)) - msleep(IPSET_DESTROY_TIMER_SLEEP); - - ip_set_free(map->members, set->flags); - kfree(map); - - set->data = NULL; -} + NLA_PUT_NET32(skb, 
IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); -#define hash_ip4_destroy hash_ip_destroy -#define hash_ip6_destroy hash_ip_destroy + return 0; -static void -hash_ip_flush(struct ip_set *set) -{ - struct hash_ip *map = set->data; - - memset(map->members, 0, map->hashsize * map->elem_size); - map->elements = 0; +nla_put_failure: + return 1; } -#define hash_ip4_flush hash_ip_flush -#define hash_ip6_flush hash_ip_flush - -/* IPv4 variant */ - -#define PF 4 -#include "ip_set_hash_ip_src.c" -#undef PF +#define IP_SET_HASH_WITH_NETMASK +#define PF 4 +#define HOST_MASK 32 +#include static int hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags) + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { - struct hash_ip *map = set->data; - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; - uint32_t ip; + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + u32 ip; - if (pf != AF_INET) - return -EINVAL; - - ip4addrptr(skb, flags, &ip); - ip &= NETMASK(map->netmask); + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip); + ip &= NETMASK(h->netmask); if (ip == 0) return -EINVAL; - switch (adt) { - case IPSET_TEST: - return hash_ip4_test(map, with_timeout, - (struct ip4_elem *)&ip); - case IPSET_ADD: - return hash_ip4_add(map, with_timeout, - (struct ip4_elem *)&ip, map->timeout); - case IPSET_DEL: - return hash_ip4_del(map, with_timeout, (struct ip4_elem *)&ip); - default: - BUG(); - } - return 0; + return adtfn(set, &ip, GFP_ATOMIC, h->timeout); } static const struct nla_policy hash_ip4_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_IP_TO] = { .type = NLA_U32 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, }; static int hash_ip4_uadt(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t 
flags) + enum ipset_adt adt, u32 *lineno, u32 flags) { - struct hash_ip *map = set->data; + struct chash *h = set->data; struct nlattr *tb[IPSET_ATTR_ADT_MAX]; - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; - uint32_t ip, timeout = map->timeout; + bool eexist = flags & IPSET_FLAG_EXIST; + ipset_adtfn adtfn = set->variant->adt[adt]; + u32 ip, nip, ip_to, hosts, timeout = h->timeout; + int ret = 0; if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, hash_ip4_adt_policy)) @@ -247,69 +162,164 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *head, int len, else return -IPSET_ERR_PROTOCOL; - ip &= NETMASK(map->netmask); + ip &= NETMASK(h->netmask); if (ip == 0) return -IPSET_ERR_HASH_ELEM; if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout) + if (!with_timeout(h->timeout)) return -IPSET_ERR_TIMEOUT; - timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - switch (adt) { - case IPSET_TEST: - return hash_ip4_test(map, with_timeout, - (struct ip4_elem *)&ip); - case IPSET_ADD: - return hash_ip4_add(map, with_timeout, - (struct ip4_elem *)&ip, timeout); - case IPSET_DEL: - return hash_ip4_del(map, with_timeout, - (struct ip4_elem *)&ip); - default: - BUG(); + if (adt == IPSET_TEST) + return adtfn(set, &ip, GFP_KERNEL, timeout); + + ip = ntohl(ip); + if (tb[IPSET_ATTR_IP_TO]) { + ip_to = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]); + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= HOSTMASK(cidr); + ip_to = ip | ~HOSTMASK(cidr); + } else + ip_to = ip; + + hosts = h->netmask == 32 ? 
1 : 2 << (32 - h->netmask - 1); + + for (; !before(ip_to, ip); ip += hosts) { + nip = htonl(ip); + ret = adtfn(set, &nip, GFP_KERNEL, timeout); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + return ret; + } } + return ret; +} + +static bool +hash_ip_same_set(const struct ip_set *a, const struct ip_set *b) +{ + struct chash *x = a->data; + struct chash *y = b->data; + + return x->maxelem == y->maxelem + && x->timeout == y->timeout + && x->htable_bits == y->htable_bits /* resizing ? */ + && x->array_size == y->array_size + && x->chain_limit == y->chain_limit + && x->netmask == y->netmask; +} + +/* The type variant functions: IPv6 */ + +struct hash_ip6_elem { + union nf_inet_addr ip; +}; +struct hash_ip6_telem { + union nf_inet_addr ip; + unsigned long timeout; +}; + +static inline bool +hash_ip6_data_equal(const struct hash_ip6_elem *ip1, + const struct hash_ip6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; +} + +static inline bool +hash_ip6_data_isnull(const struct hash_ip6_elem *elem) +{ + return ipv6_addr_any(&elem->ip.in6); +} + +static inline void +hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src) +{ + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); +} + +static inline void +hash_ip6_data_swap(struct hash_ip6_elem *dst, struct hash_ip6_elem *src) +{ + struct in6_addr tmp; + + ipv6_addr_copy(&tmp, &dst->ip.in6); + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); + ipv6_addr_copy(&src->ip.in6, &tmp); +} + +static inline void +hash_ip6_data_zero_out(struct hash_ip6_elem *elem) +{ + ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0); +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= NETMASK6(prefix)[0]; + ip->ip6[1] &= NETMASK6(prefix)[1]; + ip->ip6[2] &= NETMASK6(prefix)[2]; + ip->ip6[3] &= NETMASK6(prefix)[3]; +} + +static inline bool +hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem 
*data) +{ + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &data->ip); return 0; + +nla_put_failure: + return 1; } -/* IPv6 variants */ +static inline bool +hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data) +{ + const struct hash_ip6_telem *e = + (const struct hash_ip6_telem *)data; + + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &e->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} -#define PF 6 -#include "ip_set_hash_ip_src.c" #undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include static int hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags) + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { - struct hash_ip *map = set->data; - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; union nf_inet_addr ip; - if (pf != AF_INET6) - return -EINVAL; - - ip6addrptr(skb, flags, &ip.in6); - ip6_netmask(&ip, map->netmask); + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6); + ip6_netmask(&ip, h->netmask); if (ipv6_addr_any(&ip.in6)) return -EINVAL; - switch (adt) { - case IPSET_TEST: - return hash_ip6_test(map, with_timeout, - (struct ip6_elem *)&ip); - case IPSET_ADD: - return hash_ip6_add(map, with_timeout, - (struct ip6_elem *)&ip, map->timeout); - case IPSET_DEL: - return hash_ip6_del(map, with_timeout, - (struct ip6_elem *)&ip); - default: - BUG(); - } - return 0; + return adtfn(set, &ip, GFP_ATOMIC, h->timeout); } static const struct nla_policy @@ -321,13 +331,13 @@ hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { static int hash_ip6_uadt(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t flags) + enum ipset_adt adt, u32 *lineno, u32 flags) { - struct hash_ip *map = set->data; + struct chash *h = set->data; struct nlattr 
*tb[IPSET_ATTR_ADT_MAX]; + ipset_adtfn adtfn = set->variant->adt[adt]; union nf_inet_addr *ip; - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; - uint32_t timeout = map->timeout; + u32 timeout = h->timeout; if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, hash_ip6_adt_policy)) @@ -338,31 +348,17 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *head, int len, else return -IPSET_ERR_PROTOCOL; - ip6_netmask(ip, map->netmask); + ip6_netmask(ip, h->netmask); if (ipv6_addr_any(&ip->in6)) return -IPSET_ERR_HASH_ELEM; if (tb[IPSET_ATTR_TIMEOUT]) { - if (!with_timeout) + if (!with_timeout(h->timeout)) return -IPSET_ERR_TIMEOUT; - timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - switch (adt) { - case IPSET_TEST: - return hash_ip6_test(map, with_timeout, - (struct ip6_elem *)ip); - case IPSET_ADD: - return hash_ip6_add(map, with_timeout, - (struct ip6_elem *)ip, timeout); - case IPSET_DEL: - return hash_ip6_del(map, with_timeout, - (struct ip6_elem *)ip); - default: - BUG(); - } - - return 0; + return adtfn(set, ip, GFP_KERNEL, timeout); } /* Create hash:ip type of sets */ @@ -374,114 +370,84 @@ hash_ip_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, }; -static bool -init_map_ip(struct ip_set *set, struct hash_ip *map, uint32_t maxelem, - uint32_t probes, uint32_t resize, uint8_t netmask, uint8_t family) -{ - map->members = ip_set_alloc(map->hashsize * map->elem_size, - GFP_KERNEL, &set->flags); - if (!map->members) - return false; - - map->maxelem = maxelem; - map->probes = probes; - map->resize = resize; - map->netmask = netmask; - - set->data = map; - set->family = family; - - return true; -} - static int -hash_ip_create(struct ip_set *set, struct nlattr *head, int len, - uint32_t flags) +hash_ip_create(struct ip_set *set, 
struct nlattr *head, int len, u32 flags) { struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; - uint32_t hashsize, maxelem; - uint8_t probes, resize, netmask, family, i; - struct hash_ip *map; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 netmask; + struct chash *h; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + netmask = set->family == AF_INET ? 32 : 128; + pr_debug("Create set %s with family %s", + set->name, set->family == AF_INET ? "inet" : "inet6"); if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, hash_ip_create_policy)) return -IPSET_ERR_PROTOCOL; - hashsize = IPSET_DEFAULT_HASHSIZE; - maxelem = IPSET_DEFAULT_MAXELEM; - probes = IPSET_DEFAULT_PROBES; - resize = IPSET_DEFAULT_RESIZE; - family = AF_INET; - - if (tb[IPSET_ATTR_HASHSIZE]) + if (tb[IPSET_ATTR_HASHSIZE]) { hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } if (tb[IPSET_ATTR_MAXELEM]) maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - if (tb[IPSET_ATTR_PROBES]) - probes = nla_get_u8(tb[IPSET_ATTR_PROBES]); - - if (tb[IPSET_ATTR_RESIZE]) - resize = nla_get_u8(tb[IPSET_ATTR_RESIZE]); - - if (tb[IPSET_ATTR_FAMILY]) - family = nla_get_u8(tb[IPSET_ATTR_FAMILY]); - if (!(family == AF_INET || family == AF_INET6)) - return -IPSET_ERR_INVALID_FAMILY; - netmask = family == AF_INET ? 32 : 128; - if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); - if ((family == AF_INET && netmask > 32) - || (family == AF_INET6 && netmask > 128)) + if ((set->family == AF_INET && netmask > 32) + || (set->family == AF_INET6 && netmask > 128) + || netmask == 0) return -IPSET_ERR_INVALID_NETMASK; } - map = kzalloc(sizeof(*map) + probes * sizeof(initval_t), GFP_KERNEL); - if (!map) + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) return -ENOMEM; - - map->hashsize = hashsize; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->elem_size = family == AF_INET - ? 
sizeof(struct ip4_elem_timeout) - : sizeof(struct ip6_elem_timeout); - - if (!init_map_ip(set, map, maxelem, probes, resize, netmask, - family)) { - kfree(map); - return -ENOMEM; - } - map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); - set->flags |= IP_SET_FLAG_TIMEOUT; + h->maxelem = maxelem; + h->netmask = netmask; + h->htable_bits = htable_bits(hashsize); + h->array_size = CHASH_DEFAULT_ARRAY_SIZE; + h->chain_limit = CHASH_DEFAULT_CHAIN_LIMIT; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + h->htable = ip_set_alloc(jhash_size(h->htable_bits) * sizeof(struct slist), + GFP_KERNEL, &set->flags); + if (!h->htable) { + kfree(h); + return -ENOMEM; + } + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - if (family == AF_INET) + set->variant = set->family == AF_INET + ? &hash_ip4_tvariant : &hash_ip6_tvariant; + + if (set->family == AF_INET) hash_ip4_gc_init(set); else hash_ip6_gc_init(set); } else { - map->elem_size = family == AF_INET - ? sizeof(struct ip4_elem) - : sizeof(struct ip6_elem); - - if (!init_map_ip(set, map, maxelem, probes, resize, netmask, - family)) { - kfree(map); - return -ENOMEM; - } + set->variant = set->family == AF_INET + ? &hash_ip4_variant : &hash_ip6_variant; } - for (i = 0; i < map->probes; i++) - get_random_bytes(((initval_t *) map->initval)+i, - sizeof(initval_t)); - set->variant = family == AF_INET ? 
&hash_ip4 : &hash_ip6; - D("create %s hashsize %u maxelem %u probes %u resize %u", - set->name, map->hashsize, map->maxelem, map->probes, map->resize); + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)", + set->name, jhash_size(h->htable_bits), + h->htable_bits, h->maxelem, set->data, h->htable); return 0; } @@ -490,6 +456,7 @@ static struct ip_set_type hash_ip_type = { .name = "hash:ip", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP, + .dimension = IPSET_DIM_ONE, .family = AF_UNSPEC, .revision = 0, .create = hash_ip_create, diff --git a/kernel/ip_set_hash_ip_src.c b/kernel/ip_set_hash_ip_src.c deleted file mode 100644 index ef0a8ec..0000000 --- a/kernel/ip_set_hash_ip_src.c +++ /dev/null @@ -1,473 +0,0 @@ -/* Copyright (C) 2003-2010 Jozsef Kadlecsik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#define CONCAT(a, b, c) a##b##c -#define TOKEN(a, b, c) CONCAT(a, b, c) - -/* IPv4/IPv6 dependent function prototypes for hash:ip */ - -#if PF == 4 -#define HOST_MASK 32 -#define NLA_PUT_ADDR(skb, ip) \ - NLA_PUT_NET32(skb, IPSET_ATTR_IP, *(ip)); -#else -#define HOST_MASK 128 -#define NLA_PUT_ADDR(skb, ip) \ - NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), ip); -#endif - -#define hash_ip_pf_timeout TOKEN(hash_ip, PF, _timeout) -#define hash_ip_pf_expired TOKEN(hash_ip, PF, _expired) -#define hash_ip_pf_elem_test TOKEN(hash_ip, PF, _elem_test) -#define hash_ip_pf_elem_exist TOKEN(hash_ip, PF, _elem_exist) -#define hash_ip_pf_elem_expired TOKEN(hash_ip, PF, _elem_expired) -#define hash_ip_pf_test TOKEN(hash_ip, PF, _test) -#define hash_ip_pf_add TOKEN(hash_ip, PF, _add) -#define hash_ip_pf_readd TOKEN(hash_ip, PF, _readd) -#define hash_ip_pf_del TOKEN(hash_ip, PF, _del) -#define hash_ip_pf_map_expired TOKEN(hash_ip, PF, _map_expired) -#define hash_ip_pf_set_expired TOKEN(hash_ip, PF, _set_expired) 
-#define hash_ip_pf_head TOKEN(hash_ip, PF, _head) -#define hash_ip_pf_list TOKEN(hash_ip, PF, _list) -#define hash_ip_pf_resize TOKEN(hash_ip, PF, _resize) -#define hash_ip_pf TOKEN(hash_ip, PF , ) -#define hash_ip_pf_kadt TOKEN(hash_ip, PF, _kadt) -#define hash_ip_pf_uadt TOKEN(hash_ip, PF, _uadt) -#define hash_ip_pf_destroy TOKEN(hash_ip, PF, _destroy) -#define hash_ip_pf_flush TOKEN(hash_ip, PF, _flush) -#define hash_ip_pf_timeout_gc TOKEN(hash_ip, PF, _timeout_gc) -#define hash_ip_pf_gc_init TOKEN(hash_ip, PF, _gc_init) -#define ip_pf_hash TOKEN(ip, PF, _hash) -#define ip_pf_cmp TOKEN(ip, PF, _cmp) -#define ip_pf_null TOKEN(ip, PF, _null) -#define ip_pf_cpy TOKEN(ip, PF, _cpy) -#define ip_pf_zero_out TOKEN(ip, PF, _zero_out) -#define ip_pf_elem TOKEN(ip, PF, _elem) -#define ip_pf_elem_timeout TOKEN(ip, PF, _elem_timeout) -#define ip_pf_get_elem_timeout TOKEN(get_ip, PF, _elem_timeout) - -static inline bool -hash_ip_pf_timeout(const struct hash_ip *map, uint32_t id) -{ - struct ip_pf_elem_timeout *elem = hash_ip_elem(map, id); - - return ip_set_timeout_test(elem->timeout); -} - -static inline bool -hash_ip_pf_expired(const struct hash_ip *map, uint32_t id) -{ - struct ip_pf_elem_timeout *elem = hash_ip_elem(map, id); - - return ip_set_timeout_expired(elem->timeout); -} - -static inline bool -hash_ip_pf_elem_test(const struct hash_ip *map, bool with_timeout, - uint32_t id, struct ip_pf_elem * ip) -{ - struct ip_pf_elem *elem = hash_ip_elem(map, id); - - return ip_pf_cmp(elem, ip) - && (!with_timeout || hash_ip_pf_timeout(map, id)); -} - -static inline bool -hash_ip_pf_elem_exist(const struct hash_ip *map, bool with_timeout, - uint32_t id) -{ - struct ip_pf_elem *elem = hash_ip_elem(map, id); - - return !(ip_pf_null(elem) - || (with_timeout && hash_ip_pf_expired(map, id))); -} - -static inline bool -hash_ip_pf_elem_expired(const struct hash_ip *map, bool with_timeout, - uint32_t id) -{ - struct ip_pf_elem *elem = hash_ip_elem(map, id); - - return ip_pf_null(elem) 
- || (with_timeout && hash_ip_pf_expired(map, id)); -} - -static inline uint32_t -hash_ip_pf_test(const struct hash_ip *map, bool with_timeout, - struct ip_pf_elem * ip) -{ - uint32_t id; - uint8_t i; - - for (i = 0; i < map->probes; i++) { - id = ip_pf_hash(ip, *(map->initval + i), map->hashsize); - if (hash_ip_pf_elem_test(map, with_timeout, id, ip)) - return id + 1; - /* No shortcut - there can be deleted entries. */ - } - return 0; -} - -static void -hash_ip_pf_map_expired(struct hash_ip *map) -{ - struct ip_pf_elem_timeout *table = map->members; - uint32_t i; - - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - for (i = 0; i < map->hashsize; i++) - if (ip_set_timeout_expired(table[i].timeout)) { - ip_pf_zero_out((struct ip_pf_elem *)&table[i]); - table[i].timeout = IPSET_ELEM_UNSET; - map->elements--; - } -} - -static inline void -hash_ip_pf_set_expired(struct ip_set *set) -{ - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); - hash_ip_pf_map_expired(set->data); - read_unlock_bh(&set->lock); -} - -static int -hash_ip_pf_add(struct hash_ip *map, bool with_timeout, - struct ip_pf_elem *ip, uint32_t timeout) -{ - uint32_t id, empty = 0; - uint8_t i; - - if (map->elements >= map->maxelem) { - if (with_timeout) { - hash_ip_pf_map_expired(map); - if (map->elements < map->maxelem) - goto doit; - } - return -IPSET_ERR_HASH_FULL; - } - -doit: - for (i = 0; i < map->probes; i++) { - id = ip_pf_hash(ip, *(map->initval + i), map->hashsize); - if (hash_ip_pf_elem_test(map, with_timeout, id, ip)) - return -IPSET_ERR_EXIST; - if (empty == 0 - && hash_ip_pf_elem_expired(map, with_timeout, id)) - empty = id + 1; - /* There can be deleted entries, must check all slots */ - } - if (!empty) - /* Trigger rehashing */ - return -EAGAIN; - - if (with_timeout) { - struct ip_pf_elem_timeout *e = hash_ip_elem(map, empty - 1); - e->timeout = 
ip_set_timeout_set(timeout); - D("add with timeout: %u (%lu)", timeout, e->timeout); - ip_pf_cpy((struct ip_pf_elem *)e, ip); - } else { - struct ip_pf_elem *e = hash_ip_elem(map, empty - 1); - ip_pf_cpy(e, ip); - } - map->elements++; - return 0; -} - -static int -hash_ip_pf_readd(struct hash_ip *map, bool with_timeout, struct ip_pf_elem *ip) -{ - uint32_t id, empty = 0; - uint8_t i; - - for (i = 0; empty == 0 && i < map->probes; i++) { - id = ip_pf_hash(ip, *(map->initval + i), map->hashsize); - if (ip_pf_null(hash_ip_elem(map, id))) - empty = id + 1; - } - if (!empty) - /* Trigger rehashing */ - return -EAGAIN; - - if (with_timeout) { - struct ip_pf_elem_timeout *e = hash_ip_elem(map, empty - 1); - e->timeout = ip_pf_get_elem_timeout(ip); - ip_pf_cpy((struct ip_pf_elem *)e, ip); - } else { - struct ip_pf_elem *e = hash_ip_elem(map, empty - 1); - ip_pf_cpy(e, ip); - } - map->elements++; - return 0; -} - -static int -hash_ip_pf_del(struct hash_ip *map, bool with_timeout, struct ip_pf_elem *ip) -{ - struct ip_pf_elem *e; - uint32_t id, found = 0; - uint8_t i; - - for (i = 0; i < map->probes; i++) { - id = ip_pf_hash(ip, *(map->initval + i), map->hashsize); - if (hash_ip_pf_elem_test(map, with_timeout, id, ip)) { - found = id + 1; - break; - } - } - if (!found) - return -IPSET_ERR_EXIST; - - e = hash_ip_elem(map, found - 1); - ip_pf_zero_out(e); - if (with_timeout) - ((struct ip_pf_elem_timeout *)e)->timeout = IPSET_ELEM_UNSET; - - map->elements--; - - return 0; -} - -static int -hash_ip_pf_head(struct ip_set *set, struct sk_buff *skb) -{ - const struct hash_ip *map = set->data; - struct nlattr *nested; - - if (set->flags & IP_SET_FLAG_TIMEOUT) - hash_ip_pf_set_expired(set); - - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) - goto nla_put_failure; - NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE, htonl(map->hashsize)); - NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(map->maxelem)); - if (map->netmask != HOST_MASK) - NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, 
map->netmask); - NLA_PUT_U8(skb, IPSET_ATTR_PROBES, map->probes); - NLA_PUT_U8(skb, IPSET_ATTR_RESIZE, map->resize); - NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(map->elements)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, - htonl(map->hashsize * map->elem_size)); - if (set->flags & IP_SET_FLAG_TIMEOUT) - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); - ipset_nest_end(skb, nested); - - return 0; -nla_put_failure: - return -EFAULT; -} - -static int -hash_ip_pf_list(struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) -{ - const struct hash_ip *map = set->data; - struct nlattr *atd, *nested; - struct ip_pf_elem *elem; - uint32_t id, first = cb->args[2]; - bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; - - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!atd) - return -EFAULT; - for (; cb->args[2] < map->hashsize; cb->args[2]++) { - id = cb->args[2]; - if (hash_ip_pf_elem_expired(map, with_timeout, id)) - continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (id == first) { - nla_nest_cancel(skb, atd); - return -EFAULT; - } else - goto nla_put_failure; - } - elem = hash_ip_elem(map, id); - NLA_PUT_ADDR(skb, &elem->ip); - if (map->netmask != HOST_MASK) - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, map->netmask); - if (with_timeout) { - unsigned long timeout = ip_pf_get_elem_timeout(elem); - D("list with timeout: %u (%lu)", - ip_set_timeout_get(timeout), timeout); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(timeout))); - } - ipset_nest_end(skb, nested); - } - ipset_nest_end(skb, atd); - /* Set listing finished */ - cb->args[2] = 0; - - return 0; - -nla_put_failure: - nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); - return 0; -} - -static int -hash_ip_pf_resize(struct ip_set *set, uint8_t retried) -{ - struct hash_ip *map = set->data, *tmp; - void *members; - uint32_t i, hashsize = map->hashsize; 
- uint8_t oflags, flags = set->flags; - bool with_timeout = flags & IP_SET_FLAG_TIMEOUT; - int ret; - - if (map->resize == 0) - return -IPSET_ERR_HASH_FULL; - - /* Try to cleanup first */ - if (retried == 0 && with_timeout) { - i = map->elements; - hash_ip_pf_set_expired(set); - if (map->elements < i) - return 0; - } - -again: - ret = 0; - - /* Calculate new hash size */ - hashsize += (hashsize * map->resize)/100; - if (hashsize == map->hashsize) - hashsize++; - if (hashsize >= map->maxelem) - return -IPSET_ERR_HASH_FULL; - - printk("Rehashing of set %s triggered: hash grows from %lu to %lu\n", - set->name, - (long unsigned)map->hashsize, - (long unsigned)hashsize); - - tmp = kmalloc(sizeof(struct hash_ip) - + map->probes * sizeof(initval_t), GFP_ATOMIC); - if (!tmp) - return -ENOMEM; - - memcpy(tmp, map, sizeof(*map) + map->probes * sizeof(initval_t)); - tmp->elements = 0; - tmp->hashsize = hashsize; - tmp->members = ip_set_alloc(hashsize * map->elem_size, - GFP_ATOMIC, &flags); - if (!tmp->members) { - kfree(tmp); - return -ENOMEM; - } - - write_lock_bh(&set->lock); - map = set->data; /* Play safe */ - for (i = 0; i < map->hashsize && ret == 0; i++) { - if (hash_ip_pf_elem_exist(map, with_timeout, i)) - ret = hash_ip_pf_readd(tmp, with_timeout, - hash_ip_elem(map, i)); - } - if (ret) { - /* Failure, try again */ - write_unlock_bh(&set->lock); - ip_set_free(tmp->members, flags); - kfree(tmp); - goto again; - } - - /* Success at resizing! 
*/ - members = map->members; - oflags = set->flags; - - map->hashsize = tmp->hashsize; - map->members = tmp->members; - map->elements = tmp->elements; - set->flags = flags; - write_unlock_bh(&set->lock); - - ip_set_free(members, oflags); - kfree(tmp); - - return 0; -} - -static int -hash_ip_pf_kadt(struct ip_set *set, const struct sk_buff * skb, - enum ipset_adt adt, uint8_t pf, const uint8_t *flags); -static int -hash_ip_pf_uadt(struct ip_set *set, struct nlattr *head, int len, - enum ipset_adt adt, uint32_t *lineno, uint32_t flags); - -static const struct ip_set_type_variant hash_ip_pf __read_mostly = { - .kadt = hash_ip_pf_kadt, - .uadt = hash_ip_pf_uadt, - .destroy = hash_ip_pf_destroy, - .flush = hash_ip_pf_flush, - .head = hash_ip_pf_head, - .list = hash_ip_pf_list, - .resize = hash_ip_pf_resize, -}; - -static void -hash_ip_pf_timeout_gc(unsigned long ul_set) -{ - struct ip_set *set = (struct ip_set *) ul_set; - struct hash_ip *map = set->data; - - hash_ip_pf_set_expired(set); - - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; - add_timer(&map->gc); -} - -static inline void -hash_ip_pf_gc_init(struct ip_set *set) -{ - struct hash_ip *map = set->data; - - init_timer(&map->gc); - map->gc.data = (unsigned long) set; - map->gc.function = hash_ip_pf_timeout_gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; - add_timer(&map->gc); -} - -#undef HOST_MASK -#undef NLA_PUT_ADDR -#undef hash_ip_pf_timeout -#undef hash_ip_pf_expired -#undef hash_ip_pf_elem_test -#undef hash_ip_pf_elem_exist -#undef hash_ip_pf_elem_expired -#undef hash_ip_pf_test -#undef hash_ip_pf_add -#undef hash_ip_pf_readd -#undef hash_ip_pf_del -#undef hash_ip_pf_map_expired -#undef hash_ip_pf_set_expired -#undef hash_ip_pf_head -#undef hash_ip_pf_list -#undef hash_ip_pf_resize -#undef hash_ip_pf -#undef hash_ip_pf_kadt -#undef hash_ip_pf_uadt -#undef hash_ip_pf_destroy -#undef hash_ip_pf_flush -#undef hash_ip_pf_timeout_gc -#undef hash_ip_pf_gc_init -#undef 
ip_pf_hash -#undef ip_pf_cmp -#undef ip_pf_null -#undef ip_pf_cpy -#undef ip_pf_zero_out -#undef ip_pf_elem -#undef ip_pf_elem_timeout -#undef ip_pf_get_elem_timeout diff --git a/kernel/ip_set_hash_ipport.c b/kernel/ip_set_hash_ipport.c index 36e68b0..8210f67 100644 --- a/kernel/ip_set_hash_ipport.c +++ b/kernel/ip_set_hash_ipport.c @@ -1,197 +1,455 @@ -/* Copyright (C) 2003-2008 Jozsef Kadlecsik +/* Copyright (C) 2003-2010 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -/* Kernel module implementing an ip+port hash set */ +/* Kernel module implementing an IP set type: the hash:ip,port type */ +#include +#include #include -#include #include -#include -#include #include -#include #include #include #include #include #include - #include +#include +#include +#include -#include -#include +#include +#include +#include +#include +#include -static int limit = MAX_RANGE; +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port"); + +/* Type specific function prefix */ +#define TYPE hash_ipport + +static bool +hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipport4_same_set hash_ipport_same_set +#define hash_ipport6_same_set hash_ipport_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipport4_elem { + u32 ip; + u16 port; + u16 match; +}; + +/* Member elements with timeout support */ +struct hash_ipport4_telem { + u32 ip; + u16 port; + u16 match; + unsigned long timeout; +}; + +static inline bool +hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1, + const struct hash_ipport4_elem *ip2) +{ + return ip1->ip == ip2->ip && ip1->port == ip2->port; +} -static inline __u32 -ipporthash_id(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t 
port) +static inline bool +hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem) { - struct ip_set_ipporthash *map = set->data; - __u32 id; - u_int16_t i; - ip_set_ip_t *elem; + return elem->match == 0; +} - ip = pack_ip_port(map, ip, port); - - if (!ip) - return UINT_MAX; - - for (i = 0; i < map->probes; i++) { - id = jhash_ip(map, i, ip) % map->hashsize; - DP("hash key: %u", id); - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id); - if (*elem == ip) - return id; - /* No shortcut - there can be deleted entries. */ - } - return UINT_MAX; +static inline void +hash_ipport4_data_copy(struct hash_ipport4_elem *dst, + const struct hash_ipport4_elem *src) +{ + dst->ip = src->ip; + dst->port = src->port; + dst->match = 1; } -static inline int -ipporthash_test(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t port) +static inline void +hash_ipport4_data_swap(struct hash_ipport4_elem *dst, + struct hash_ipport4_elem *src) { - struct ip_set_ipporthash *map = set->data; - - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; - - return (ipporthash_id(set, ip, port) != UINT_MAX); -} - -#define KADT_CONDITION \ - ip_set_ip_t port; \ - \ - if (flags[1] == 0) \ - return 0; \ - \ - port = get_port(skb, flags++); \ - \ - if (port == INVALID_PORT) \ - return 0; - -UADT(ipporthash, test, req->port) -KADT(ipporthash, test, ipaddr, port) - -static inline int -__ipporthash_add(struct ip_set_ipporthash *map, ip_set_ip_t *ip) -{ - __u32 probe; - u_int16_t i; - ip_set_ip_t *elem, *slot = NULL; - - for (i = 0; i < map->probes; i++) { - probe = jhash_ip(map, i, *ip) % map->hashsize; - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, probe); - if (*elem == *ip) - return -EEXIST; - if (!(slot || *elem)) - slot = elem; - /* There can be deleted entries, must check all slots */ + swap(dst->ip, src->ip); + swap(dst->port, src->port); +} + +static inline void +hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem) +{ + elem->match = 0; +} + +static inline bool 
+hash_ipport4_data_list(struct sk_buff *skb, + const struct hash_ipport4_elem *data) +{ + NLA_PUT_NET32(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + return 0; + +nla_put_failure: + return 1; +} + +static inline bool +hash_ipport4_data_tlist(struct sk_buff *skb, + const struct hash_ipport4_elem *data) +{ + const struct hash_ipport4_telem *tdata = + (const struct hash_ipport4_telem *)data; + + NLA_PUT_NET32(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem data = {}; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + if (!get_port(AF_INET, skb, flags & IPSET_DIM_TWO_SRC, &data.port)) + return -EINVAL; + + return adtfn(set, &data, GFP_ATOMIC, h->timeout); +} + +static const struct nla_policy +hash_ipport4_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_ipport4_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + struct chash *h = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem data = {}; + u32 timeout = h->timeout; + int ret; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_ipport4_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + data.ip = ip_set_get_n32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; + + if 
(tb[IPSET_ATTR_PORT]) + data.port = ip_set_get_n16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (slot) { - *slot = *ip; - map->elements++; - return 0; + + ret = adtfn(set, &data, GFP_KERNEL, timeout); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); } - /* Trigger rehashing */ - return -EAGAIN; + return ret; } -static inline int -ipporthash_add(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t port) +static bool +hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b) { - struct ip_set_ipporthash *map = set->data; - if (map->elements > limit) - return -ERANGE; - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; + struct chash *x = a->data; + struct chash *y = b->data; + + return x->maxelem == y->maxelem + && x->timeout == y->timeout + && x->htable_bits == y->htable_bits /* resizing ? 
*/ + && x->array_size == y->array_size + && x->chain_limit == y->chain_limit; +} - ip = pack_ip_port(map, ip, port); +/* The type variant functions: IPv6 */ - if (!ip) - return -ERANGE; - - return __ipporthash_add(map, &ip); +struct hash_ipport6_elem { + union nf_inet_addr ip; + u16 port; + u16 match; +}; + +struct hash_ipport6_telem { + union nf_inet_addr ip; + u16 port; + u16 match; + unsigned long timeout; +}; + +static inline bool +hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, + const struct hash_ipport6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 + && ip1->port == ip2->port; } -UADT(ipporthash, add, req->port) -KADT(ipporthash, add, ipaddr, port) +static inline bool +hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem) +{ + return elem->match == 0; +} static inline void -__ipporthash_retry(struct ip_set_ipporthash *tmp, - struct ip_set_ipporthash *map) +hash_ipport6_data_copy(struct hash_ipport6_elem *dst, + const struct hash_ipport6_elem *src) { - tmp->first_ip = map->first_ip; - tmp->last_ip = map->last_ip; + memcpy(dst, src, sizeof(*dst)); + dst->match = 1; } -HASH_RETRY(ipporthash, ip_set_ip_t) - -static inline int -ipporthash_del(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t port) +static inline void +hash_ipport6_data_swap(struct hash_ipport6_elem *dst, + struct hash_ipport6_elem *src) { - struct ip_set_ipporthash *map = set->data; - ip_set_ip_t id; - ip_set_ip_t *elem; + struct hash_ipport6_elem tmp; - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; + memcpy(&tmp, dst, sizeof(tmp)); + memcpy(dst, src, sizeof(tmp)); + memcpy(src, &tmp, sizeof(tmp)); +} - id = ipporthash_id(set, ip, port); +static inline void +hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem) +{ + elem->match = 0; +} - if (id == UINT_MAX) - return -EEXIST; - - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id); - *elem = 0; - map->elements--; +static inline bool +hash_ipport6_data_list(struct sk_buff *skb, + 
const struct hash_ipport6_elem *data) +{ + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + return 0; +nla_put_failure: + return 1; +} + +static inline bool +hash_ipport6_data_tlist(struct sk_buff *skb, + const struct hash_ipport6_elem *data) +{ + const struct hash_ipport6_telem *e = + (const struct hash_ipport6_telem *)data; + + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &e->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); return 0; + +nla_put_failure: + return 1; } -UADT(ipporthash, del, req->port) -KADT(ipporthash, del, ipaddr, port) +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport6_elem data = {}; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + if (!get_port(AF_INET, skb, flags & IPSET_DIM_TWO_SRC, &data.port)) + return -EINVAL; + + return adtfn(set, &data, GFP_ATOMIC, h->timeout); +} -static inline int -__ipporthash_create(const struct ip_set_req_ipporthash_create *req, - struct ip_set_ipporthash *map) +static const struct nla_policy +hash_ipport6_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_ipport6_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) { - if (req->to - req->from > MAX_RANGE) { - ip_set_printk("range too big, %d elements (max %d)", - req->to - req->from + 1, MAX_RANGE+1); - return -ENOEXEC; + struct chash *h = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + 
ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport6_elem data = {}; + u32 timeout = h->timeout; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_ipport6_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + memcpy(&data.ip, nla_data(tb[IPSET_ATTR_IP]), + sizeof(struct in6_addr)); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PORT]) + data.port = ip_set_get_n16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - map->first_ip = req->from; - map->last_ip = req->to; - return 0; + + return adtfn(set, &data, GFP_KERNEL, timeout); } -HASH_CREATE(ipporthash, ip_set_ip_t) -HASH_DESTROY(ipporthash) -HASH_FLUSH(ipporthash, ip_set_ip_t) +/* Create hash:ip type of sets */ -static inline void -__ipporthash_list_header(const struct ip_set_ipporthash *map, - struct ip_set_req_ipporthash_create *header) +static const struct nla_policy +hash_ipport_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_ipport_create(struct ip_set *set, struct nlattr *head, int len, u32 flags) { - header->from = map->first_ip; - header->to = map->last_ip; -} + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + struct chash *h; -HASH_LIST_HEADER(ipporthash) -HASH_LIST_MEMBERS_SIZE(ipporthash, ip_set_ip_t) -HASH_LIST_MEMBERS(ipporthash, ip_set_ip_t) + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; -IP_SET_RTYPE(ipporthash, IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_DATA_DOUBLE) + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, 
len, + hash_ipport_create_policy)) + return -IPSET_ERR_PROTOCOL; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("ipporthash type of IP sets"); -module_param(limit, int, 0600); -MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets"); + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + h->htable_bits = htable_bits(hashsize); + h->array_size = CHASH_DEFAULT_ARRAY_SIZE; + h->chain_limit = CHASH_DEFAULT_CHAIN_LIMIT; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + h->htable = ip_set_alloc(jhash_size(h->htable_bits) * sizeof(struct slist), + GFP_KERNEL, &set->flags); + if (!h->htable) { + kfree(h); + return -ENOMEM; + } + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ipport4_tvariant : &hash_ipport6_tvariant; + + if (set->family == AF_INET) + hash_ipport4_gc_init(set); + else + hash_ipport6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? 
&hash_ipport4_variant : &hash_ipport6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)", + set->name, jhash_size(h->htable_bits), + h->htable_bits, h->maxelem, set->data, h->htable); + + return 0; +} + +static struct ip_set_type hash_ipport_type = { + .name = "hash:ip,port", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, + .dimension = IPSET_DIM_TWO, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipport_create, + .me = THIS_MODULE, +}; + +static int __init +hash_ipport_init(void) +{ + return ip_set_type_register(&hash_ipport_type); +} + +static void __exit +hash_ipport_fini(void) +{ + ip_set_type_unregister(&hash_ipport_type); +} -REGISTER_MODULE(ipporthash) +module_init(hash_ipport_init); +module_exit(hash_ipport_fini); diff --git a/kernel/ip_set_hash_ipportip.c b/kernel/ip_set_hash_ipportip.c index 8b8f2a2..fbf2780 100644 --- a/kernel/ip_set_hash_ipportip.c +++ b/kernel/ip_set_hash_ipportip.c @@ -1,215 +1,485 @@ -/* Copyright (C) 2008 Jozsef Kadlecsik +/* Copyright (C) 2003-2010 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ -/* Kernel module implementing an ip+port+ip hash set */ +/* Kernel module implementing an IP set type: the hash:ip,port,ip type */ +#include +#include #include -#include #include -#include -#include #include -#include #include #include #include #include #include - #include +#include +#include +#include -#include -#include +#include +#include +#include +#include +#include -static int limit = MAX_RANGE; +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port,ip"); + +/* Type specific function prefix */ +#define TYPE hash_ipportip + +static bool +hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipportip4_same_set hash_ipportip_same_set +#define hash_ipportip6_same_set hash_ipportip_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipportip4_elem { + u32 ip; + u32 ip2; + u16 port; + u16 match; +}; + +/* Member elements with timeout support */ +struct hash_ipportip4_telem { + u32 ip; + u32 ip2; + u16 port; + u16 match; + unsigned long timeout; +}; + +static inline bool +hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, + const struct hash_ipportip4_elem *ip2) +{ + return ip1->ip == ip2->ip + && ip1->ip2 == ip2->ip2 + && ip1->port == ip2->port; +} -#define jhash_ip2(map, i, ipport, ip1) \ - jhash_2words(ipport, ip1, *(map->initval + i)) +static inline bool +hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem) +{ + return elem->match == 0; +} -static inline __u32 -ipportiphash_id(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, ip_set_ip_t ip1) +static inline void +hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst, + const struct hash_ipportip4_elem *src) { - struct ip_set_ipportiphash *map = set->data; - __u32 id; - u_int16_t i; - struct ipportip *elem; + memcpy(dst, src, sizeof(*dst)); + dst->match = 1; +} - ip = pack_ip_port(map, ip, 
port); - if (!(ip || ip1)) - return UINT_MAX; - - for (i = 0; i < map->probes; i++) { - id = jhash_ip2(map, i, ip, ip1) % map->hashsize; - DP("hash key: %u", id); - elem = HARRAY_ELEM(map->members, struct ipportip *, id); - if (elem->ip == ip && elem->ip1 == ip1) - return id; - /* No shortcut - there can be deleted entries. */ - } - return UINT_MAX; +static inline void +hash_ipportip4_data_swap(struct hash_ipportip4_elem *dst, + struct hash_ipportip4_elem *src) +{ + struct hash_ipportip4_elem tmp; + + memcpy(&tmp, dst, sizeof(tmp)); + memcpy(dst, src, sizeof(tmp)); + memcpy(src, &tmp, sizeof(tmp)); } -static inline int -ipportiphash_test(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, ip_set_ip_t ip1) +static inline void +hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem) { - struct ip_set_ipportiphash *map = set->data; + elem->match = 0; +} + +static inline bool +hash_ipportip4_data_list(struct sk_buff *skb, + const struct hash_ipportip4_elem *data) +{ + NLA_PUT_NET32(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_IP2, data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + return 0; + +nla_put_failure: + return 1; +} + +static inline bool +hash_ipportip4_data_tlist(struct sk_buff *skb, + const struct hash_ipportip4_elem *data) +{ + const struct hash_ipportip4_telem *tdata = + (const struct hash_ipportip4_telem *)data; + + NLA_PUT_NET32(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_IP2, tdata->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem data = {}; - if (ip < map->first_ip || 
ip > map->last_ip) - return -ERANGE; - - return (ipportiphash_id(set, ip, port, ip1) != UINT_MAX); -} - -#define KADT_CONDITION \ - ip_set_ip_t port, ip1; \ - \ - if (flags[2] == 0) \ - return 0; \ - \ - port = get_port(skb, flags++); \ - ip1 = ipaddr(skb, flags++); \ - \ - if (port == INVALID_PORT) \ - return 0; - -UADT(ipportiphash, test, req->port, req->ip1) -KADT(ipportiphash, test, ipaddr, port, ip1) - -static inline int -__ipportip_add(struct ip_set_ipportiphash *map, - ip_set_ip_t ip, ip_set_ip_t ip1) -{ - __u32 probe; - u_int16_t i; - struct ipportip *elem, *slot = NULL; - - for (i = 0; i < map->probes; i++) { - probe = jhash_ip2(map, i, ip, ip1) % map->hashsize; - elem = HARRAY_ELEM(map->members, struct ipportip *, probe); - if (elem->ip == ip && elem->ip1 == ip1) - return -EEXIST; - if (!(slot || elem->ip || elem->ip1)) - slot = elem; - /* There can be deleted entries, must check all slots */ + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + if (!get_port(AF_INET, skb, flags & IPSET_DIM_TWO_SRC, &data.port)) + return -EINVAL; + ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2); + + return adtfn(set, &data, GFP_ATOMIC, h->timeout); +} + +static const struct nla_policy +hash_ipportip4_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_IP2] = { .type = NLA_U32 }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_ipportip4_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + struct chash *h = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem data = {}; + u32 timeout = h->timeout; + int ret; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_ipportip4_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + data.ip = 
ip_set_get_n32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP2]) + data.ip2 = ip_set_get_n32(tb[IPSET_ATTR_IP2]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PORT]) + data.port = ip_set_get_n16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (slot) { - slot->ip = ip; - slot->ip1 = ip1; - map->elements++; - return 0; + + ret = adtfn(set, &data, GFP_KERNEL, timeout); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); } - /* Trigger rehashing */ - return -EAGAIN; + return ret; } -static inline int -__ipportiphash_add(struct ip_set_ipportiphash *map, - struct ipportip *elem) +static bool +hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b) { - return __ipportip_add(map, elem->ip, elem->ip1); + struct chash *x = a->data; + struct chash *y = b->data; + + return x->maxelem == y->maxelem + && x->timeout == y->timeout + && x->htable_bits == y->htable_bits /* resizing ? 
*/ + && x->array_size == y->array_size + && x->chain_limit == y->chain_limit; } -static inline int -ipportiphash_add(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, ip_set_ip_t ip1) +/* The type variant functions: IPv6 */ + +struct hash_ipportip6_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + u16 port; + u16 match; +}; + +struct hash_ipportip6_telem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + u16 port; + u16 match; + unsigned long timeout; +}; + +static inline bool +hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, + const struct hash_ipportip6_elem *ip2) { - struct ip_set_ipportiphash *map = set->data; - - if (map->elements > limit) - return -ERANGE; - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; - - ip = pack_ip_port(map, ip, port); - if (!(ip || ip1)) - return -ERANGE; - - return __ipportip_add(map, ip, ip1); + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 + && ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 + && ip1->port == ip2->port; } -UADT(ipportiphash, add, req->port, req->ip1) -KADT(ipportiphash, add, ipaddr, port, ip1) +static inline bool +hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem) +{ + return elem->match == 0; +} static inline void -__ipportiphash_retry(struct ip_set_ipportiphash *tmp, - struct ip_set_ipportiphash *map) +hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst, + const struct hash_ipportip6_elem *src) { - tmp->first_ip = map->first_ip; - tmp->last_ip = map->last_ip; + memcpy(dst, src, sizeof(*dst)); + dst->match = 1; } -HASH_RETRY2(ipportiphash, struct ipportip) - -static inline int -ipportiphash_del(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, ip_set_ip_t ip1) +static inline void +hash_ipportip6_data_swap(struct hash_ipportip6_elem *dst, + struct hash_ipportip6_elem *src) { - struct ip_set_ipportiphash *map = set->data; - ip_set_ip_t id; - struct ipportip *elem; + struct hash_ipportip6_elem tmp; + + memcpy(&tmp, dst, 
sizeof(tmp)); + memcpy(dst, src, sizeof(tmp)); + memcpy(src, &tmp, sizeof(tmp)); +} - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; +static inline void +hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem) +{ + elem->match = 0; +} - id = ipportiphash_id(set, ip, port, ip1); +static inline bool +hash_ipportip6_data_list(struct sk_buff *skb, + const struct hash_ipportip6_elem *data) +{ + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &data->ip); + NLA_PUT(skb, IPSET_ATTR_IP2, sizeof(struct in6_addr), &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + return 0; - if (id == UINT_MAX) - return -EEXIST; - - elem = HARRAY_ELEM(map->members, struct ipportip *, id); - elem->ip = elem->ip1 = 0; - map->elements--; +nla_put_failure: + return 1; +} +static inline bool +hash_ipportip6_data_tlist(struct sk_buff *skb, + const struct hash_ipportip6_elem *data) +{ + const struct hash_ipportip6_telem *e = + (const struct hash_ipportip6_telem *)data; + + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &e->ip); + NLA_PUT(skb, IPSET_ATTR_IP2, sizeof(struct in6_addr), &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); return 0; + +nla_put_failure: + return 1; } -UADT(ipportiphash, del, req->port, req->ip1) -KADT(ipportiphash, del, ipaddr, port, ip1) +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip6_elem data = {}; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + if (!get_port(AF_INET, skb, flags & IPSET_DIM_TWO_SRC, &data.port)) + return -EINVAL; + ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + + return adtfn(set, &data, GFP_ATOMIC, h->timeout); +} 
-static inline int -__ipportiphash_create(const struct ip_set_req_ipportiphash_create *req, - struct ip_set_ipportiphash *map) +static const struct nla_policy +hash_ipportip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_IP2] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_ipportip6_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) { - if (req->to - req->from > MAX_RANGE) { - ip_set_printk("range too big, %d elements (max %d)", - req->to - req->from + 1, MAX_RANGE+1); - return -ENOEXEC; + struct chash *h = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip6_elem data = {}; + u32 timeout = h->timeout; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_ipportip6_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + memcpy(&data.ip, nla_data(tb[IPSET_ATTR_IP]), + sizeof(struct in6_addr)); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP2]) + memcpy(&data.ip2, nla_data(tb[IPSET_ATTR_IP2]), + sizeof(struct in6_addr)); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PORT]) + data.port = ip_set_get_n16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - map->first_ip = req->from; - map->last_ip = req->to; - return 0; + + return adtfn(set, &data, GFP_KERNEL, timeout); } -HASH_CREATE(ipportiphash, struct ipportip) -HASH_DESTROY(ipportiphash) -HASH_FLUSH(ipportiphash, struct ipportip) +/* Create hash:ip type of sets */ -static inline void -__ipportiphash_list_header(const struct ip_set_ipportiphash *map, - struct 
ip_set_req_ipportiphash_create *header) +static const struct nla_policy +hash_ipportip_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_ipportip_create(struct ip_set *set, struct nlattr *head, + int len, u32 flags) { - header->from = map->first_ip; - header->to = map->last_ip; -} + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + struct chash *h; -HASH_LIST_HEADER(ipportiphash) -HASH_LIST_MEMBERS_SIZE(ipportiphash, struct ipportip) -HASH_LIST_MEMBERS_MEMCPY(ipportiphash, struct ipportip, - (elem->ip || elem->ip1)) + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; -IP_SET_RTYPE(ipportiphash, IPSET_TYPE_IP | IPSET_TYPE_PORT - | IPSET_TYPE_IP1 | IPSET_DATA_TRIPLE) + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, + hash_ipportip_create_policy)) + return -IPSET_ERR_PROTOCOL; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("ipportiphash type of IP sets"); -module_param(limit, int, 0600); -MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets"); + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + h->htable_bits = htable_bits(hashsize); + h->array_size = CHASH_DEFAULT_ARRAY_SIZE; + h->chain_limit = CHASH_DEFAULT_CHAIN_LIMIT; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + h->htable = 
ip_set_alloc(jhash_size(h->htable_bits) * sizeof(struct slist), + GFP_KERNEL, &set->flags); + if (!h->htable) { + kfree(h); + return -ENOMEM; + } + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ipportip4_tvariant : &hash_ipportip6_tvariant; + + if (set->family == AF_INET) + hash_ipportip4_gc_init(set); + else + hash_ipportip6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ipportip4_variant : &hash_ipportip6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)", + set->name, jhash_size(h->htable_bits), + h->htable_bits, h->maxelem, set->data, h->htable); + + return 0; +} + +static struct ip_set_type hash_ipportip_type = { + .name = "hash:ip,port,ip", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, + .dimension = IPSET_DIM_THREE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipportip_create, + .me = THIS_MODULE, +}; + +static int __init +hash_ipportip_init(void) +{ + return ip_set_type_register(&hash_ipportip_type); +} + +static void __exit +hash_ipportip_fini(void) +{ + ip_set_type_unregister(&hash_ipportip_type); +} -REGISTER_MODULE(ipportiphash) +module_init(hash_ipportip_init); +module_exit(hash_ipportip_fini); diff --git a/kernel/ip_set_hash_ipportnet.c b/kernel/ip_set_hash_ipportnet.c index e0bb352..dfe9348 100644 --- a/kernel/ip_set_hash_ipportnet.c +++ b/kernel/ip_set_hash_ipportnet.c @@ -1,298 +1,554 @@ -/* Copyright (C) 2008 Jozsef Kadlecsik +/* Copyright (C) 2003-2010 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ -/* Kernel module implementing an ip+port+net hash set */ +/* Kernel module implementing an IP set type: the hash:ip,port,net type */ +#include +#include #include -#include #include -#include -#include #include -#include #include #include #include #include #include - #include +#include +#include +#include -#include -#include +#include +#include +#include +#include +#include -static int limit = MAX_RANGE; +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port,net type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port,net"); + +/* Type specific function prefix */ +#define TYPE hash_ipportnet + +static bool +hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipportnet4_same_set hash_ipportnet_same_set +#define hash_ipportnet6_same_set hash_ipportnet_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipportnet4_elem { + u32 ip; + u32 ip2; + u16 port; + u8 cidr; + u8 match; +}; + +/* Member elements with timeout support */ +struct hash_ipportnet4_telem { + u32 ip; + u32 ip2; + u16 port; + u8 cidr; + u8 match; + unsigned long timeout; +}; + +static inline bool +hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1, + const struct hash_ipportnet4_elem *ip2) +{ + return ip1->ip == ip2->ip + && ip1->ip2 == ip2->ip2 + && ip1->cidr == ip2->cidr + && ip1->port == ip2->port; +} -#define jhash_ip2(map, i, ipport, ip1) \ - jhash_2words(ipport, ip1, *(map->initval + i)) +static inline bool +hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem) +{ + return elem->match == 0; +} -static inline __u32 -ipportnethash_id_cidr(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, - ip_set_ip_t ip1, uint8_t cidr) +static inline void +hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst, + const struct hash_ipportnet4_elem *src) { - struct ip_set_ipportnethash *map = set->data; - __u32 id; - u_int16_t i; - struct ipportip 
*elem; - - ip = pack_ip_port(map, ip, port); - ip1 = pack_ip_cidr(ip1, cidr); - if (!(ip || ip1)) - return UINT_MAX; - - for (i = 0; i < map->probes; i++) { - id = jhash_ip2(map, i, ip, ip1) % map->hashsize; - DP("hash key: %u", id); - elem = HARRAY_ELEM(map->members, struct ipportip *, id); - if (elem->ip == ip && elem->ip1 == ip1) - return id; - /* No shortcut - there can be deleted entries. */ - } - return UINT_MAX; + memcpy(dst, src, sizeof(*dst)); + dst->match = 1; } -static inline __u32 -ipportnethash_id(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, ip_set_ip_t ip1) +static inline void +hash_ipportnet4_data_swap(struct hash_ipportnet4_elem *dst, + struct hash_ipportnet4_elem *src) { - struct ip_set_ipportnethash *map = set->data; - __u32 id = UINT_MAX; - int i; - - for (i = 0; i < 30 && map->cidr[i]; i++) { - id = ipportnethash_id_cidr(set, ip, port, ip1, map->cidr[i]); - if (id != UINT_MAX) - break; - } - return id; + struct hash_ipportnet4_elem tmp; + + memcpy(&tmp, dst, sizeof(tmp)); + memcpy(dst, src, sizeof(tmp)); + memcpy(src, &tmp, sizeof(tmp)); +} + +static inline void +hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr) +{ + elem->ip2 &= NETMASK(cidr); + elem->cidr = cidr; } -static inline int -ipportnethash_test_cidr(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, - ip_set_ip_t ip1, uint8_t cidr) +static inline void +hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem) { - struct ip_set_ipportnethash *map = set->data; - - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; + elem->match = 0; +} + +static inline bool +hash_ipportnet4_data_list(struct sk_buff *skb, + const struct hash_ipportnet4_elem *data) +{ + NLA_PUT_NET32(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_IP2, data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + return 0; - return (ipportnethash_id_cidr(set, ip, port, ip1, cidr) != UINT_MAX); 
+nla_put_failure: + return 1; } -static inline int -ipportnethash_test(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, ip_set_ip_t ip1) +static inline bool +hash_ipportnet4_data_tlist(struct sk_buff *skb, + const struct hash_ipportnet4_elem *data) { - struct ip_set_ipportnethash *map = set->data; - - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; + const struct hash_ipportnet4_telem *tdata = + (const struct hash_ipportnet4_telem *)data; + + NLA_PUT_NET32(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_IP2, tdata->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); - return (ipportnethash_id(set, ip, port, ip1) != UINT_MAX); + return 0; + +nla_put_failure: + return 1; } +#define IP_SET_HASH_WITH_NETS + +#define PF 4 +#define HOST_MASK 32 +#include + static int -ipportnethash_utest(struct ip_set *set, const void *data, u_int32_t size) +hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { - const struct ip_set_req_ipportnethash *req = data; + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet4_elem data = + { .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; - if (req->cidr <= 0 || req->cidr > 32) + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + if (!get_port(AF_INET, skb, flags & IPSET_DIM_TWO_SRC, &data.port)) return -EINVAL; - return (req->cidr == 32 - ? 
ipportnethash_test(set, req->ip, req->port, req->ip1) - : ipportnethash_test_cidr(set, req->ip, req->port, - req->ip1, req->cidr)); + ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2); + data.ip2 &= NETMASK(data.cidr); + + return adtfn(set, &data, GFP_ATOMIC, h->timeout); } -#define KADT_CONDITION \ - ip_set_ip_t port, ip1; \ - \ - if (flags[2] == 0) \ - return 0; \ - \ - port = get_port(skb, flags++); \ - ip1 = ipaddr(skb, flags++); \ - \ - if (port == INVALID_PORT) \ - return 0; - -KADT(ipportnethash, test, ipaddr, port, ip1) - -static inline int -__ipportnet_add(struct ip_set_ipportnethash *map, - ip_set_ip_t ip, ip_set_ip_t ip1) +static const struct nla_policy +hash_ipportnet4_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_IP2] = { .type = NLA_U32 }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) { - __u32 probe; - u_int16_t i; - struct ipportip *elem, *slot = NULL; - - for (i = 0; i < map->probes; i++) { - probe = jhash_ip2(map, i, ip, ip1) % map->hashsize; - elem = HARRAY_ELEM(map->members, struct ipportip *, probe); - if (elem->ip == ip && elem->ip1 == ip1) - return -EEXIST; - if (!(slot || elem->ip || elem->ip1)) - slot = elem; - /* There can be deleted entries, must check all slots */ + struct chash *h = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; + int ret; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_ipportnet4_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + data.ip = ip_set_get_n32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; + + if 
(tb[IPSET_ATTR_IP2]) + data.ip2 = ip_set_get_n32(tb[IPSET_ATTR_IP2]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_CIDR2]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + data.ip2 &= NETMASK(data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = ip_set_get_n16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (slot) { - slot->ip = ip; - slot->ip1 = ip1; - map->elements++; - return 0; + + ret = adtfn(set, &data, GFP_KERNEL, timeout); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); } - /* Trigger rehashing */ - return -EAGAIN; + return ret; } -static inline int -__ipportnethash_add(struct ip_set_ipportnethash *map, - struct ipportip *elem) +static bool +hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b) { - return __ipportnet_add(map, elem->ip, elem->ip1); + struct chash *x = a->data; + struct chash *y = b->data; + + return x->maxelem == y->maxelem + && x->timeout == y->timeout + && x->htable_bits == y->htable_bits /* resizing ? 
*/ + && x->array_size == y->array_size + && x->chain_limit == y->chain_limit; } -static inline int -ipportnethash_add(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, - ip_set_ip_t ip1, uint8_t cidr) +/* The type variant functions: IPv6 */ + +struct hash_ipportnet6_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + u16 port; + u8 cidr; + u8 match; +}; + +struct hash_ipportnet6_telem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + u16 port; + u8 cidr; + u8 match; + unsigned long timeout; +}; + +static inline bool +hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, + const struct hash_ipportnet6_elem *ip2) { - struct ip_set_ipportnethash *map = set->data; - struct ipportip; - int ret; - - if (map->elements > limit) - return -ERANGE; - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; - if (cidr <= 0 || cidr >= 32) - return -EINVAL; - if (map->nets[cidr-1] == UINT16_MAX) - return -ERANGE; + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 + && ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 + && ip1->cidr == ip2->cidr + && ip1->port == ip2->port; +} - ip = pack_ip_port(map, ip, port); - ip1 = pack_ip_cidr(ip1, cidr); - if (!(ip || ip1)) - return -ERANGE; - - ret =__ipportnet_add(map, ip, ip1); - if (ret == 0) { - if (!map->nets[cidr-1]++) - add_cidr_size(map->cidr, cidr); - } - return ret; +static inline bool +hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem) +{ + return elem->match == 0; } -#undef KADT_CONDITION -#define KADT_CONDITION \ - struct ip_set_ipportnethash *map = set->data; \ - uint8_t cidr = map->cidr[0] ? 
map->cidr[0] : 31; \ - ip_set_ip_t port, ip1; \ - \ - if (flags[2] == 0) \ - return 0; \ - \ - port = get_port(skb, flags++); \ - ip1 = ipaddr(skb, flags++); \ - \ - if (port == INVALID_PORT) \ - return 0; - -UADT(ipportnethash, add, req->port, req->ip1, req->cidr) -KADT(ipportnethash, add, ipaddr, port, ip1, cidr) +static inline void +hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst, + const struct hash_ipportnet6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); + dst->match = 1; +} static inline void -__ipportnethash_retry(struct ip_set_ipportnethash *tmp, - struct ip_set_ipportnethash *map) +hash_ipportnet6_data_swap(struct hash_ipportnet6_elem *dst, + struct hash_ipportnet6_elem *src) { - tmp->first_ip = map->first_ip; - tmp->last_ip = map->last_ip; - memcpy(tmp->cidr, map->cidr, sizeof(tmp->cidr)); - memcpy(tmp->nets, map->nets, sizeof(tmp->nets)); + struct hash_ipportnet6_elem tmp; + + memcpy(&tmp, dst, sizeof(tmp)); + memcpy(dst, src, sizeof(tmp)); + memcpy(src, &tmp, sizeof(tmp)); } -HASH_RETRY2(ipportnethash, struct ipportip) +static inline void +hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem) +{ + elem->match = 0; +} -static inline int -ipportnethash_del(struct ip_set *set, - ip_set_ip_t ip, ip_set_ip_t port, - ip_set_ip_t ip1, uint8_t cidr) +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) { - struct ip_set_ipportnethash *map = set->data; - ip_set_ip_t id; - struct ipportip *elem; + ip->ip6[0] &= NETMASK6(prefix)[0]; + ip->ip6[1] &= NETMASK6(prefix)[1]; + ip->ip6[2] &= NETMASK6(prefix)[2]; + ip->ip6[3] &= NETMASK6(prefix)[3]; +} - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; - if (!ip) - return -ERANGE; - if (cidr <= 0 || cidr >= 32) - return -EINVAL; +static inline void +hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip2, cidr); + elem->cidr = cidr; +} - id = ipportnethash_id_cidr(set, ip, port, ip1, cidr); +static inline bool 
+hash_ipportnet6_data_list(struct sk_buff *skb, + const struct hash_ipportnet6_elem *data) +{ + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &data->ip); + NLA_PUT(skb, IPSET_ATTR_IP2, sizeof(struct in6_addr), &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + return 0; - if (id == UINT_MAX) - return -EEXIST; - - elem = HARRAY_ELEM(map->members, struct ipportip *, id); - elem->ip = elem->ip1 = 0; - map->elements--; - if (!map->nets[cidr-1]--) - del_cidr_size(map->cidr, cidr); +nla_put_failure: + return 1; +} +static inline bool +hash_ipportnet6_data_tlist(struct sk_buff *skb, + const struct hash_ipportnet6_elem *data) +{ + const struct hash_ipportnet6_telem *e = + (const struct hash_ipportnet6_telem *)data; + + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &e->ip); + NLA_PUT(skb, IPSET_ATTR_IP2, sizeof(struct in6_addr), &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); return 0; + +nla_put_failure: + return 1; } -UADT(ipportnethash, del, req->port, req->ip1, req->cidr) -KADT(ipportnethash, del, ipaddr, port, ip1, cidr) +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include -static inline int -__ipportnethash_create(const struct ip_set_req_ipportnethash_create *req, - struct ip_set_ipportnethash *map) +static int +hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { - if (req->to - req->from > MAX_RANGE) { - ip_set_printk("range too big, %d elements (max %d)", - req->to - req->from + 1, MAX_RANGE+1); - return -ENOEXEC; - } - map->first_ip = req->from; - map->last_ip = req->to; - memset(map->cidr, 0, sizeof(map->cidr)); - memset(map->nets, 0, sizeof(map->nets)); - return 0; + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct 
hash_ipportnet6_elem data = + { .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + if (!get_port(AF_INET, skb, flags & IPSET_DIM_TWO_SRC, &data.port)) + return -EINVAL; + ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + ip6_netmask(&data.ip2, data.cidr); + + return adtfn(set, &data, GFP_ATOMIC, h->timeout); } -HASH_CREATE(ipportnethash, struct ipportip) -HASH_DESTROY(ipportnethash) -HASH_FLUSH_CIDR(ipportnethash, struct ipportip); +static const struct nla_policy +hash_ipportnet6_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_IP2] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; -static inline void -__ipportnethash_list_header(const struct ip_set_ipportnethash *map, - struct ip_set_req_ipportnethash_create *header) +static int +hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) { - header->from = map->first_ip; - header->to = map->last_ip; + struct chash *h = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet6_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_ipportnet6_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + memcpy(&data.ip, nla_data(tb[IPSET_ATTR_IP]), + sizeof(struct in6_addr)); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP2]) + memcpy(&data.ip2, nla_data(tb[IPSET_ATTR_IP2]), + sizeof(struct in6_addr)); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_CIDR2]) + data.cidr = 
nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&data.ip2, data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = ip_set_get_n16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + return adtfn(set, &data, GFP_KERNEL, timeout); } -HASH_LIST_HEADER(ipportnethash) +/* Create hash:ip type of sets */ + +static const struct nla_policy +hash_ipportnet_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_ipportnet_create(struct ip_set *set, struct nlattr *head, + int len, u32 flags) +{ + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + struct chash *h; -HASH_LIST_MEMBERS_SIZE(ipportnethash, struct ipportip) -HASH_LIST_MEMBERS_MEMCPY(ipportnethash, struct ipportip, - (elem->ip || elem->ip1)) + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; -IP_SET_RTYPE(ipportnethash, IPSET_TYPE_IP | IPSET_TYPE_PORT - | IPSET_TYPE_IP1 | IPSET_DATA_TRIPLE) + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, + hash_ipportnet_create_policy)) + return -IPSET_ERR_PROTOCOL; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("ipportnethash type of IP sets"); -module_param(limit, int, 0600); -MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets"); + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = 
ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct chash_nets) + * (set->family == AF_INET ? 31 : 127), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + h->htable_bits = htable_bits(hashsize); + h->array_size = CHASH_DEFAULT_ARRAY_SIZE; + h->chain_limit = CHASH_DEFAULT_CHAIN_LIMIT; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + h->htable = ip_set_alloc(jhash_size(h->htable_bits) * sizeof(struct slist), + GFP_KERNEL, &set->flags); + if (!h->htable) { + kfree(h); + return -ENOMEM; + } + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ipportnet4_tvariant : &hash_ipportnet6_tvariant; + + if (set->family == AF_INET) + hash_ipportnet4_gc_init(set); + else + hash_ipportnet6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ipportnet4_variant : &hash_ipportnet6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)", + set->name, jhash_size(h->htable_bits), + h->htable_bits, h->maxelem, set->data, h->htable); + + return 0; +} + +static struct ip_set_type hash_ipportnet_type = { + .name = "hash:ip,port,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, + .dimension = IPSET_DIM_THREE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipportnet_create, + .me = THIS_MODULE, +}; + +static int __init +hash_ipportnet_init(void) +{ + return ip_set_type_register(&hash_ipportnet_type); +} + +static void __exit +hash_ipportnet_fini(void) +{ + ip_set_type_unregister(&hash_ipportnet_type); +} -REGISTER_MODULE(ipportnethash) +module_init(hash_ipportnet_init); +module_exit(hash_ipportnet_fini); diff --git a/kernel/ip_set_hash_net.c b/kernel/ip_set_hash_net.c index e3b09e0..a8611c2 100644 --- a/kernel/ip_set_hash_net.c +++ b/kernel/ip_set_hash_net.c @@ -1,218 +1,488 @@ -/* Copyright 
(C) 2003-2008 Jozsef Kadlecsik +/* Copyright (C) 2003-2010 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -/* Kernel module implementing a cidr nethash set */ +/* Kernel module implementing an IP set type: the hash:net type */ +#include +#include #include -#include #include #include -#include #include #include #include #include #include - #include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:net type of IP sets"); +MODULE_ALIAS("ip_set_hash:net"); + +/* Type specific function prefix */ +#define TYPE hash_net + +static bool +hash_net_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_net4_same_set hash_net_same_set +#define hash_net6_same_set hash_net_same_set -#include +/* The type variant functions: IPv4 */ -static int limit = MAX_RANGE; +/* Member elements without timeout */ +struct hash_net4_elem { + u32 ip; + u8 cidr; /* Not hashed, zero for null value */ +}; -static inline __u32 -nethash_id_cidr(const struct ip_set_nethash *map, - ip_set_ip_t ip, - uint8_t cidr) +/* Member elements with timeout support */ +struct hash_net4_telem { + u32 ip; + u8 cidr; /* Not hashed, zero for null value */ + unsigned long timeout; +}; + +static inline bool +hash_net4_data_equal(const struct hash_net4_elem *ip1, + const struct hash_net4_elem *ip2) { - __u32 id; - u_int16_t i; - ip_set_ip_t *elem; + /* We don't have to check the cidr equality + * because overlapping nets cannot be added to the set + */ + return ip1->ip == ip2->ip; +} - ip = pack_ip_cidr(ip, cidr); - if (!ip) - return MAX_RANGE; - - for (i = 0; i < map->probes; i++) { - id = jhash_ip(map, i, ip) % map->hashsize; - DP("hash key: %u", id); - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id); - if (*elem == ip) - return 
id; - /* No shortcut - there can be deleted entries. */ - } - return UINT_MAX; +static inline bool +hash_net4_data_isnull(const struct hash_net4_elem *elem) +{ + return elem->cidr == 0; } -static inline __u32 -nethash_id(struct ip_set *set, ip_set_ip_t ip) +static inline void +hash_net4_data_copy(struct hash_net4_elem *dst, + const struct hash_net4_elem *src) { - const struct ip_set_nethash *map = set->data; - __u32 id = UINT_MAX; - int i; + dst->ip = src->ip; + dst->cidr = src->cidr; +} - for (i = 0; i < 30 && map->cidr[i]; i++) { - id = nethash_id_cidr(map, ip, map->cidr[i]); - if (id != UINT_MAX) - break; - } - return id; +static inline void +hash_net4_data_swap(struct hash_net4_elem *dst, + struct hash_net4_elem *src) +{ + swap(dst->ip, src->ip); + swap(dst->cidr, src->cidr); } -static inline int -nethash_test_cidr(struct ip_set *set, ip_set_ip_t ip, uint8_t cidr) +static inline void +hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr) { - const struct ip_set_nethash *map = set->data; + elem->ip &= NETMASK(cidr); + elem->cidr = cidr; +} - return (nethash_id_cidr(map, ip, cidr) != UINT_MAX); +/* Zero CIDR values cannot be stored */ +static inline void +hash_net4_data_zero_out(struct hash_net4_elem *elem) +{ + elem->cidr = 0; } -static inline int -nethash_test(struct ip_set *set, ip_set_ip_t ip) +static inline bool +hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data) { - return (nethash_id(set, ip) != UINT_MAX); + NLA_PUT_NET32(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + return 0; + +nla_put_failure: + return 1; } -static int -nethash_utest(struct ip_set *set, const void *data, u_int32_t size) +static inline bool +hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data) { - const struct ip_set_req_nethash *req = data; + const struct hash_net4_telem *tdata = + (const struct hash_net4_telem *)data; - if (req->cidr <= 0 || req->cidr > 32) - return -EINVAL; - return (req->cidr 
== 32 ? nethash_test(set, req->ip) - : nethash_test_cidr(set, req->ip, req->cidr)); + NLA_PUT_NET32(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, tdata->cidr); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; } -#define KADT_CONDITION +#define IP_SET_HASH_WITH_NETS -KADT(nethash, test, ipaddr) +#define PF 4 +#define HOST_MASK 32 +#include -static inline int -__nethash_add(struct ip_set_nethash *map, ip_set_ip_t *ip) +static int +hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { - __u32 probe; - u_int16_t i; - ip_set_ip_t *elem, *slot = NULL; + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net4_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; - for (i = 0; i < map->probes; i++) { - probe = jhash_ip(map, i, *ip) % map->hashsize; - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, probe); - if (*elem == *ip) - return -EEXIST; - if (!(slot || *elem)) - slot = elem; - /* There can be deleted entries, must check all slots */ - } - if (slot) { - *slot = *ip; - map->elements++; - return 0; - } - /* Trigger rehashing */ - return -EAGAIN; + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + data.ip &= NETMASK(data.cidr); + + return adtfn(set, &data, GFP_ATOMIC, h->timeout); } -static inline int -nethash_add(struct ip_set *set, ip_set_ip_t ip, uint8_t cidr) +static const struct nla_policy +hash_net4_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_net4_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) { - struct ip_set_nethash *map = set->data; + struct chash *h = 
set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net4_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; int ret; - - if (map->elements >= limit || map->nets[cidr-1] == UINT16_MAX) - return -ERANGE; - if (cidr <= 0 || cidr >= 32) - return -EINVAL; - ip = pack_ip_cidr(ip, cidr); - if (!ip) - return -ERANGE; - - ret = __nethash_add(map, &ip); - if (ret == 0) { - if (!map->nets[cidr-1]++) - add_cidr_size(map->cidr, cidr); + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_net4_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + data.ip = ip_set_get_n32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + data.ip &= NETMASK(data.cidr); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + ret = adtfn(set, &data, GFP_KERNEL, timeout); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); } - return ret; } -#undef KADT_CONDITION -#define KADT_CONDITION \ - struct ip_set_nethash *map = set->data; \ - uint8_t cidr = map->cidr[0] ? map->cidr[0] : 31; +static bool +hash_net_same_set(const struct ip_set *a, const struct ip_set *b) +{ + struct chash *x = a->data; + struct chash *y = b->data; + + return x->maxelem == y->maxelem + && x->timeout == y->timeout + && x->htable_bits == y->htable_bits /* resizing ? 
*/ + && x->array_size == y->array_size + && x->chain_limit == y->chain_limit; +} + +/* The type variant functions: IPv6 */ + +struct hash_net6_elem { + union nf_inet_addr ip; + u8 cidr; /* Not hashed */ +}; -UADT(nethash, add, req->cidr) -KADT(nethash, add, ipaddr, cidr) +struct hash_net6_telem { + union nf_inet_addr ip; + u8 cidr; /* Not hashed */ + unsigned long timeout; +}; + +static inline bool +hash_net6_data_equal(const struct hash_net6_elem *ip1, + const struct hash_net6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; +} + +static inline bool +hash_net6_data_isnull(const struct hash_net6_elem *elem) +{ + return elem->cidr == 0; +} static inline void -__nethash_retry(struct ip_set_nethash *tmp, struct ip_set_nethash *map) +hash_net6_data_copy(struct hash_net6_elem *dst, + const struct hash_net6_elem *src) { - memcpy(tmp->cidr, map->cidr, sizeof(tmp->cidr)); - memcpy(tmp->nets, map->nets, sizeof(tmp->nets)); + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); + dst->cidr = src->cidr; } -HASH_RETRY(nethash, ip_set_ip_t) +static inline void +hash_net6_data_swap(struct hash_net6_elem *dst, struct hash_net6_elem *src) +{ + struct hash_net6_elem tmp; + + memcpy(&tmp, dst, sizeof(tmp)); + memcpy(dst, src, sizeof(tmp)); + memcpy(src, &tmp, sizeof(tmp)); +} -static inline int -nethash_del(struct ip_set *set, ip_set_ip_t ip, uint8_t cidr) +static inline void +hash_net6_data_zero_out(struct hash_net6_elem *elem) { - struct ip_set_nethash *map = set->data; - ip_set_ip_t id, *elem; + elem->cidr = 0; +} - if (cidr <= 0 || cidr >= 32) - return -EINVAL; - - id = nethash_id_cidr(map, ip, cidr); - if (id == UINT_MAX) - return -EEXIST; - - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id); - *elem = 0; - map->elements--; - if (!map->nets[cidr-1]--) - del_cidr_size(map->cidr, cidr); - return 0; +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= NETMASK6(prefix)[0]; + ip->ip6[1] &= NETMASK6(prefix)[1]; + ip->ip6[2] &= 
NETMASK6(prefix)[2]; + ip->ip6[3] &= NETMASK6(prefix)[3]; } -UADT(nethash, del, req->cidr) -KADT(nethash, del, ipaddr, cidr) +static inline void +hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip, cidr); + elem->cidr = cidr; +} -static inline int -__nethash_create(const struct ip_set_req_nethash_create *req, - struct ip_set_nethash *map) +static inline bool +hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data) { - memset(map->cidr, 0, sizeof(map->cidr)); - memset(map->nets, 0, sizeof(map->nets)); + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + return 0; + +nla_put_failure: + return 1; +} + +static inline bool +hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data) +{ + const struct hash_net6_telem *e = + (const struct hash_net6_telem *)data; + NLA_PUT(skb, IPSET_ATTR_IP, sizeof(struct in6_addr), &e->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, e->cidr); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); return 0; + +nla_put_failure: + return 1; } -HASH_CREATE(nethash, ip_set_ip_t) -HASH_DESTROY(nethash) +#undef PF +#undef HOST_MASK -HASH_FLUSH_CIDR(nethash, ip_set_ip_t) +#define PF 6 +#define HOST_MASK 128 +#include -static inline void -__nethash_list_header(const struct ip_set_nethash *map, - struct ip_set_req_nethash_create *header) -{ +static int +hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct chash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net6_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6_netmask(&data.ip, data.cidr); + + return adtfn(set, &data, GFP_ATOMIC, h->timeout); +} + +static const struct nla_policy 
+hash_net6_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +hash_net6_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + struct chash *h = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net6_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_net6_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + memcpy(&data.ip, nla_data(tb[IPSET_ATTR_IP]), + sizeof(struct in6_addr)); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&data.ip, data.cidr); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + return adtfn(set, &data, GFP_KERNEL, timeout); } -HASH_LIST_HEADER(nethash) -HASH_LIST_MEMBERS_SIZE(nethash, ip_set_ip_t) -HASH_LIST_MEMBERS(nethash, ip_set_ip_t) +/* Create hash:ip type of sets */ -IP_SET_RTYPE(nethash, IPSET_TYPE_IP | IPSET_DATA_SINGLE) +static const struct nla_policy +hash_net_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("nethash type of IP sets"); -module_param(limit, int, 0600); -MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets"); +static int +hash_net_create(struct ip_set *set, struct 
nlattr *head, int len, u32 flags) +{ + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + struct chash *h; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, + hash_net_create_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct chash_nets) + * (set->family == AF_INET ? 31 : 127), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + h->htable_bits = htable_bits(hashsize); + h->array_size = CHASH_DEFAULT_ARRAY_SIZE; + h->chain_limit = CHASH_DEFAULT_CHAIN_LIMIT; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + h->htable = ip_set_alloc(jhash_size(h->htable_bits) * sizeof(struct slist), + GFP_KERNEL, &set->flags); + if (!h->htable) { + kfree(h); + return -ENOMEM; + } + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_net4_tvariant : &hash_net6_tvariant; + + if (set->family == AF_INET) + hash_net4_gc_init(set); + else + hash_net6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? 
&hash_net4_variant : &hash_net6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)", + set->name, jhash_size(h->htable_bits), + h->htable_bits, h->maxelem, set->data, h->htable); + + return 0; +} + +static struct ip_set_type hash_net_type = { + .name = "hash:net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_net_create, + .me = THIS_MODULE, +}; + +static int __init +hash_net_init(void) +{ + return ip_set_type_register(&hash_net_type); +} + +static void __exit +hash_net_fini(void) +{ + ip_set_type_unregister(&hash_net_type); +} -REGISTER_MODULE(nethash) +module_init(hash_net_init); +module_exit(hash_net_fini); diff --git a/kernel/ip_set_iptreemap.c b/kernel/ip_set_iptreemap.c deleted file mode 100644 index 02f657e..0000000 --- a/kernel/ip_set_iptreemap.c +++ /dev/null @@ -1,700 +0,0 @@ -/* Copyright (C) 2007 Sven Wegener - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - */ - -/* This modules implements the iptreemap ipset type. It uses bitmaps to - * represent every single IPv4 address as a bit. The bitmaps are managed in a - * tree structure, where the first three octets of an address are used as an - * index to find the bitmap and the last octet is used as the bit number. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define IPTREEMAP_DEFAULT_GC_TIME (5 * 60) -#define IPTREEMAP_DESTROY_SLEEP (100) - -static __KMEM_CACHE_T__ *cachep_b; -static __KMEM_CACHE_T__ *cachep_c; -static __KMEM_CACHE_T__ *cachep_d; - -static struct ip_set_iptreemap_d *fullbitmap_d; -static struct ip_set_iptreemap_c *fullbitmap_c; -static struct ip_set_iptreemap_b *fullbitmap_b; - -#if defined(__LITTLE_ENDIAN) -#define ABCD(a, b, c, d, addr) \ - do { \ - a = ((unsigned char *)addr)[3]; \ - b = ((unsigned char *)addr)[2]; \ - c = ((unsigned char *)addr)[1]; \ - d = ((unsigned char *)addr)[0]; \ - } while (0) -#elif defined(__BIG_ENDIAN) -#define ABCD(a,b,c,d,addrp) do { \ - a = ((unsigned char *)addrp)[0]; \ - b = ((unsigned char *)addrp)[1]; \ - c = ((unsigned char *)addrp)[2]; \ - d = ((unsigned char *)addrp)[3]; \ -} while (0) -#else -#error "Please fix asm/byteorder.h" -#endif /* __LITTLE_ENDIAN */ - -#define TESTIP_WALK(map, elem, branch, full) \ - do { \ - branch = (map)->tree[elem]; \ - if (!branch) \ - return 0; \ - else if (branch == full) \ - return 1; \ - } while (0) - -#define ADDIP_WALK(map, elem, branch, type, cachep, full) \ - do { \ - branch = (map)->tree[elem]; \ - if (!branch) { \ - branch = (type *) kmem_cache_alloc(cachep, GFP_ATOMIC); \ - if (!branch) \ - return -ENOMEM; \ - memset(branch, 0, sizeof(*branch)); \ - (map)->tree[elem] = branch; \ - } else if (branch == full) { \ - return -EEXIST; \ - } \ - } while (0) - -#define ADDIP_RANGE_LOOP(map, a, a1, a2, hint, branch, full, cachep, free) \ - for (a = a1; a <= a2; a++) { \ - branch = (map)->tree[a]; \ - if (branch != full) { \ - if ((a > a1 && a < a2) || (hint)) { \ - if (branch) \ - free(branch); \ - (map)->tree[a] = full; \ - continue; \ - } else if (!branch) { \ - branch = kmem_cache_alloc(cachep, GFP_ATOMIC); \ - if (!branch) \ - return -ENOMEM; \ - memset(branch, 0, 
sizeof(*branch)); \ - (map)->tree[a] = branch; \ - } - -#define ADDIP_RANGE_LOOP_END() \ - } \ - } - -#define DELIP_WALK(map, elem, branch, cachep, full, flags) \ - do { \ - branch = (map)->tree[elem]; \ - if (!branch) { \ - return -EEXIST; \ - } else if (branch == full) { \ - branch = kmem_cache_alloc(cachep, flags); \ - if (!branch) \ - return -ENOMEM; \ - memcpy(branch, full, sizeof(*full)); \ - (map)->tree[elem] = branch; \ - } \ - } while (0) - -#define DELIP_RANGE_LOOP(map, a, a1, a2, hint, branch, full, cachep, free, flags) \ - for (a = a1; a <= a2; a++) { \ - branch = (map)->tree[a]; \ - if (branch) { \ - if ((a > a1 && a < a2) || (hint)) { \ - if (branch != full) \ - free(branch); \ - (map)->tree[a] = NULL; \ - continue; \ - } else if (branch == full) { \ - branch = kmem_cache_alloc(cachep, flags); \ - if (!branch) \ - return -ENOMEM; \ - memcpy(branch, full, sizeof(*branch)); \ - (map)->tree[a] = branch; \ - } - -#define DELIP_RANGE_LOOP_END() \ - } \ - } - -#define LOOP_WALK_BEGIN(map, i, branch) \ - for (i = 0; i < 256; i++) { \ - branch = (map)->tree[i]; \ - if (likely(!branch)) \ - continue; - -#define LOOP_WALK_END() \ - } - -#define LOOP_WALK_BEGIN_GC(map, i, branch, full, cachep, count) \ - count = -256; \ - for (i = 0; i < 256; i++) { \ - branch = (map)->tree[i]; \ - if (likely(!branch)) \ - continue; \ - count++; \ - if (branch == full) { \ - count++; \ - continue; \ - } - -#define LOOP_WALK_END_GC(map, i, branch, full, cachep, count) \ - if (-256 == count) { \ - kmem_cache_free(cachep, branch); \ - (map)->tree[i] = NULL; \ - } else if (256 == count) { \ - kmem_cache_free(cachep, branch); \ - (map)->tree[i] = full; \ - } \ - } - -#define LOOP_WALK_BEGIN_COUNT(map, i, branch, inrange, count) \ - for (i = 0; i < 256; i++) { \ - if (!(map)->tree[i]) { \ - if (inrange) { \ - count++; \ - inrange = 0; \ - } \ - continue; \ - } \ - branch = (map)->tree[i]; - -#define LOOP_WALK_END_COUNT() \ - } - -#define GETVALUE1(a, a1, b1, r) \ - (a == a1 ? 
b1 : r) - -#define GETVALUE2(a, b, a1, b1, c1, r) \ - (a == a1 && b == b1 ? c1 : r) - -#define GETVALUE3(a, b, c, a1, b1, c1, d1, r) \ - (a == a1 && b == b1 && c == c1 ? d1 : r) - -#define CHECK1(a, a1, a2, b1, b2, c1, c2, d1, d2) \ - ( \ - GETVALUE1(a, a1, b1, 0) == 0 \ - && GETVALUE1(a, a2, b2, 255) == 255 \ - && c1 == 0 \ - && c2 == 255 \ - && d1 == 0 \ - && d2 == 255 \ - ) - -#define CHECK2(a, b, a1, a2, b1, b2, c1, c2, d1, d2) \ - ( \ - GETVALUE2(a, b, a1, b1, c1, 0) == 0 \ - && GETVALUE2(a, b, a2, b2, c2, 255) == 255 \ - && d1 == 0 \ - && d2 == 255 \ - ) - -#define CHECK3(a, b, c, a1, a2, b1, b2, c1, c2, d1, d2) \ - ( \ - GETVALUE3(a, b, c, a1, b1, c1, d1, 0) == 0 \ - && GETVALUE3(a, b, c, a2, b2, c2, d2, 255) == 255 \ - ) - - -static inline void -free_d(struct ip_set_iptreemap_d *map) -{ - kmem_cache_free(cachep_d, map); -} - -static inline void -free_c(struct ip_set_iptreemap_c *map) -{ - struct ip_set_iptreemap_d *dtree; - unsigned int i; - - LOOP_WALK_BEGIN(map, i, dtree) { - if (dtree != fullbitmap_d) - free_d(dtree); - } LOOP_WALK_END(); - - kmem_cache_free(cachep_c, map); -} - -static inline void -free_b(struct ip_set_iptreemap_b *map) -{ - struct ip_set_iptreemap_c *ctree; - unsigned int i; - - LOOP_WALK_BEGIN(map, i, ctree) { - if (ctree != fullbitmap_c) - free_c(ctree); - } LOOP_WALK_END(); - - kmem_cache_free(cachep_b, map); -} - -static inline int -iptreemap_test(struct ip_set *set, ip_set_ip_t ip) -{ - struct ip_set_iptreemap *map = set->data; - struct ip_set_iptreemap_b *btree; - struct ip_set_iptreemap_c *ctree; - struct ip_set_iptreemap_d *dtree; - unsigned char a, b, c, d; - - ABCD(a, b, c, d, &ip); - - TESTIP_WALK(map, a, btree, fullbitmap_b); - TESTIP_WALK(btree, b, ctree, fullbitmap_c); - TESTIP_WALK(ctree, c, dtree, fullbitmap_d); - - return !!test_bit(d, (void *) dtree->bitmap); -} - -#define KADT_CONDITION - -UADT(iptreemap, test) -KADT(iptreemap, test, ipaddr) - -static inline int -__addip_single(struct ip_set *set, ip_set_ip_t ip) -{ 
- struct ip_set_iptreemap *map = (struct ip_set_iptreemap *) set->data; - struct ip_set_iptreemap_b *btree; - struct ip_set_iptreemap_c *ctree; - struct ip_set_iptreemap_d *dtree; - unsigned char a, b, c, d; - - ABCD(a, b, c, d, &ip); - - ADDIP_WALK(map, a, btree, struct ip_set_iptreemap_b, cachep_b, fullbitmap_b); - ADDIP_WALK(btree, b, ctree, struct ip_set_iptreemap_c, cachep_c, fullbitmap_c); - ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreemap_d, cachep_d, fullbitmap_d); - - if (__test_and_set_bit(d, (void *) dtree->bitmap)) - return -EEXIST; - - __set_bit(b, (void *) btree->dirty); - - return 0; -} - -static inline int -iptreemap_add(struct ip_set *set, ip_set_ip_t start, ip_set_ip_t end) -{ - struct ip_set_iptreemap *map = set->data; - struct ip_set_iptreemap_b *btree; - struct ip_set_iptreemap_c *ctree; - struct ip_set_iptreemap_d *dtree; - unsigned int a, b, c, d; - unsigned char a1, b1, c1, d1; - unsigned char a2, b2, c2, d2; - - if (start == end) - return __addip_single(set, start); - - ABCD(a1, b1, c1, d1, &start); - ABCD(a2, b2, c2, d2, &end); - - /* This is sooo ugly... 
*/ - ADDIP_RANGE_LOOP(map, a, a1, a2, CHECK1(a, a1, a2, b1, b2, c1, c2, d1, d2), btree, fullbitmap_b, cachep_b, free_b) { - ADDIP_RANGE_LOOP(btree, b, GETVALUE1(a, a1, b1, 0), GETVALUE1(a, a2, b2, 255), CHECK2(a, b, a1, a2, b1, b2, c1, c2, d1, d2), ctree, fullbitmap_c, cachep_c, free_c) { - ADDIP_RANGE_LOOP(ctree, c, GETVALUE2(a, b, a1, b1, c1, 0), GETVALUE2(a, b, a2, b2, c2, 255), CHECK3(a, b, c, a1, a2, b1, b2, c1, c2, d1, d2), dtree, fullbitmap_d, cachep_d, free_d) { - for (d = GETVALUE3(a, b, c, a1, b1, c1, d1, 0); d <= GETVALUE3(a, b, c, a2, b2, c2, d2, 255); d++) - __set_bit(d, (void *) dtree->bitmap); - __set_bit(b, (void *) btree->dirty); - } ADDIP_RANGE_LOOP_END(); - } ADDIP_RANGE_LOOP_END(); - } ADDIP_RANGE_LOOP_END(); - - return 0; -} - -UADT0(iptreemap, add, min(req->ip, req->end), max(req->ip, req->end)) -KADT(iptreemap, add, ipaddr, ip) - -static inline int -__delip_single(struct ip_set *set, ip_set_ip_t ip, gfp_t flags) -{ - struct ip_set_iptreemap *map = set->data; - struct ip_set_iptreemap_b *btree; - struct ip_set_iptreemap_c *ctree; - struct ip_set_iptreemap_d *dtree; - unsigned char a,b,c,d; - - ABCD(a, b, c, d, &ip); - - DELIP_WALK(map, a, btree, cachep_b, fullbitmap_b, flags); - DELIP_WALK(btree, b, ctree, cachep_c, fullbitmap_c, flags); - DELIP_WALK(ctree, c, dtree, cachep_d, fullbitmap_d, flags); - - if (!__test_and_clear_bit(d, (void *) dtree->bitmap)) - return -EEXIST; - - __set_bit(b, (void *) btree->dirty); - - return 0; -} - -static inline int -iptreemap_del(struct ip_set *set, - ip_set_ip_t start, ip_set_ip_t end, gfp_t flags) -{ - struct ip_set_iptreemap *map = set->data; - struct ip_set_iptreemap_b *btree; - struct ip_set_iptreemap_c *ctree; - struct ip_set_iptreemap_d *dtree; - unsigned int a, b, c, d; - unsigned char a1, b1, c1, d1; - unsigned char a2, b2, c2, d2; - - if (start == end) - return __delip_single(set, start, flags); - - ABCD(a1, b1, c1, d1, &start); - ABCD(a2, b2, c2, d2, &end); - - /* This is sooo ugly... 
*/ - DELIP_RANGE_LOOP(map, a, a1, a2, CHECK1(a, a1, a2, b1, b2, c1, c2, d1, d2), btree, fullbitmap_b, cachep_b, free_b, flags) { - DELIP_RANGE_LOOP(btree, b, GETVALUE1(a, a1, b1, 0), GETVALUE1(a, a2, b2, 255), CHECK2(a, b, a1, a2, b1, b2, c1, c2, d1, d2), ctree, fullbitmap_c, cachep_c, free_c, flags) { - DELIP_RANGE_LOOP(ctree, c, GETVALUE2(a, b, a1, b1, c1, 0), GETVALUE2(a, b, a2, b2, c2, 255), CHECK3(a, b, c, a1, a2, b1, b2, c1, c2, d1, d2), dtree, fullbitmap_d, cachep_d, free_d, flags) { - for (d = GETVALUE3(a, b, c, a1, b1, c1, d1, 0); d <= GETVALUE3(a, b, c, a2, b2, c2, d2, 255); d++) - __clear_bit(d, (void *) dtree->bitmap); - __set_bit(b, (void *) btree->dirty); - } DELIP_RANGE_LOOP_END(); - } DELIP_RANGE_LOOP_END(); - } DELIP_RANGE_LOOP_END(); - - return 0; -} - -UADT0(iptreemap, del, min(req->ip, req->end), max(req->ip, req->end), GFP_KERNEL) -KADT(iptreemap, del, ipaddr, ip, GFP_ATOMIC) - -/* Check the status of the bitmap - * -1 == all bits cleared - * 1 == all bits set - * 0 == anything else - */ -static inline int -bitmap_status(struct ip_set_iptreemap_d *dtree) -{ - unsigned char first = dtree->bitmap[0]; - int a; - - for (a = 1; a < 32; a++) - if (dtree->bitmap[a] != first) - return 0; - - return (first == 0 ? -1 : (first == 255 ? 
1 : 0)); -} - -static void -gc(unsigned long addr) -{ - struct ip_set *set = (struct ip_set *) addr; - struct ip_set_iptreemap *map = set->data; - struct ip_set_iptreemap_b *btree; - struct ip_set_iptreemap_c *ctree; - struct ip_set_iptreemap_d *dtree; - unsigned int a, b, c; - int i, j, k; - - write_lock_bh(&set->lock); - - LOOP_WALK_BEGIN_GC(map, a, btree, fullbitmap_b, cachep_b, i) { - LOOP_WALK_BEGIN_GC(btree, b, ctree, fullbitmap_c, cachep_c, j) { - if (!__test_and_clear_bit(b, (void *) btree->dirty)) - continue; - LOOP_WALK_BEGIN_GC(ctree, c, dtree, fullbitmap_d, cachep_d, k) { - switch (bitmap_status(dtree)) { - case -1: - kmem_cache_free(cachep_d, dtree); - ctree->tree[c] = NULL; - k--; - break; - case 1: - kmem_cache_free(cachep_d, dtree); - ctree->tree[c] = fullbitmap_d; - k++; - break; - } - } LOOP_WALK_END(); - } LOOP_WALK_END_GC(btree, b, ctree, fullbitmap_c, cachep_c, k); - } LOOP_WALK_END_GC(map, a, btree, fullbitmap_b, cachep_b, j); - - write_unlock_bh(&set->lock); - - map->gc.expires = jiffies + map->gc_interval * HZ; - add_timer(&map->gc); -} - -static inline void -init_gc_timer(struct ip_set *set) -{ - struct ip_set_iptreemap *map = set->data; - - init_timer(&map->gc); - map->gc.data = (unsigned long) set; - map->gc.function = gc; - map->gc.expires = jiffies + map->gc_interval * HZ; - add_timer(&map->gc); -} - -static int -iptreemap_create(struct ip_set *set, const void *data, u_int32_t size) -{ - const struct ip_set_req_iptreemap_create *req = data; - struct ip_set_iptreemap *map; - - map = kzalloc(sizeof(*map), GFP_KERNEL); - if (!map) - return -ENOMEM; - - map->gc_interval = req->gc_interval ? 
req->gc_interval : IPTREEMAP_DEFAULT_GC_TIME; - set->data = map; - - init_gc_timer(set); - - return 0; -} - -static inline void -__flush(struct ip_set_iptreemap *map) -{ - struct ip_set_iptreemap_b *btree; - unsigned int a; - - LOOP_WALK_BEGIN(map, a, btree); - if (btree != fullbitmap_b) - free_b(btree); - LOOP_WALK_END(); -} - -static void -iptreemap_destroy(struct ip_set *set) -{ - struct ip_set_iptreemap *map = set->data; - - while (!del_timer(&map->gc)) - msleep(IPTREEMAP_DESTROY_SLEEP); - - __flush(map); - kfree(map); - - set->data = NULL; -} - -static void -iptreemap_flush(struct ip_set *set) -{ - struct ip_set_iptreemap *map = set->data; - unsigned int gc_interval = map->gc_interval; - - while (!del_timer(&map->gc)) - msleep(IPTREEMAP_DESTROY_SLEEP); - - __flush(map); - - memset(map, 0, sizeof(*map)); - map->gc_interval = gc_interval; - - init_gc_timer(set); -} - -static void -iptreemap_list_header(const struct ip_set *set, void *data) -{ - struct ip_set_iptreemap *map = set->data; - struct ip_set_req_iptreemap_create *header = data; - - header->gc_interval = map->gc_interval; -} - -static int -iptreemap_list_members_size(const struct ip_set *set, char dont_align) -{ - struct ip_set_iptreemap *map = set->data; - struct ip_set_iptreemap_b *btree; - struct ip_set_iptreemap_c *ctree; - struct ip_set_iptreemap_d *dtree; - unsigned int a, b, c, d, inrange = 0, count = 0; - - LOOP_WALK_BEGIN_COUNT(map, a, btree, inrange, count) { - LOOP_WALK_BEGIN_COUNT(btree, b, ctree, inrange, count) { - LOOP_WALK_BEGIN_COUNT(ctree, c, dtree, inrange, count) { - for (d = 0; d < 256; d++) { - if (test_bit(d, (void *) dtree->bitmap)) { - inrange = 1; - } else if (inrange) { - count++; - inrange = 0; - } - } - } LOOP_WALK_END_COUNT(); - } LOOP_WALK_END_COUNT(); - } LOOP_WALK_END_COUNT(); - - if (inrange) - count++; - - return (count * IPSET_VALIGN(sizeof(struct ip_set_req_iptreemap), dont_align)); -} - -static inline void -add_member(void *data, size_t offset, ip_set_ip_t start, 
ip_set_ip_t end) -{ - struct ip_set_req_iptreemap *entry = data + offset; - - entry->ip = start; - entry->end = end; -} - -static void -iptreemap_list_members(const struct ip_set *set, void *data, char dont_align) -{ - struct ip_set_iptreemap *map = set->data; - struct ip_set_iptreemap_b *btree; - struct ip_set_iptreemap_c *ctree; - struct ip_set_iptreemap_d *dtree; - unsigned int a, b, c, d, inrange = 0; - size_t offset = 0, datasize; - ip_set_ip_t start = 0, end = 0, ip; - - datasize = IPSET_VALIGN(sizeof(struct ip_set_req_iptreemap), dont_align); - LOOP_WALK_BEGIN(map, a, btree) { - LOOP_WALK_BEGIN(btree, b, ctree) { - LOOP_WALK_BEGIN(ctree, c, dtree) { - for (d = 0; d < 256; d++) { - if (test_bit(d, (void *) dtree->bitmap)) { - ip = ((a << 24) | (b << 16) | (c << 8) | d); - if (!inrange) { - inrange = 1; - start = ip; - } else if (end < ip - 1) { - add_member(data, offset, start, end); - offset += datasize; - start = ip; - } - end = ip; - } else if (inrange) { - add_member(data, offset, start, end); - offset += datasize; - inrange = 0; - } - } - } LOOP_WALK_END(); - } LOOP_WALK_END(); - } LOOP_WALK_END(); - - if (inrange) - add_member(data, offset, start, end); -} - -IP_SET_TYPE(iptreemap, IPSET_TYPE_IP | IPSET_DATA_SINGLE) - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sven Wegener "); -MODULE_DESCRIPTION("iptreemap type of IP sets"); - -static int __init ip_set_iptreemap_init(void) -{ - int ret = -ENOMEM; - int a; - - cachep_b = KMEM_CACHE_CREATE("ip_set_iptreemap_b", - sizeof(struct ip_set_iptreemap_b)); - if (!cachep_b) { - ip_set_printk("Unable to create ip_set_iptreemap_b slab cache"); - goto out; - } - - cachep_c = KMEM_CACHE_CREATE("ip_set_iptreemap_c", - sizeof(struct ip_set_iptreemap_c)); - if (!cachep_c) { - ip_set_printk("Unable to create ip_set_iptreemap_c slab cache"); - goto outb; - } - - cachep_d = KMEM_CACHE_CREATE("ip_set_iptreemap_d", - sizeof(struct ip_set_iptreemap_d)); - if (!cachep_d) { - ip_set_printk("Unable to create ip_set_iptreemap_d slab 
cache"); - goto outc; - } - - fullbitmap_d = kmem_cache_alloc(cachep_d, GFP_KERNEL); - if (!fullbitmap_d) - goto outd; - - fullbitmap_c = kmem_cache_alloc(cachep_c, GFP_KERNEL); - if (!fullbitmap_c) - goto outbitmapd; - - fullbitmap_b = kmem_cache_alloc(cachep_b, GFP_KERNEL); - if (!fullbitmap_b) - goto outbitmapc; - - ret = ip_set_register_set_type(&ip_set_iptreemap); - if (0 > ret) - goto outbitmapb; - - /* Now init our global bitmaps */ - memset(fullbitmap_d->bitmap, 0xff, sizeof(fullbitmap_d->bitmap)); - - for (a = 0; a < 256; a++) - fullbitmap_c->tree[a] = fullbitmap_d; - - for (a = 0; a < 256; a++) - fullbitmap_b->tree[a] = fullbitmap_c; - memset(fullbitmap_b->dirty, 0, sizeof(fullbitmap_b->dirty)); - - return 0; - -outbitmapb: - kmem_cache_free(cachep_b, fullbitmap_b); -outbitmapc: - kmem_cache_free(cachep_c, fullbitmap_c); -outbitmapd: - kmem_cache_free(cachep_d, fullbitmap_d); -outd: - kmem_cache_destroy(cachep_d); -outc: - kmem_cache_destroy(cachep_c); -outb: - kmem_cache_destroy(cachep_b); -out: - - return ret; -} - -static void __exit ip_set_iptreemap_fini(void) -{ - ip_set_unregister_set_type(&ip_set_iptreemap); - kmem_cache_free(cachep_d, fullbitmap_d); - kmem_cache_free(cachep_c, fullbitmap_c); - kmem_cache_free(cachep_b, fullbitmap_b); - kmem_cache_destroy(cachep_d); - kmem_cache_destroy(cachep_c); - kmem_cache_destroy(cachep_b); -} - -module_init(ip_set_iptreemap_init); -module_exit(ip_set_iptreemap_fini); diff --git a/kernel/ip_set_list_set.c b/kernel/ip_set_list_set.c index 3cfdae8..ce6c4d1 100644 --- a/kernel/ip_set_list_set.c +++ b/kernel/ip_set_list_set.c @@ -1,324 +1,589 @@ -/* Copyright (C) 2008 Jozsef Kadlecsik +/* Copyright (C) 2008-2010 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ -/* Kernel module implementing an IP set type: the setlist type */ +/* Kernel module implementing an IP set type: the list:set type */ +#include #include #include #include #include -#include -#include -#include +#include +#include +#include -/* - * before ==> index, ref - * after ==> ref, index - */ +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("list:set type of IP sets"); +MODULE_ALIAS("ip_set_list:set"); + +/* Member elements without and with timeout */ +struct set_elem { + ip_set_id_t id; +}; + +struct set_telem { + ip_set_id_t id; + unsigned long timeout; +}; + +/* Type structure */ +struct list_set { + size_t dsize; /* element size */ + u32 size; /* size of set list array */ + u32 timeout; /* timeout value */ + struct timer_list gc; /* garbage collection */ + struct set_elem members[0]; /* the set members */ +}; + +static inline struct set_elem * +list_set_elem(const struct list_set *map, u32 id) +{ + return (struct set_elem *)((char *)map->members + id * map->dsize); +} + +static inline bool +list_set_timeout(const struct list_set *map, u32 id) +{ + const struct set_telem *elem = + (const struct set_telem *) list_set_elem(map, id); + + return ip_set_timeout_test(elem->timeout); +} + +static inline bool +list_set_expired(const struct list_set *map, u32 id) +{ + const struct set_telem *elem = + (const struct set_telem *) list_set_elem(map, id); + + return ip_set_timeout_expired(elem->timeout); +} static inline int -next_index_eq(const struct ip_set_setlist *map, int i, ip_set_id_t index) +list_set_exist(const struct set_telem *elem) { - return i < map->size && map->index[i] == index; + return elem->id != IPSET_INVALID_ID + && !ip_set_timeout_expired(elem->timeout); } +/* Set list without and with timeout */ + static int -setlist_utest(struct ip_set *set, const void *data, u_int32_t size) +list_set_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) { - const struct 
ip_set_setlist *map = set->data; - const struct ip_set_req_setlist *req = data; - ip_set_id_t index, ref = IP_SET_INVALID_ID; - int i, res = 0; - struct ip_set *s; - - if (req->before && req->ref[0] == '\0') - return 0; - - index = __ip_set_get_byname(req->name, &s); - if (index == IP_SET_INVALID_ID) - return 0; - if (req->ref[0] != '\0') { - ref = __ip_set_get_byname(req->ref, &s); - if (ref == IP_SET_INVALID_ID) - goto finish; - } - for (i = 0; i < map->size - && map->index[i] != IP_SET_INVALID_ID; i++) { - if (req->before && map->index[i] == index) { - res = next_index_eq(map, i + 1, ref); - break; - } else if (!req->before) { - if ((ref == IP_SET_INVALID_ID - && map->index[i] == index) - || (map->index[i] == ref - && next_index_eq(map, i + 1, index))) { - res = 1; - break; - } + struct list_set *map = set->data; + struct set_elem *elem; + u32 i; + int ret; + + for (i = 0; i < map->size; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) + return 0; + if (with_timeout(map->timeout) && list_set_expired(map, i)) + continue; + switch (adt) { + case IPSET_TEST: + ret = ip_set_test(elem->id, skb, pf, dim, flags); + if (ret > 0) + return ret; + break; + case IPSET_ADD: + ret = ip_set_add(elem->id, skb, pf, dim, flags); + if (ret == 0) + return ret; + break; + case IPSET_DEL: + ret = ip_set_del(elem->id, skb, pf, dim, flags); + if (ret == 0) + return ret; + break; + default: + break; } } - if (ref != IP_SET_INVALID_ID) - __ip_set_put_byindex(ref); -finish: - __ip_set_put_byindex(index); - return res; + return -EINVAL; +} + +static const struct nla_policy +list_set_adt_policy[IPSET_ATTR_ADT_MAX+1] __read_mostly = { + [IPSET_ATTR_NAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, +}; + +static inline bool +next_id_eq(const struct list_set *map, u32 i, ip_set_id_t id) +{ + 
const struct set_elem *elem; + + if (i + 1 < map->size) { + elem = list_set_elem(map, i + 1); + return !!(elem->id == id + && !(with_timeout(map->timeout) + && list_set_expired(map, i + 1))); + } + + return 0; } +static inline void +list_elem_add(struct list_set *map, u32 i, ip_set_id_t id) +{ + struct set_elem *e; + + for (; i < map->size; i++) { + e = list_set_elem(map, i); + swap(e->id, id); + if (e->id == IPSET_INVALID_ID) + break; + } +} + +static inline void +list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, + unsigned long timeout) +{ + struct set_telem *e; + + for (; i < map->size; i++) { + e = (struct set_telem *)list_set_elem(map, i); + swap(e->id, id); + if (e->id == IPSET_INVALID_ID) + break; + swap(e->timeout, timeout); + } +} + static int -setlist_ktest(struct ip_set *set, - const struct sk_buff *skb, - const u_int32_t *flags) +list_set_add(struct list_set *map, u32 i, ip_set_id_t id, + unsigned long timeout) { - struct ip_set_setlist *map = set->data; - int i, res = 0; + struct set_elem *e = list_set_elem(map, i); + + if (i == map->size - 1 && e->id != IPSET_INVALID_ID) + /* Last element replaced: e.g. 
add new,before,last */ + ip_set_put_byindex(e->id); + if (with_timeout(map->timeout)) + list_elem_tadd(map, i, id, timeout); + else + list_elem_add(map, i, id); - for (i = 0; i < map->size - && map->index[i] != IP_SET_INVALID_ID - && res == 0; i++) - res = ip_set_testip_kernel(map->index[i], skb, flags); - return res; + return 0; } -static inline int -insert_setlist(struct ip_set_setlist *map, int i, ip_set_id_t index) +static int +list_set_del(struct list_set *map, ip_set_id_t id, u32 i) { - ip_set_id_t tmp; - int j; + struct set_elem *a = list_set_elem(map, i), *b; - DP("i: %u, last %u\n", i, map->index[map->size - 1]); - if (i >= map->size || map->index[map->size - 1] != IP_SET_INVALID_ID) - return -ERANGE; - - for (j = i; j < map->size - && index != IP_SET_INVALID_ID; j++) { - tmp = map->index[j]; - map->index[j] = index; - index = tmp; + ip_set_put_byindex(id); + + for (; i < map->size - 1; i++) { + b = list_set_elem(map, i + 1); + a->id = b->id; + if (with_timeout(map->timeout)) + ((struct set_telem *)a)->timeout = + ((struct set_telem *)b)->timeout; + a = b; + if (a->id == IPSET_INVALID_ID) + break; } + /* Last element */ + a->id = IPSET_INVALID_ID; return 0; } static int -setlist_uadd(struct ip_set *set, const void *data, u_int32_t size) +list_set_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, u32 *lineno, u32 flags) { - struct ip_set_setlist *map = set->data; - const struct ip_set_req_setlist *req = data; - ip_set_id_t index, ref = IP_SET_INVALID_ID; - int i, res = -ERANGE; + struct list_set *map = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST, + with_timeout = with_timeout(map->timeout); + int before = 0; + u32 timeout = map->timeout; + ip_set_id_t id, refid = IPSET_INVALID_ID; + struct set_elem *elem; struct ip_set *s; - - if (req->before && req->ref[0] == '\0') - return -EINVAL; - - index = __ip_set_get_byname(req->name, &s); - if (index == IP_SET_INVALID_ID) - return -EEXIST; 
- /* "Loop detection" */ - if (strcmp(s->type->typename, "setlist") == 0) + u32 i; + int ret = 0; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + list_set_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_NAME]) { + id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + if (id == IPSET_INVALID_ID) + return -IPSET_ERR_NAME; + /* "Loop detection" */ + if (s->type->features & IPSET_TYPE_NAME) { + ret = -IPSET_ERR_LOOP; + goto finish; + } + } else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + before = f & IPSET_FLAG_BEFORE; + } + + if (before && !tb[IPSET_ATTR_NAMEREF]) { + ret = -IPSET_ERR_BEFORE; goto finish; + } - if (req->ref[0] != '\0') { - ref = __ip_set_get_byname(req->ref, &s); - if (ref == IP_SET_INVALID_ID) { - res = -EEXIST; + if (tb[IPSET_ATTR_NAMEREF]) { + refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), &s); + if (refid == IPSET_INVALID_ID) { + ret = -IPSET_ERR_NAMEREF; goto finish; } + if (!before) + before = -1; } - for (i = 0; i < map->size; i++) { - if (map->index[i] != ref) - continue; - if (req->before) - res = insert_setlist(map, i, index); - else - res = insert_setlist(map, - ref == IP_SET_INVALID_ID ? 
i : i + 1, - index); + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout) { + ret = -IPSET_ERR_TIMEOUT; + goto finish; + } + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + switch (adt) { + case IPSET_TEST: + for (i = 0; i < map->size && !ret; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID + || (before != 0 && i + 1 >= map->size)) + break; + else if (with_timeout && list_set_expired(map, i)) + continue; + else if (before > 0 && elem->id == id) + ret = next_id_eq(map, i, refid); + else if (before < 0 && elem->id == refid) + ret = next_id_eq(map, i, id); + else if (before == 0 && elem->id == id) + ret = 1; + } + break; + case IPSET_ADD: + for (i = 0; i < map->size && !ret; i++) { + elem = list_set_elem(map, i); + if (elem->id == id + && !(with_timeout && list_set_expired(map, i))) + ret = -IPSET_ERR_EXIST; + } + if (ret == -IPSET_ERR_EXIST) + break; + ret = -IPSET_ERR_LIST_FULL; + for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) + ret = before != 0 ? -IPSET_ERR_REF_EXIST + : list_set_add(map, i, id, timeout); + else if (elem->id != refid) + continue; + else if (with_timeout && list_set_expired(map, i)) + ret = -IPSET_ERR_REF_EXIST; + else if (before) + ret = list_set_add(map, i, id, timeout); + else if (i + 1 < map->size) + ret = list_set_add(map, i + 1, id, timeout); + } + break; + case IPSET_DEL: + ret = -IPSET_ERR_EXIST; + for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) { + ret = before != 0 ? 
-IPSET_ERR_REF_EXIST + : -IPSET_ERR_EXIST; + break; + } else if (with_timeout && list_set_expired(map, i)) + continue; + else if (elem->id == id + && (before == 0 + || (before > 0 && next_id_eq(map, i, refid)))) + ret = list_set_del(map, id, i); + else if (before < 0 && elem->id == refid + && next_id_eq(map, i, id)) + ret = list_set_del(map, id, i + 1); + } + break; + default: break; } - if (ref != IP_SET_INVALID_ID) - __ip_set_put_byindex(ref); - /* In case of success, we keep the reference to the set */ + finish: - if (res != 0) - __ip_set_put_byindex(index); - return res; + if (refid != IPSET_INVALID_ID) + ip_set_put_byindex(refid); + if (adt != IPSET_ADD || ret) + ip_set_put_byindex(id); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + return ret; + } + return ret; } -static int -setlist_kadd(struct ip_set *set, - const struct sk_buff *skb, - const u_int32_t *flags) +static void +list_set_flush(struct ip_set *set) { - struct ip_set_setlist *map = set->data; - int i, res = -EINVAL; + struct list_set *map = set->data; + struct set_elem *elem; + u32 i; + + for (i = 0; i < map->size; i++) { + elem = list_set_elem(map, i); + if (elem->id != IPSET_INVALID_ID) { + ip_set_put_byindex(elem->id); + elem->id = IPSET_INVALID_ID; + } + } +} + +static void +list_set_destroy(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (with_timeout(map->timeout)) + del_timer_sync(&map->gc); + list_set_flush(set); + kfree(map); - for (i = 0; i < map->size - && map->index[i] != IP_SET_INVALID_ID - && res != 0; i++) - res = ip_set_addip_kernel(map->index[i], skb, flags); - return res; + set->data = NULL; } -static inline int -unshift_setlist(struct ip_set_setlist *map, int i) +static int +list_set_head(struct ip_set *set, struct sk_buff *skb) { - int j; + const struct list_set *map = set->data; + struct nlattr *nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + 
goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); + if (with_timeout(map->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->size * map->dsize)); + ipset_nest_end(skb, nested); - for (j = i; j < map->size - 1; j++) - map->index[j] = map->index[j+1]; - map->index[map->size-1] = IP_SET_INVALID_ID; return 0; +nla_put_failure: + return -EFAULT; } static int -setlist_udel(struct ip_set *set, const void *data, u_int32_t size) +list_set_list(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) { - struct ip_set_setlist *map = set->data; - const struct ip_set_req_setlist *req = data; - ip_set_id_t index, ref = IP_SET_INVALID_ID; - int i, res = -EEXIST; - struct ip_set *s; - - if (req->before && req->ref[0] == '\0') - return -EINVAL; - - index = __ip_set_get_byname(req->name, &s); - if (index == IP_SET_INVALID_ID) - return -EEXIST; - if (req->ref[0] != '\0') { - ref = __ip_set_get_byname(req->ref, &s); - if (ref == IP_SET_INVALID_ID) + const struct list_set *map = set->data; + struct nlattr *atd, *nested; + u32 i, first = cb->args[2]; + const struct set_elem *e; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EFAULT; + for (; cb->args[2] < map->size; cb->args[2]++) { + i = cb->args[2]; + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) goto finish; - } - for (i = 0; i < map->size - && map->index[i] != IP_SET_INVALID_ID; i++) { - if (req->before) { - if (map->index[i] == index - && next_index_eq(map, i + 1, ref)) { - res = unshift_setlist(map, i); - break; - } - } else if (ref == IP_SET_INVALID_ID) { - if (map->index[i] == index) { - res = unshift_setlist(map, i); - break; - } - } else if (map->index[i] == ref - && next_index_eq(map, i + 1, index)) { - res = unshift_setlist(map, i + 1); - break; + if 
(with_timeout(map->timeout) && list_set_expired(map, i)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (i == first) { + nla_nest_cancel(skb, atd); + return -EFAULT; + } else + goto nla_put_failure; } + NLA_PUT_STRING(skb, IPSET_ATTR_NAME, + ip_set_name_byindex(e->id)); + if (with_timeout(map->timeout)) { + const struct set_telem *te = + (const struct set_telem *) e; + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(te->timeout))); + } + ipset_nest_end(skb, nested); } - if (ref != IP_SET_INVALID_ID) - __ip_set_put_byindex(ref); finish: - __ip_set_put_byindex(index); - /* In case of success, release the reference to the set */ - if (res == 0) - __ip_set_put_byindex(index); - return res; + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return 0; } -static int -setlist_kdel(struct ip_set *set, - const struct sk_buff *skb, - const u_int32_t *flags) +static bool +list_set_same_set(const struct ip_set *a, const struct ip_set *b) { - struct ip_set_setlist *map = set->data; - int i, res = -EINVAL; + struct list_set *x = a->data; + struct list_set *y = b->data; - for (i = 0; i < map->size - && map->index[i] != IP_SET_INVALID_ID - && res != 0; i++) - res = ip_set_delip_kernel(map->index[i], skb, flags); - return res; + return x->size == y->size + && x->timeout == y->timeout; } -static int -setlist_create(struct ip_set *set, const void *data, u_int32_t size) -{ - struct ip_set_setlist *map; - const struct ip_set_req_setlist_create *req = data; - int i; - - map = kmalloc(sizeof(struct ip_set_setlist) + - req->size * sizeof(ip_set_id_t), GFP_KERNEL); - if (!map) - return -ENOMEM; - map->size = req->size; - for (i = 0; i < map->size; i++) - map->index[i] = IP_SET_INVALID_ID; - - set->data = map; - return 0; -} +static const struct ip_set_type_variant list_set __read_mostly = { + .kadt = list_set_kadt, + 
.uadt = list_set_uadt, + .destroy = list_set_destroy, + .flush = list_set_flush, + .head = list_set_head, + .list = list_set_list, + .same_set = list_set_same_set, +}; static void -setlist_destroy(struct ip_set *set) +list_set_gc(unsigned long ul_set) { - struct ip_set_setlist *map = set->data; - int i; - - for (i = 0; i < map->size - && map->index[i] != IP_SET_INVALID_ID; i++) - __ip_set_put_byindex(map->index[i]); + struct ip_set *set = (struct ip_set *) ul_set; + struct list_set *map = set->data; + struct set_telem *e; + u32 i; - kfree(map); - set->data = NULL; + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (i = map->size - 1; i >= 0; i--) { + e = (struct set_telem *) list_set_elem(map, i); + if (e->id != IPSET_INVALID_ID + && list_set_expired(map, i)) + list_set_del(map, e->id, i); + } + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); } -static void -setlist_flush(struct ip_set *set) +static inline void +list_set_gc_init(struct ip_set *set) { - struct ip_set_setlist *map = set->data; - int i; - - for (i = 0; i < map->size - && map->index[i] != IP_SET_INVALID_ID; i++) { - __ip_set_put_byindex(map->index[i]); - map->index[i] = IP_SET_INVALID_ID; - } + struct list_set *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = list_set_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); } -static void -setlist_list_header(const struct ip_set *set, void *data) +/* Create list:set type of sets */ + +static const struct nla_policy +list_set_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static inline bool +init_list_set(struct ip_set *set, u32 size, size_t dsize, + unsigned long timeout) { - const struct 
ip_set_setlist *map = set->data; - struct ip_set_req_setlist_create *header = data; + struct list_set *map; + struct set_elem *e; + u32 i; - header->size = map->size; + map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); + if (!map) + return false; + + map->size = size; + map->dsize = dsize; + map->timeout = timeout; + set->data = map; + + for (i = 0; i < size; i++) { + e = list_set_elem(map, i); + e->id = IPSET_INVALID_ID; + } + + return true; } static int -setlist_list_members_size(const struct ip_set *set, char dont_align) +list_set_create(struct ip_set *set, struct nlattr *head, int len, + u32 flags) { - const struct ip_set_setlist *map = set->data; - - return map->size * IPSET_VALIGN(sizeof(ip_set_id_t), dont_align); -} + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + u32 size = IP_SET_LIST_DEFAULT_SIZE; -static void -setlist_list_members(const struct ip_set *set, void *data, char dont_align) -{ - struct ip_set_setlist *map = set->data; - ip_set_id_t *d; - int i; + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, + list_set_create_policy)) + return -IPSET_ERR_PROTOCOL; - for (i = 0; i < map->size; i++) { - d = data + i * IPSET_VALIGN(sizeof(ip_set_id_t), dont_align); - *d = ip_set_id(map->index[i]); + if (tb[IPSET_ATTR_SIZE]) + size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]); + if (size < IP_SET_LIST_MIN_SIZE) + size = IP_SET_LIST_MIN_SIZE; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!init_list_set(set, size, sizeof(struct set_telem), + ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]))) + return -ENOMEM; + + list_set_gc_init(set); + } else { + if (!init_list_set(set, size, sizeof(struct set_elem), + IPSET_NO_TIMEOUT)) + return -ENOMEM; } + set->variant = &list_set; + return 0; } -IP_SET_TYPE(setlist, IPSET_TYPE_SETNAME | IPSET_DATA_SINGLE) +static struct ip_set_type list_set_type = { + .name = "list:set", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_NAME, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = list_set_create, + .me = 
THIS_MODULE, +}; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("setlist type of IP sets"); +static int __init +list_set_init(void) +{ + return ip_set_type_register(&list_set_type); +} + +static void __exit +list_set_fini(void) +{ + ip_set_type_unregister(&list_set_type); +} -REGISTER_MODULE(setlist) +module_init(list_set_init); +module_exit(list_set_fini); diff --git a/kernel/ip_set_tree_ip.c b/kernel/ip_set_tree_ip.c deleted file mode 100644 index 77eb180..0000000 --- a/kernel/ip_set_tree_ip.c +++ /dev/null @@ -1,464 +0,0 @@ -/* Copyright (C) 2005-2008 Jozsef Kadlecsik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* Kernel module implementing an IP set type: the iptree type */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int limit = MAX_RANGE; - -/* Garbage collection interval in seconds: */ -#define IPTREE_GC_TIME 5*60 -/* Sleep so many milliseconds before trying again - * to delete the gc timer at destroying/flushing a set */ -#define IPTREE_DESTROY_SLEEP 100 - -static __KMEM_CACHE_T__ *branch_cachep; -static __KMEM_CACHE_T__ *leaf_cachep; - - -#if defined(__LITTLE_ENDIAN) -#define ABCD(a,b,c,d,addrp) do { \ - a = ((unsigned char *)addrp)[3]; \ - b = ((unsigned char *)addrp)[2]; \ - c = ((unsigned char *)addrp)[1]; \ - d = ((unsigned char *)addrp)[0]; \ -} while (0) -#elif defined(__BIG_ENDIAN) -#define ABCD(a,b,c,d,addrp) do { \ - a = ((unsigned char *)addrp)[0]; \ - b = ((unsigned char *)addrp)[1]; \ - c = ((unsigned char *)addrp)[2]; \ - d = ((unsigned char *)addrp)[3]; \ -} while (0) -#else -#error "Please fix asm/byteorder.h" -#endif /* __LITTLE_ENDIAN */ - -#define TESTIP_WALK(map, elem, branch) do { \ - if ((map)->tree[elem]) { \ - branch = 
(map)->tree[elem]; \ - } else \ - return 0; \ -} while (0) - -static inline int -iptree_test(struct ip_set *set, ip_set_ip_t ip) -{ - struct ip_set_iptree *map = set->data; - struct ip_set_iptreeb *btree; - struct ip_set_iptreec *ctree; - struct ip_set_iptreed *dtree; - unsigned char a,b,c,d; - - if (!ip) - return -ERANGE; - - ABCD(a, b, c, d, &ip); - DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout); - TESTIP_WALK(map, a, btree); - TESTIP_WALK(btree, b, ctree); - TESTIP_WALK(ctree, c, dtree); - DP("%lu %lu", dtree->expires[d], jiffies); - return dtree->expires[d] - && (!map->timeout - || time_after(dtree->expires[d], jiffies)); -} - -#define KADT_CONDITION - -UADT(iptree, test) -KADT(iptree, test, ipaddr) - -#define ADDIP_WALK(map, elem, branch, type, cachep) do { \ - if ((map)->tree[elem]) { \ - DP("found %u", elem); \ - branch = (map)->tree[elem]; \ - } else { \ - branch = (type *) \ - kmem_cache_alloc(cachep, GFP_ATOMIC); \ - if (branch == NULL) \ - return -ENOMEM; \ - memset(branch, 0, sizeof(*branch)); \ - (map)->tree[elem] = branch; \ - DP("alloc %u", elem); \ - } \ -} while (0) - -static inline int -iptree_add(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout) -{ - struct ip_set_iptree *map = set->data; - struct ip_set_iptreeb *btree; - struct ip_set_iptreec *ctree; - struct ip_set_iptreed *dtree; - unsigned char a,b,c,d; - int ret = 0; - - if (!ip || map->elements >= limit) - /* We could call the garbage collector - * but it's probably overkill */ - return -ERANGE; - - ABCD(a, b, c, d, &ip); - DP("%u %u %u %u timeout %u", a, b, c, d, timeout); - ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep); - ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep); - ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep); - if (dtree->expires[d] - && (!map->timeout || time_after(dtree->expires[d], jiffies))) - ret = -EEXIST; - if (map->timeout && timeout == 0) - timeout = map->timeout; - dtree->expires[d] = map->timeout ? 
(timeout * HZ + jiffies) : 1; - /* Lottery: I won! */ - if (dtree->expires[d] == 0) - dtree->expires[d] = 1; - DP("%u %lu", d, dtree->expires[d]); - if (ret == 0) - map->elements++; - return ret; -} - -UADT(iptree, add, req->timeout) -KADT(iptree, add, ipaddr, 0) - -#define DELIP_WALK(map, elem, branch) do { \ - if ((map)->tree[elem]) { \ - branch = (map)->tree[elem]; \ - } else \ - return -EEXIST; \ -} while (0) - -static inline int -iptree_del(struct ip_set *set, ip_set_ip_t ip) -{ - struct ip_set_iptree *map = set->data; - struct ip_set_iptreeb *btree; - struct ip_set_iptreec *ctree; - struct ip_set_iptreed *dtree; - unsigned char a,b,c,d; - - if (!ip) - return -ERANGE; - - ABCD(a, b, c, d, &ip); - DELIP_WALK(map, a, btree); - DELIP_WALK(btree, b, ctree); - DELIP_WALK(ctree, c, dtree); - - if (dtree->expires[d]) { - dtree->expires[d] = 0; - map->elements--; - return 0; - } - return -EEXIST; -} - -UADT(iptree, del) -KADT(iptree, del, ipaddr) - -#define LOOP_WALK_BEGIN(map, i, branch) \ - for (i = 0; i < 256; i++) { \ - if (!(map)->tree[i]) \ - continue; \ - branch = (map)->tree[i] - -#define LOOP_WALK_END } - -static void -ip_tree_gc(unsigned long ul_set) -{ - struct ip_set *set = (struct ip_set *) ul_set; - struct ip_set_iptree *map = set->data; - struct ip_set_iptreeb *btree; - struct ip_set_iptreec *ctree; - struct ip_set_iptreed *dtree; - unsigned int a,b,c,d; - unsigned char i,j,k; - - i = j = k = 0; - DP("gc: %s", set->name); - write_lock_bh(&set->lock); - LOOP_WALK_BEGIN(map, a, btree); - LOOP_WALK_BEGIN(btree, b, ctree); - LOOP_WALK_BEGIN(ctree, c, dtree); - for (d = 0; d < 256; d++) { - if (dtree->expires[d]) { - DP("gc: %u %u %u %u: expires %lu jiffies %lu", - a, b, c, d, - dtree->expires[d], jiffies); - if (map->timeout - && time_before(dtree->expires[d], jiffies)) { - dtree->expires[d] = 0; - map->elements--; - } else - k = 1; - } - } - if (k == 0) { - DP("gc: %s: leaf %u %u %u empty", - set->name, a, b, c); - kmem_cache_free(leaf_cachep, dtree); - 
ctree->tree[c] = NULL; - } else { - DP("gc: %s: leaf %u %u %u not empty", - set->name, a, b, c); - j = 1; - k = 0; - } - LOOP_WALK_END; - if (j == 0) { - DP("gc: %s: branch %u %u empty", - set->name, a, b); - kmem_cache_free(branch_cachep, ctree); - btree->tree[b] = NULL; - } else { - DP("gc: %s: branch %u %u not empty", - set->name, a, b); - i = 1; - j = k = 0; - } - LOOP_WALK_END; - if (i == 0) { - DP("gc: %s: branch %u empty", - set->name, a); - kmem_cache_free(branch_cachep, btree); - map->tree[a] = NULL; - } else { - DP("gc: %s: branch %u not empty", - set->name, a); - i = j = k = 0; - } - LOOP_WALK_END; - write_unlock_bh(&set->lock); - - map->gc.expires = jiffies + map->gc_interval * HZ; - add_timer(&map->gc); -} - -static inline void -init_gc_timer(struct ip_set *set) -{ - struct ip_set_iptree *map = set->data; - - /* Even if there is no timeout for the entries, - * we still have to call gc because delete - * do not clean up empty branches */ - map->gc_interval = IPTREE_GC_TIME; - init_timer(&map->gc); - map->gc.data = (unsigned long) set; - map->gc.function = ip_tree_gc; - map->gc.expires = jiffies + map->gc_interval * HZ; - add_timer(&map->gc); -} - -static int -iptree_create(struct ip_set *set, const void *data, u_int32_t size) -{ - const struct ip_set_req_iptree_create *req = data; - struct ip_set_iptree *map; - - if (size != sizeof(struct ip_set_req_iptree_create)) { - ip_set_printk("data length wrong (want %zu, have %lu)", - sizeof(struct ip_set_req_iptree_create), - (unsigned long)size); - return -EINVAL; - } - - map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL); - if (!map) { - DP("out of memory for %zu bytes", - sizeof(struct ip_set_iptree)); - return -ENOMEM; - } - memset(map, 0, sizeof(*map)); - map->timeout = req->timeout; - map->elements = 0; - set->data = map; - - init_gc_timer(set); - - return 0; -} - -static inline void -__flush(struct ip_set_iptree *map) -{ - struct ip_set_iptreeb *btree; - struct ip_set_iptreec *ctree; - struct 
ip_set_iptreed *dtree; - unsigned int a,b,c; - - LOOP_WALK_BEGIN(map, a, btree); - LOOP_WALK_BEGIN(btree, b, ctree); - LOOP_WALK_BEGIN(ctree, c, dtree); - kmem_cache_free(leaf_cachep, dtree); - LOOP_WALK_END; - kmem_cache_free(branch_cachep, ctree); - LOOP_WALK_END; - kmem_cache_free(branch_cachep, btree); - LOOP_WALK_END; - map->elements = 0; -} - -static void -iptree_destroy(struct ip_set *set) -{ - struct ip_set_iptree *map = set->data; - - /* gc might be running */ - while (!del_timer(&map->gc)) - msleep(IPTREE_DESTROY_SLEEP); - __flush(map); - kfree(map); - set->data = NULL; -} - -static void -iptree_flush(struct ip_set *set) -{ - struct ip_set_iptree *map = set->data; - unsigned int timeout = map->timeout; - - /* gc might be running */ - while (!del_timer(&map->gc)) - msleep(IPTREE_DESTROY_SLEEP); - __flush(map); - memset(map, 0, sizeof(*map)); - map->timeout = timeout; - - init_gc_timer(set); -} - -static void -iptree_list_header(const struct ip_set *set, void *data) -{ - const struct ip_set_iptree *map = set->data; - struct ip_set_req_iptree_create *header = data; - - header->timeout = map->timeout; -} - -static int -iptree_list_members_size(const struct ip_set *set, char dont_align) -{ - const struct ip_set_iptree *map = set->data; - struct ip_set_iptreeb *btree; - struct ip_set_iptreec *ctree; - struct ip_set_iptreed *dtree; - unsigned int a,b,c,d; - unsigned int count = 0; - - LOOP_WALK_BEGIN(map, a, btree); - LOOP_WALK_BEGIN(btree, b, ctree); - LOOP_WALK_BEGIN(ctree, c, dtree); - for (d = 0; d < 256; d++) { - if (dtree->expires[d] - && (!map->timeout || time_after(dtree->expires[d], jiffies))) - count++; - } - LOOP_WALK_END; - LOOP_WALK_END; - LOOP_WALK_END; - - DP("members %u", count); - return (count * IPSET_VALIGN(sizeof(struct ip_set_req_iptree), dont_align)); -} - -static void -iptree_list_members(const struct ip_set *set, void *data, char dont_align) -{ - const struct ip_set_iptree *map = set->data; - struct ip_set_iptreeb *btree; - struct 
ip_set_iptreec *ctree; - struct ip_set_iptreed *dtree; - unsigned int a,b,c,d; - size_t offset = 0, datasize; - struct ip_set_req_iptree *entry; - - datasize = IPSET_VALIGN(sizeof(struct ip_set_req_iptree), dont_align); - LOOP_WALK_BEGIN(map, a, btree); - LOOP_WALK_BEGIN(btree, b, ctree); - LOOP_WALK_BEGIN(ctree, c, dtree); - for (d = 0; d < 256; d++) { - if (dtree->expires[d] - && (!map->timeout || time_after(dtree->expires[d], jiffies))) { - entry = data + offset; - entry->ip = ((a << 24) | (b << 16) | (c << 8) | d); - entry->timeout = !map->timeout ? 0 - : (dtree->expires[d] - jiffies)/HZ; - offset += datasize; - } - } - LOOP_WALK_END; - LOOP_WALK_END; - LOOP_WALK_END; -} - -IP_SET_TYPE(iptree, IPSET_TYPE_IP | IPSET_DATA_SINGLE) - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("iptree type of IP sets"); -module_param(limit, int, 0600); -MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets"); - -static int __init ip_set_iptree_init(void) -{ - int ret; - - branch_cachep = KMEM_CACHE_CREATE("ip_set_iptreeb", - sizeof(struct ip_set_iptreeb)); - if (!branch_cachep) { - printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n"); - ret = -ENOMEM; - goto out; - } - leaf_cachep = KMEM_CACHE_CREATE("ip_set_iptreed", - sizeof(struct ip_set_iptreed)); - if (!leaf_cachep) { - printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n"); - ret = -ENOMEM; - goto free_branch; - } - ret = ip_set_register_set_type(&ip_set_iptree); - if (ret == 0) - goto out; - - kmem_cache_destroy(leaf_cachep); - free_branch: - kmem_cache_destroy(branch_cachep); - out: - return ret; -} - -static void __exit ip_set_iptree_fini(void) -{ - /* FIXME: possible race with ip_set_create() */ - ip_set_unregister_set_type(&ip_set_iptree); - kmem_cache_destroy(leaf_cachep); - kmem_cache_destroy(branch_cachep); -} - -module_init(ip_set_iptree_init); -module_exit(ip_set_iptree_fini); diff --git a/kernel/ipt_SET.c b/kernel/ipt_SET.c deleted file mode 
100644 index 6009d64..0000000 --- a/kernel/ipt_SET.c +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright (C) 2000-2002 Joakim Axelsson - * Patrick Schaaf - * Martin Josefsson - * Copyright (C) 2003-2004 Jozsef Kadlecsik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* ipt_SET.c - netfilter target to manipulate IP sets */ - -#include -#include -#include -#include - -#include -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) -#include -#define xt_register_target ipt_register_target -#define xt_unregister_target ipt_unregister_target -#define xt_target ipt_target -#define XT_CONTINUE IPT_CONTINUE -#else -#include -#endif -#include - -static unsigned int -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -target(struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - const void *targinfo, - void *userinfo) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo, - void *userinfo) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) */ -target(struct 
sk_buff *skb, - const struct xt_target_param *par) -#endif -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - const struct ipt_set_info_target *info = targinfo; -#else - const struct ipt_set_info_target *info = par->targinfo; -#endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) - struct sk_buff *skb = *pskb; -#endif - - - if (info->add_set.index != IP_SET_INVALID_ID) - ip_set_addip_kernel(info->add_set.index, - skb, - info->add_set.flags); - if (info->del_set.index != IP_SET_INVALID_ID) - ip_set_delip_kernel(info->del_set.index, - skb, - info->del_set.flags); - - return XT_CONTINUE; -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) -static int -checkentry(const char *tablename, - const struct ipt_entry *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -static int -checkentry(const char *tablename, - const void *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) -static int -checkentry(const char *tablename, - const void *e, - const struct xt_target *target, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -static int -checkentry(const char *tablename, - const void *e, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -static bool -checkentry(const char *tablename, - const void *e, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) */ -static bool -checkentry(const struct xt_tgchk_param *par) -#endif -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - const struct ipt_set_info_target *info = targinfo; -#else - const struct ipt_set_info_target *info = par->targinfo; -#endif - ip_set_id_t index; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) - if (targinfosize != 
IPT_ALIGN(sizeof(*info))) { - DP("bad target info size %u", targinfosize); - return 0; - } -#endif - - if (info->add_set.index != IP_SET_INVALID_ID) { - index = ip_set_get_byindex(info->add_set.index); - if (index == IP_SET_INVALID_ID) { - ip_set_printk("cannot find add_set index %u as target", - info->add_set.index); - return 0; /* error */ - } - } - - if (info->del_set.index != IP_SET_INVALID_ID) { - index = ip_set_get_byindex(info->del_set.index); - if (index == IP_SET_INVALID_ID) { - ip_set_printk("cannot find del_set index %u as target", - info->del_set.index); - return 0; /* error */ - } - } - if (info->add_set.flags[IP_SET_MAX_BINDINGS] != 0 - || info->del_set.flags[IP_SET_MAX_BINDINGS] != 0) { - ip_set_printk("That's nasty!"); - return 0; /* error */ - } - - return 1; -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -static void destroy(void *targetinfo, - unsigned int targetsize) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) -static void destroy(const struct xt_target *target, - void *targetinfo, - unsigned int targetsize) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -static void destroy(const struct xt_target *target, - void *targetinfo) -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) */ -static void destroy(const struct xt_tgdtor_param *par) -#endif -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - const struct ipt_set_info_target *info = targetinfo; -#else - const struct ipt_set_info_target *info = par->targinfo; -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) - if (targetsize != IPT_ALIGN(sizeof(struct ipt_set_info_target))) { - ip_set_printk("invalid targetsize %d", targetsize); - return; - } -#endif - if (info->add_set.index != IP_SET_INVALID_ID) - ip_set_put_byindex(info->add_set.index); - if (info->del_set.index != IP_SET_INVALID_ID) - ip_set_put_byindex(info->del_set.index); -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -static struct xt_target SET_target = { - .name = "SET", - .target = target, - 
.checkentry = checkentry, - .destroy = destroy, - .me = THIS_MODULE -}; -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17) */ -static struct xt_target SET_target = { - .name = "SET", - .family = AF_INET, - .target = target, - .targetsize = sizeof(struct ipt_set_info_target), - .checkentry = checkentry, - .destroy = destroy, - .me = THIS_MODULE -}; -#endif - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("iptables IP set target module"); - -static int __init ipt_SET_init(void) -{ - return xt_register_target(&SET_target); -} - -static void __exit ipt_SET_fini(void) -{ - xt_unregister_target(&SET_target); -} - -module_init(ipt_SET_init); -module_exit(ipt_SET_fini); diff --git a/kernel/ipt_set.c b/kernel/ipt_set.c deleted file mode 100644 index 2f97cbb..0000000 --- a/kernel/ipt_set.c +++ /dev/null @@ -1,238 +0,0 @@ -/* Copyright (C) 2000-2002 Joakim Axelsson - * Patrick Schaaf - * Martin Josefsson - * Copyright (C) 2003-2004 Jozsef Kadlecsik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* Kernel module to match an IP set. 
*/ - -#include -#include -#include -#include - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) -#include -#define xt_register_match ipt_register_match -#define xt_unregister_match ipt_unregister_match -#define xt_match ipt_match -#else -#include -#endif -#include -#include - -static inline int -match_set(const struct ipt_set_info *info, - const struct sk_buff *skb, - int inv) -{ - if (ip_set_testip_kernel(info->index, skb, info->flags)) - inv = !inv; - return inv; -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - const void *hdr, - u_int16_t datalen, - int *hotdrop) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - int *hotdrop) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) */ -static bool -match(const struct sk_buff *skb, - const struct xt_match_param *par) -#endif -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - const struct ipt_set_info_match *info = matchinfo; -#else - const 
struct ipt_set_info_match *info = par->matchinfo; -#endif - - return match_set(&info->match_set, - skb, - info->match_set.flags[0] & IPSET_MATCH_INV); -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) -static int -checkentry(const char *tablename, - const struct ipt_ip *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -static int -checkentry(const char *tablename, - const void *inf, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) -static int -checkentry(const char *tablename, - const void *inf, - const struct xt_match *match, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -static int -checkentry(const char *tablename, - const void *inf, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -static bool -checkentry(const char *tablename, - const void *inf, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) */ -static bool -checkentry(const struct xt_mtchk_param *par) -#endif -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - struct ipt_set_info_match *info = matchinfo; -#else - struct ipt_set_info_match *info = par->matchinfo; -#endif - ip_set_id_t index; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) - if (matchsize != IPT_ALIGN(sizeof(struct ipt_set_info_match))) { - ip_set_printk("invalid matchsize %d", matchsize); - return 0; - } -#endif - - index = ip_set_get_byindex(info->match_set.index); - - if (index == IP_SET_INVALID_ID) { - ip_set_printk("Cannot find set indentified by id %u to match", - info->match_set.index); - return 0; /* error */ - } - if (info->match_set.flags[IP_SET_MAX_BINDINGS] != 0) { - ip_set_printk("That's nasty!"); - return 0; /* error */ - } - - return 1; 
-} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -static void destroy(void *matchinfo, - unsigned int matchsize) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) -static void destroy(const struct xt_match *match, - void *matchinfo, - unsigned int matchsize) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -static void destroy(const struct xt_match *match, - void *matchinfo) -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) */ -static void destroy(const struct xt_mtdtor_param *par) -#endif -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - struct ipt_set_info_match *info = matchinfo; -#else - struct ipt_set_info_match *info = par->matchinfo; -#endif - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) - if (matchsize != IPT_ALIGN(sizeof(struct ipt_set_info_match))) { - ip_set_printk("invalid matchsize %d", matchsize); - return; - } -#endif - ip_set_put_byindex(info->match_set.index); -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -static struct xt_match set_match = { - .name = "set", - .match = &match, - .checkentry = &checkentry, - .destroy = &destroy, - .me = THIS_MODULE -}; -#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17) */ -static struct xt_match set_match = { - .name = "set", - .family = AF_INET, - .match = &match, - .matchsize = sizeof(struct ipt_set_info_match), - .checkentry = &checkentry, - .destroy = &destroy, - .me = THIS_MODULE -}; -#endif - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik "); -MODULE_DESCRIPTION("iptables IP set match module"); - -static int __init ipt_ipset_init(void) -{ - return xt_register_match(&set_match); -} - -static void __exit ipt_ipset_fini(void) -{ - xt_unregister_match(&set_match); -} - -module_init(ipt_ipset_init); -module_exit(ipt_ipset_fini); diff --git a/kernel/xt_set.c b/kernel/xt_set.c new file mode 100644 index 0000000..7dfd5ee --- /dev/null +++ b/kernel/xt_set.c @@ -0,0 +1,356 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright 
(C) 2003-2010 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module which implements the set match and SET target + * for netfilter/iptables. */ + +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("Xtables: IP set match and target module"); +MODULE_ALIAS("xt_SET"); +MODULE_ALIAS("ipt_set"); +MODULE_ALIAS("ipt_SET"); + +static inline int +match_set(ip_set_id_t index, const struct sk_buff *skb, + u8 pf, u8 dim, u8 flags, int inv) +{ + if (ip_set_test(index, skb, pf, dim, flags)) + inv = !inv; + return inv; +} + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + +static bool +set_match_v0(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_set_info_match_v0 *info = par->matchinfo; + + return match_set(info->match_set.index, skb, par->family, + info->match_set.u.compat.dim, + info->match_set.u.compat.flags, + info->match_set.u.compat.flags & IPSET_INV_MATCH); +} + +static void +compat_flags(struct xt_set_info_v0 *info) +{ + u_int8_t i; + + /* Fill out compatibility data according to enum ip_set_kopt */ + info->u.compat.dim = IPSET_DIM_ZERO; + if (info->u.flags[0] & IPSET_MATCH_INV) + info->u.compat.flags |= IPSET_INV_MATCH; + for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) { + info->u.compat.dim++; + if (info->u.flags[i] & IPSET_SRC) + info->u.compat.flags |= (1 << info->u.compat.dim); + } +} + +static bool +set_match_v0_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match_v0 *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match", + info->match_set.index); + return 0; /* error */ + } + if
(info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { + pr_warning("That's nasty!"); + return 0; /* error */ + } + + /* Fill out compatibility data */ + compat_flags(&info->match_set); + + return 1; +} + +static void +set_match_v0_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match *info = par->matchinfo; + + + ip_set_nfnl_put(info->match_set.index); +} + +static unsigned int +set_target_v0(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_set_info_target_v0 *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, skb, par->family, + info->add_set.u.compat.dim, + info->add_set.u.compat.flags); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, skb, par->family, + info->del_set.u.compat.dim, + info->del_set.u.compat.flags); + + return XT_CONTINUE; +} + +static bool +set_target_v0_checkentry(const struct xt_tgchk_param *par) +{ + struct xt_set_info_target_v0 *info = par->targinfo; + ip_set_id_t index; + + if (info->add_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->add_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("cannot find add_set index %u as target", + info->add_set.index); + return 0; /* error */ + } + } + + if (info->del_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->del_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("cannot find del_set index %u as target", + info->del_set.index); + return 0; /* error */ + } + } + if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 + || info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { + pr_warning("That's nasty!"); + return 0; /* error */ + } + + /* Fill out compatibility data */ + compat_flags(&info->add_set); + compat_flags(&info->del_set); + + return 1; +} + +static void +set_target_v0_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_set_info_target_v0 *info = par->targinfo; + + if (info->add_set.index != 
IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); +} + +/* Revision 1: current interface to netfilter/iptables */ + +static bool +set_match(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_set_info_match *info = par->matchinfo; + + return match_set(info->match_set.index, skb, par->family, + info->match_set.dim, + info->match_set.flags, + info->match_set.flags & IPSET_INV_MATCH); +} + +static bool +set_match_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match", + info->match_set.index); + return 0; /* error */ + } + if (info->match_set.dim > IPSET_DIM_MAX) { + pr_warning("That's nasty!"); + return 0; /* error */ + } + + return 1; +} + +static void +set_match_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match *info = par->matchinfo; + + + ip_set_nfnl_put(info->match_set.index); +} + +/* Set target */ + +static unsigned int +set_target(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, + skb, par->family, + info->add_set.dim, + info->add_set.flags); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, + skb, par->family, + info->del_set.dim, + info->del_set.flags); + + return XT_CONTINUE; +} + +static bool +set_target_checkentry(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + ip_set_id_t index; + + if (info->add_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->add_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("cannot find add_set index %u as
target", + info->add_set.index); + return 0; /* error */ + } + } + + if (info->del_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->del_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("cannot find del_set index %u as target", + info->del_set.index); + return 0; /* error */ + } + } + if (info->add_set.dim > IPSET_DIM_MAX + || info->del_set.dim > IPSET_DIM_MAX) { + pr_warning("That's nasty!"); + return 0; /* error */ + } + + return 1; +} + +static void +set_target_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); +} + +static struct xt_match set_matches[] __read_mostly = { + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 0, + .match = set_match_v0, + .matchsize = sizeof(struct xt_set_info_match_v0), + .checkentry = set_match_v0_checkentry, + .destroy = set_match_v0_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 1, + .match = set_match, + .matchsize = sizeof(struct xt_set_info_match), + .checkentry = set_match_checkentry, + .destroy = set_match_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 1, + .match = set_match, + .matchsize = sizeof(struct xt_set_info_match), + .checkentry = set_match_checkentry, + .destroy = set_match_destroy, + .me = THIS_MODULE + }, +}; + +static struct xt_target set_targets[] __read_mostly = { + { + .name = "SET", + .revision = 0, + .family = NFPROTO_IPV4, + .target = set_target_v0, + .targetsize = sizeof(struct xt_set_info_target_v0), + .checkentry = set_target_v0_checkentry, + .destroy = set_target_v0_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 1, + .family = NFPROTO_IPV4, + .target = set_target, + .targetsize = sizeof(struct xt_set_info_target), +
.checkentry = set_target_checkentry, + .destroy = set_target_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 1, + .family = NFPROTO_IPV6, + .target = set_target, + .targetsize = sizeof(struct xt_set_info_target), + .checkentry = set_target_checkentry, + .destroy = set_target_destroy, + .me = THIS_MODULE + }, +}; + +static int __init xt_set_init(void) +{ + int ret = xt_register_matches(set_matches, ARRAY_SIZE(set_matches)); + + if (!ret) { + ret = xt_register_targets(set_targets, + ARRAY_SIZE(set_targets)); + if (ret) + xt_unregister_matches(set_matches, + ARRAY_SIZE(set_matches)); + } + return ret; +} + +static void __exit xt_set_fini(void) +{ + xt_unregister_matches(set_matches, ARRAY_SIZE(set_matches)); + xt_unregister_targets(set_targets, ARRAY_SIZE(set_targets)); +} + +module_init(xt_set_init); +module_exit(xt_set_fini); -- cgit v1.2.3