diff options
author | Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | 2010-04-22 17:09:18 +0200 |
---|---|---|
committer | Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | 2010-04-22 17:09:18 +0200 |
commit | 456b1d993711eb4297012ad4a881c459c0511358 (patch) | |
tree | 518bb02b7cf25ed6f338e96969efe96b642f8bf2 | |
parent | ac0e5da3166da201ea00fd7f3cd927b0a49d8fef (diff) |
Eighth stage to ipset-5
Commit changed files in kernel/...
-rw-r--r-- | kernel/ChangeLog | 3 | ||||
-rw-r--r-- | kernel/Kbuild | 21 | ||||
-rw-r--r-- | kernel/Kconfig.ipset | 69 | ||||
-rw-r--r-- | kernel/Makefile | 11 | ||||
-rw-r--r-- | kernel/include/linux/netfilter/ip_set.h | 838 | ||||
-rw-r--r-- | kernel/include/linux/netfilter/ip_set_bitmap.h | 139 | ||||
-rw-r--r-- | kernel/include/linux/netfilter/ip_set_getport.h | 76 | ||||
-rw-r--r-- | kernel/include/linux/netfilter/ip_set_hash.h | 321 | ||||
-rw-r--r-- | kernel/include/linux/netfilter/ip_set_jhash.h | 48 | ||||
-rw-r--r-- | kernel/ip_set.c | 2061 | ||||
-rw-r--r-- | kernel/ip_set_bitmap_ip.c | 732 | ||||
-rw-r--r-- | kernel/ip_set_bitmap_ipmac.c | 596 | ||||
-rw-r--r-- | kernel/ip_set_bitmap_port.c | 660 | ||||
-rw-r--r-- | kernel/ip_set_hash_ip.c | 550 |
14 files changed, 3590 insertions, 2535 deletions
diff --git a/kernel/ChangeLog b/kernel/ChangeLog index 1ad54cd..b587dc8 100644 --- a/kernel/ChangeLog +++ b/kernel/ChangeLog @@ -1,3 +1,6 @@ +5.0 + - New main branch - ipset completely rewritten + 4.2 - nethash and ipportnethash types counted every entry twice which could produce bogus entries when listing/saving these types diff --git a/kernel/Kbuild b/kernel/Kbuild index 9757a4a..c171711 100644 --- a/kernel/Kbuild +++ b/kernel/Kbuild @@ -1,14 +1,15 @@ EXTRA_CFLAGS := -I$(M)/include \ - -DCONFIG_IP_NF_SET_MAX=$(IP_NF_SET_MAX) \ - -DCONFIG_IP_NF_SET_HASHSIZE=$(IP_NF_SET_HASHSIZE) + -DCONFIG_IP_SET_MAX=$(IP_SET_MAX) -obj-m += ip_set.o ipt_set.o ipt_SET.o -obj-m += ip_set_ipmap.o ip_set_macipmap.o ip_set_portmap.o -obj-m += ip_set_iphash.o ip_set_nethash.o ip_set_ipporthash.o -obj-m += ip_set_ipportiphash.o ip_set_ipportnethash.o -obj-m += ip_set_iptree.o ip_set_iptreemap.o -obj-m += ip_set_setlist.o +obj-m += ip_set.o +#ipt_set.o ipt_SET.o +obj-m += ip_set_bitmap_ip.o ip_set_bitmap_ipmac.o ip_set_bitmap_port.o +obj-m += ip_set_hash_ip.o +#obj-m += ip_set_iphash.o ip_set_nethash.o ip_set_ipporthash.o +#obj-m += ip_set_ipportiphash.o ip_set_ipportnethash.o +#obj-m += ip_set_iptree.o ip_set_iptreemap.o +#obj-m += ip_set_setlist.o # It's for me... -incdirs := $(M) $(M)/include/linux/netfilter_ipv4 -clean-files := $(foreach dir,$(incdirs),$(wildcard $(dir)/*~)) *.m.c +incdirs := $(M) $(M)/include/linux/netfilter +clean-files := $(foreach dir,$(incdirs),$(wildcard $(dir)/*~)) diff --git a/kernel/Kconfig.ipset b/kernel/Kconfig.ipset index 8b27517..7f7a34a 100644 --- a/kernel/Kconfig.ipset +++ b/kernel/Kconfig.ipset @@ -1,4 +1,4 @@ -config IP_NF_SET +config IP_SET tristate "IP set support" depends on INET && NETFILTER help @@ -8,11 +8,11 @@ config IP_NF_SET To compile it as a module, choose M here. If unsure, say N. 
-config IP_NF_SET_MAX +config IP_SET_MAX int "Maximum number of IP sets" default 256 range 2 65534 - depends on IP_NF_SET + depends on IP_SET help You can define here default value of the maximum number of IP sets for the kernel. @@ -20,117 +20,106 @@ config IP_NF_SET_MAX The value can be overriden by the 'max_sets' module parameter of the 'ip_set' module. -config IP_NF_SET_HASHSIZE - int "Hash size for bindings of IP sets" - default 1024 - depends on IP_NF_SET - help - You can define here default value of the hash size for - bindings of IP sets. - - The value can be overriden by the 'hash_size' module - parameter of the 'ip_set' module. - -config IP_NF_SET_IPMAP +config IP_SET_IPMAP tristate "ipmap set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the ipmap set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_MACIPMAP +config IP_SET_MACIPMAP tristate "macipmap set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the macipmap set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_PORTMAP +config IP_SET_PORTMAP tristate "portmap set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the portmap set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_IPHASH +config IP_SET_IPHASH tristate "iphash set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the iphash set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_NETHASH +config IP_SET_NETHASH tristate "nethash set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the nethash set type support. To compile it as a module, choose M here. If unsure, say N. 
-config IP_NF_SET_IPPORTHASH +config IP_SET_IPPORTHASH tristate "ipporthash set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the ipporthash set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_IPPORTIPHASH +config IP_SET_IPPORTIPHASH tristate "ipportiphash set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the ipportiphash set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_IPPORTNETHASH +config IP_SET_IPPORTNETHASH tristate "ipportnethash set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the ipportnethash set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_IPTREE +config IP_SET_IPTREE tristate "iptree set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the iptree set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_IPTREEMAP +config IP_SET_IPTREEMAP tristate "iptreemap set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the iptreemap set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_SET_SETLIST +config IP_SET_SETLIST tristate "setlist set support" - depends on IP_NF_SET + depends on IP_SET help This option adds the setlist set type support. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_SET +config IP_MATCH_SET tristate "set match support" - depends on IP_NF_SET + depends on IP_SET help Set matching matches against given IP sets. You need the ipset utility to create and set up the sets. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_SET +config IP_TARGET_SET tristate "SET target support" - depends on IP_NF_SET + depends on IP_SET help The SET target makes possible to add/delete entries in IP sets. 
diff --git a/kernel/Makefile b/kernel/Makefile index 9ec91f6..ca85e88 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -3,14 +3,5 @@ include Kbuild else KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) -C $KERNELDIR M=`pwd` $@ - -expand_macros: $(patsubst %.c, %.m.c, $(filter-out %.mod.c %.m.c, $(wildcard ip_set_*.c))) - -%.m.c: %.c - ./expand_macros.pl < $< > $@ - -clean: - rm -rf *.m.c - + $(MAKE) -C $(KERNELDIR) M=`pwd` $@ endif diff --git a/kernel/include/linux/netfilter/ip_set.h b/kernel/include/linux/netfilter/ip_set.h index da17319..d0b47a0 100644 --- a/kernel/include/linux/netfilter/ip_set.h +++ b/kernel/include/linux/netfilter/ip_set.h @@ -4,558 +4,432 @@ /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> * Martin Josefsson <gandalf@wlug.westbo.se> - * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#if 0 +#if 1 #define IP_SET_DEBUG #endif -/* - * A sockopt of such quality has hardly ever been seen before on the open - * market! This little beauty, hardly ever used: above 64, so it's - * traditionally used for firewalling, not touched (even once!) by the - * 2.0, 2.2 and 2.4 kernels! - * - * Comes with its own certificate of authenticity, valid anywhere in the - * Free world! 
- * - * Rusty, 19.4.2000 - */ -#define SO_IP_SET 83 - -/* - * Heavily modify by Joakim Axelsson 08.03.2002 - * - Made it more modulebased - * - * Additional heavy modifications by Jozsef Kadlecsik 22.02.2004 - * - bindings added - * - in order to "deal with" backward compatibility, renamed to ipset - */ - -/* - * Used so that the kernel module and ipset-binary can match their versions - */ -#define IP_SET_PROTOCOL_UNALIGNED 3 -#define IP_SET_PROTOCOL_VERSION 4 - -#define IP_SET_MAXNAMELEN 32 /* set names and set typenames */ - -/* Lets work with our own typedef for representing an IP address. - * We hope to make the code more portable, possibly to IPv6... - * - * The representation works in HOST byte order, because most set types - * will perform arithmetic operations and compare operations. - * - * For now the type is an uint32_t. - * - * Make sure to ONLY use the functions when translating and parsing - * in order to keep the host byte order and make it more portable: - * parse_ip() - * parse_mask() - * parse_ipandmask() - * ip_tostring() - * (Joakim: where are they???) - */ - -typedef uint32_t ip_set_ip_t; - -/* Sets are identified by an id in kernel space. Tweak with ip_set_id_t - * and IP_SET_INVALID_ID if you want to increase the max number of sets. 
- */ -typedef uint16_t ip_set_id_t; - -#define IP_SET_INVALID_ID 65535 - -/* How deep we follow bindings */ -#define IP_SET_MAX_BINDINGS 6 - -/* - * Option flags for kernel operations (ipt_set_info) - */ -#define IPSET_SRC 0x01 /* Source match/add */ -#define IPSET_DST 0x02 /* Destination match/add */ -#define IPSET_MATCH_INV 0x04 /* Inverse matching */ - -/* - * Set features - */ -#define IPSET_TYPE_IP 0x01 /* IP address type of set */ -#define IPSET_TYPE_PORT 0x02 /* Port type of set */ -#define IPSET_DATA_SINGLE 0x04 /* Single data storage */ -#define IPSET_DATA_DOUBLE 0x08 /* Double data storage */ -#define IPSET_DATA_TRIPLE 0x10 /* Triple data storage */ -#define IPSET_TYPE_IP1 0x20 /* IP address type of set */ -#define IPSET_TYPE_SETNAME 0x40 /* setname type of set */ - -/* Reserved keywords */ -#define IPSET_TOKEN_DEFAULT ":default:" -#define IPSET_TOKEN_ALL ":all:" - -/* SO_IP_SET operation constants, and their request struct types. - * - * Operation ids: - * 0-99: commands with version checking - * 100-199: add/del/test/bind/unbind - * 200-299: list, save, restore - */ - -/* Single shot operations: - * version, create, destroy, flush, rename and swap - * - * Sets are identified by name. 
- */ - -#define IP_SET_REQ_STD \ - unsigned op; \ - unsigned version; \ - char name[IP_SET_MAXNAMELEN] - -#define IP_SET_OP_CREATE 0x00000001 /* Create a new (empty) set */ -struct ip_set_req_create { - IP_SET_REQ_STD; - char typename[IP_SET_MAXNAMELEN]; +/* The protocol version */ +#define IPSET_PROTOCOL 5 + +/* The max length of strings: set and type identifiers */ +#define IPSET_MAXNAMELEN 32 + +/* Message types and commands */ +enum ipset_cmd { + IPSET_CMD_NONE, + IPSET_CMD_CREATE, /* Create a new (empty) set */ + IPSET_CMD_DESTROY, /* Remove a (empty) set */ + IPSET_CMD_FLUSH, /* Remove all elements from a set */ + IPSET_CMD_RENAME, /* Rename a set */ + IPSET_CMD_SWAP, /* Swap two sets */ + IPSET_CMD_LIST, /* List sets */ + IPSET_CMD_SAVE, /* Save sets */ + IPSET_CMD_ADD, /* Add an element to a set */ + IPSET_CMD_DEL, /* Delete an element from a set */ + IPSET_CMD_TEST, /* Test an element in a set */ + IPSET_CMD_HEADER, /* Get set header data only */ + IPSET_CMD_TYPE, /* Get set type */ + IPSET_CMD_PROTOCOL, /* Return protocol version */ + IPSET_MSG_MAX, /* Netlink message commands */ + + /* Commands in userspace: */ + IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* Enter restore mode */ + IPSET_CMD_HELP, /* Get help */ + IPSET_CMD_VERSION, /* Get program version */ + + IPSET_CMD_MAX, + + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* Commit buffered commands */ }; -#define IP_SET_OP_DESTROY 0x00000002 /* Remove a (empty) set */ -struct ip_set_req_std { - IP_SET_REQ_STD; +/* Attributes at command level */ +enum { + IPSET_ATTR_UNSPEC, + IPSET_ATTR_PROTOCOL, /* Protocol version */ + IPSET_ATTR_SETNAME, /* Name of the set */ + IPSET_ATTR_TYPENAME, /* Typename */ + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* rename/swap */ + IPSET_ATTR_REVISION, /* Settype revision */ + IPSET_ATTR_FAMILY, /* Settype family */ + IPSET_ATTR_DATA, /* Nested attributes */ + IPSET_ATTR_ADT, /* Multiple data containers */ + IPSET_ATTR_LINENO, /* Restore lineno */ + IPSET_ATTR_PROTOCOL_MIN,/* Minimal 
supported version number */ + IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + __IPSET_ATTR_CMD_MAX, }; - -#define IP_SET_OP_FLUSH 0x00000003 /* Remove all IPs in a set */ -/* Uses ip_set_req_std */ - -#define IP_SET_OP_RENAME 0x00000004 /* Rename a set */ -/* Uses ip_set_req_create */ - -#define IP_SET_OP_SWAP 0x00000005 /* Swap two sets */ -/* Uses ip_set_req_create */ - -union ip_set_name_index { - char name[IP_SET_MAXNAMELEN]; - ip_set_id_t index; +#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) + +/* CADT specific attributes */ +enum { + IPSET_ATTR_IP = IPSET_ATTR_UNSPEC + 1, + IPSET_ATTR_IP_FROM = IPSET_ATTR_IP, + IPSET_ATTR_IP_TO, + IPSET_ATTR_CIDR, + IPSET_ATTR_PORT, + IPSET_ATTR_PORT_FROM = IPSET_ATTR_PORT, + IPSET_ATTR_PORT_TO, + IPSET_ATTR_TIMEOUT, + IPSET_ATTR_FLAGS, + /* IPSET_ATTR_LINENO */ + /* Reserve empty slots */ + IPSET_ATTR_CADT_MAX = 16, + /* Create-only specific attributes */ + IPSET_ATTR_GC, + IPSET_ATTR_HASHSIZE, + IPSET_ATTR_MAXELEM, + IPSET_ATTR_NETMASK, + IPSET_ATTR_PROBES, + IPSET_ATTR_RESIZE, + IPSET_ATTR_SIZE, + /* Kernel-only */ + IPSET_ATTR_ELEMENTS, + IPSET_ATTR_REFERENCES, + IPSET_ATTR_MEMSIZE, + + __IPSET_ATTR_CREATE_MAX, }; - -#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */ -struct ip_set_req_get_set { - unsigned op; - unsigned version; - union ip_set_name_index set; +#define IPSET_ATTR_CREATE_MAX (__IPSET_ATTR_CREATE_MAX - 1) + +/* ADT specific attributes */ +enum { + IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + 1, + IPSET_ATTR_NAME, + IPSET_ATTR_NAMEREF, + IPSET_ATTR_IP2, + IPSET_ATTR_CIDR2, + __IPSET_ATTR_ADT_MAX, }; - -#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ -/* Uses ip_set_req_get_set */ - -#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ -struct ip_set_req_version { - unsigned op; - unsigned version; +#define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) + +/* Error codes */ +enum ipset_errno { + IPSET_ERR_PRIVATE = 128, + 
IPSET_ERR_PROTOCOL, + IPSET_ERR_FIND_TYPE, + IPSET_ERR_MAX_SETS, + IPSET_ERR_BUSY, + IPSET_ERR_EXIST_SETNAME2, + IPSET_ERR_TYPE_MISMATCH, + IPSET_ERR_EXIST, + IPSET_ERR_INVALID_CIDR, + IPSET_ERR_INVALID_NETMASK, + IPSET_ERR_INVALID_FAMILY, + IPSET_ERR_TIMEOUT, + + IPSET_ERR_TYPE_SPECIFIC = 160, }; - -/* Double shots operations: - * add, del, test, bind and unbind. - * - * First we query the kernel to get the index and type of the target set, - * then issue the command. Validity of IP is checked in kernel in order - * to minimalize sockopt operations. - */ - -/* Get minimal set data for add/del/test/bind/unbind IP */ -#define IP_SET_OP_ADT_GET 0x00000010 /* Get set and type */ -struct ip_set_req_adt_get { - unsigned op; - unsigned version; - union ip_set_name_index set; - char typename[IP_SET_MAXNAMELEN]; + +enum ipset_data_flags { + IPSET_FLAG_BIT_EXIST = 0, + IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST), + + IPSET_FLAG_BIT_BEFORE = 2, + IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE), }; -#define IP_SET_REQ_BYINDEX \ - unsigned op; \ - ip_set_id_t index; - -struct ip_set_req_adt { - IP_SET_REQ_BYINDEX; -}; - -#define IP_SET_OP_ADD_IP 0x00000101 /* Add an IP to a set */ -/* Uses ip_set_req_adt, with type specific addage */ - -#define IP_SET_OP_DEL_IP 0x00000102 /* Remove an IP from a set */ -/* Uses ip_set_req_adt, with type specific addage */ - -#define IP_SET_OP_TEST_IP 0x00000103 /* Test an IP in a set */ -/* Uses ip_set_req_adt, with type specific addage */ - -#define IP_SET_OP_BIND_SET 0x00000104 /* Bind an IP to a set */ -/* Uses ip_set_req_bind, with type specific addage */ -struct ip_set_req_bind { - IP_SET_REQ_BYINDEX; - char binding[IP_SET_MAXNAMELEN]; -}; - -#define IP_SET_OP_UNBIND_SET 0x00000105 /* Unbind an IP from a set */ -/* Uses ip_set_req_bind, with type speficic addage - * index = 0 means unbinding for all sets */ - -#define IP_SET_OP_TEST_BIND_SET 0x00000106 /* Test binding an IP to a set */ -/* Uses ip_set_req_bind, with type specific 
addage */ - -/* Multiple shots operations: list, save, restore. - * - * - check kernel version and query the max number of sets - * - get the basic information on all sets - * and size required for the next step - * - get actual set data: header, data, bindings - */ - -/* Get max_sets and the index of a queried set - */ -#define IP_SET_OP_MAX_SETS 0x00000020 -struct ip_set_req_max_sets { - unsigned op; - unsigned version; - ip_set_id_t max_sets; /* max_sets */ - ip_set_id_t sets; /* real number of sets */ - union ip_set_name_index set; /* index of set if name used */ +/* Commands with settype-specific attributes */ +enum ipset_adt { + IPSET_ADD, + IPSET_DEL, + IPSET_TEST, + IPSET_CREATE, + IPSET_CADT_MAX, }; -/* Get the id and name of the sets plus size for next step */ -#define IP_SET_OP_LIST_SIZE 0x00000201 -#define IP_SET_OP_SAVE_SIZE 0x00000202 -struct ip_set_req_setnames { - unsigned op; - ip_set_id_t index; /* set to list/save */ - u_int32_t size; /* size to get setdata */ - /* followed by sets number of struct ip_set_name_list */ -}; - -struct ip_set_name_list { - char name[IP_SET_MAXNAMELEN]; - char typename[IP_SET_MAXNAMELEN]; - ip_set_id_t index; - ip_set_id_t id; -}; - -/* The actual list operation */ -#define IP_SET_OP_LIST 0x00000203 -struct ip_set_req_list { - IP_SET_REQ_BYINDEX; - /* sets number of struct ip_set_list in reply */ -}; +#ifndef __KERNEL__ +#ifdef IP_SET_DEBUG +#include <stdio.h> +#include <sys/socket.h> +#include <linux/netlink.h> +#define D(format, args...) do { \ + fprintf(stderr, "%s: %s: ", __FILE__, __FUNCTION__); \ + fprintf(stderr, format "\n" , ## args); \ +} while (0) +static inline void +dump_nla(struct nlattr *nla[], int maxlen) +{ + int i; + + for (i = 0; i < maxlen; i++) + D("nla[%u] does%s exist", i, !nla[i] ? 
" NOT" : ""); +} -struct ip_set_list { - ip_set_id_t index; - ip_set_id_t binding; - u_int32_t ref; - u_int32_t header_size; /* Set header data of header_size */ - u_int32_t members_size; /* Set members data of members_size */ - u_int32_t bindings_size;/* Set bindings data of bindings_size */ -}; +#else +#define D(format, args...) +#define dump_nla(nla, maxlen) +#endif +#endif /* !__KERNEL__ */ -struct ip_set_hash_list { - ip_set_ip_t ip; - ip_set_id_t binding; -}; +#ifdef __KERNEL__ +#include <linux/ipv6.h> +#include <linux/netlink.h> +#include <net/netlink.h> -/* The save operation */ -#define IP_SET_OP_SAVE 0x00000204 -/* Uses ip_set_req_list, in the reply replaced by - * sets number of struct ip_set_save plus a marker - * ip_set_save followed by ip_set_hash_save structures. +/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t + * and IPSET_INVALID_ID if you want to increase the max number of sets. */ -struct ip_set_save { - ip_set_id_t index; - ip_set_id_t binding; - u_int32_t header_size; /* Set header data of header_size */ - u_int32_t members_size; /* Set members data of members_size */ -}; +typedef uint16_t ip_set_id_t; -/* At restoring, ip == 0 means default binding for the given set: */ -struct ip_set_hash_save { - ip_set_ip_t ip; - ip_set_id_t id; - ip_set_id_t binding; +#define IPSET_INVALID_ID 65535 + +/* Option flags for kernel operations */ +enum ip_set_kopt { + /* Bit 0 is reserved */ + IPSET_SRC_FLAG = 1, + IPSET_SRC = (1 << IPSET_SRC_FLAG), + IPSET_DST_FLAG = 2, + IPSET_DST = (1 << IPSET_DST_FLAG), + IPSET_INV_FLAG = 3, + IPSET_INV = (1 << IPSET_INV_FLAG), }; -/* The restore operation */ -#define IP_SET_OP_RESTORE 0x00000205 -/* Uses ip_set_req_setnames followed by ip_set_restore structures - * plus a marker ip_set_restore, followed by ip_set_hash_save - * structures. 
- */ -struct ip_set_restore { - char name[IP_SET_MAXNAMELEN]; - char typename[IP_SET_MAXNAMELEN]; - ip_set_id_t index; - u_int32_t header_size; /* Create data of header_size */ - u_int32_t members_size; /* Set members data of members_size */ +/* Set features */ +enum ip_set_feature { + IPSET_TYPE_IP_FLAG = 0, + IPSET_TYPE_IP = (1 << IPSET_TYPE_IP_FLAG), + IPSET_TYPE_PORT_FLAG = 1, + IPSET_TYPE_PORT = (1 << IPSET_TYPE_PORT_FLAG), + IPSET_TYPE_MAC_FLAG = 2, + IPSET_TYPE_MAC = (1 << IPSET_TYPE_MAC_FLAG), + IPSET_TYPE_IP2_FLAG = 3, + IPSET_TYPE_IP2 = (1 << IPSET_TYPE_IP2_FLAG), + IPSET_TYPE_NAME_FLAG = 4, + IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), }; -static inline int bitmap_bytes(ip_set_ip_t a, ip_set_ip_t b) +static inline int +bitmap_bytes(uint32_t a, uint32_t b) { return 4 * ((((b - a + 8) / 8) + 3) / 4); } -/* General limit for the elements in a set */ -#define MAX_RANGE 0x0000FFFF - -/* Alignment: 'unsigned long' unsupported */ -#define IPSET_ALIGNTO 4 -#define IPSET_ALIGN(len) (((len) + IPSET_ALIGNTO - 1) & ~(IPSET_ALIGNTO - 1)) -#define IPSET_VALIGN(len, old) ((old) ? (len) : IPSET_ALIGN(len)) - -#ifdef __KERNEL__ -#include <linux/netfilter_ipv4/ip_set_compat.h> -#include <linux/netfilter_ipv4/ip_set_malloc.h> - -#define ip_set_printk(format, args...) \ +#define ip_set_printk(format, args...) \ do { \ printk("%s: %s: ", __FILE__, __FUNCTION__); \ printk(format "\n" , ## args); \ } while (0) #if defined(IP_SET_DEBUG) -#define DP(format, args...) \ +#define D(format, args...) \ do { \ printk("%s: %s (DBG): ", __FILE__, __FUNCTION__);\ printk(format "\n" , ## args); \ } while (0) -#define IP_SET_ASSERT(x) \ - do { \ - if (!(x)) \ - printk("IP_SET_ASSERT: %s:%i(%s)\n", \ - __FILE__, __LINE__, __FUNCTION__); \ - } while (0) + +static inline void +dump_nla(const struct nlattr * const nla[], int maxlen) +{ + int i; + + for (i = 0; i < maxlen; i++) + printk("nlattr[%u] does%s exist\n", i, nla[i] ? "" : " NOT"); +} #else -#define DP(format, args...) 
-#define IP_SET_ASSERT(x) +#define D(format, args...) +#define dump_nla(nla, maxlen) #endif struct ip_set; -/* - * The ip_set_type definition - one per set type, e.g. "ipmap". - * - * Each individual set has a pointer, set->type, going to one - * of these structures. Function pointers inside the structure implement - * the real behaviour of the sets. - * - * If not mentioned differently, the implementation behind the function - * pointers of a set_type, is expected to return 0 if ok, and a negative - * errno (e.g. -EINVAL) on error. - */ +/* Set type, variant-specific part */ +struct ip_set_type_variant { + /* Kernelspace: test/add/del entries */ + int (*kadt)(struct ip_set *set, const struct sk_buff * skb, + enum ipset_adt adt, uint8_t pf, const uint8_t *flags); + + /* Userspace: test/add/del entries */ + int (*uadt)(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, uint32_t *lineno, uint32_t flags); + + /* When adding entries and set is full, try to resize the set */ + int (*resize)(struct ip_set *set, uint8_t retried); + /* Destroy the set */ + void (*destroy)(struct ip_set *set); + /* Flush the elements */ + void (*flush)(struct ip_set *set); + + /* List set header data */ + int (*head)(struct ip_set *set, struct sk_buff *skb); + /* List elements */ + int (*list)(struct ip_set *set, struct sk_buff *skb, + struct netlink_callback *cb); +}; + +/* Flags for the set type variants */ +enum ip_set_type_flags { + IP_SET_FLAG_VMALLOC_BIT = 0, + IP_SET_FLAG_VMALLOC = (1 << IP_SET_FLAG_VMALLOC_BIT), + IP_SET_FLAG_TIMEOUT_BIT = 1, + IP_SET_FLAG_TIMEOUT = (1 << IP_SET_FLAG_TIMEOUT_BIT), +}; + +/* The core set type structure */ struct ip_set_type { - struct list_head list; /* next in list of set types */ - - /* test for IP in set (kernel: iptables -m set src|dst) - * return 0 if not in set, 1 if in set. 
- */ - int (*testip_kernel) (struct ip_set *set, - const struct sk_buff * skb, - const u_int32_t *flags); - - /* test for IP in set (userspace: ipset -T set IP) - * return 0 if not in set, 1 if in set. - */ - int (*testip) (struct ip_set *set, - const void *data, u_int32_t size); - - /* - * Size of the data structure passed by when - * adding/deletin/testing an entry. - */ - u_int32_t reqsize; - - /* Add IP into set (userspace: ipset -A set IP) - * Return -EEXIST if the address is already in the set, - * and -ERANGE if the address lies outside the set bounds. - * If the address was not already in the set, 0 is returned. - */ - int (*addip) (struct ip_set *set, - const void *data, u_int32_t size); - - /* Add IP into set (kernel: iptables ... -j SET set src|dst) - * Return -EEXIST if the address is already in the set, - * and -ERANGE if the address lies outside the set bounds. - * If the address was not already in the set, 0 is returned. - */ - int (*addip_kernel) (struct ip_set *set, - const struct sk_buff * skb, - const u_int32_t *flags); - - /* remove IP from set (userspace: ipset -D set --entry x) - * Return -EEXIST if the address is NOT in the set, - * and -ERANGE if the address lies outside the set bounds. - * If the address really was in the set, 0 is returned. - */ - int (*delip) (struct ip_set *set, - const void *data, u_int32_t size); - - /* remove IP from set (kernel: iptables ... -j SET --entry x) - * Return -EEXIST if the address is NOT in the set, - * and -ERANGE if the address lies outside the set bounds. - * If the address really was in the set, 0 is returned. 
- */ - int (*delip_kernel) (struct ip_set *set, - const struct sk_buff * skb, - const u_int32_t *flags); - - /* new set creation - allocated type specific items - */ - int (*create) (struct ip_set *set, - const void *data, u_int32_t size); - - /* retry the operation after successfully tweaking the set - */ - int (*retry) (struct ip_set *set); - - /* set destruction - free type specific items - * There is no return value. - * Can be called only when child sets are destroyed. - */ - void (*destroy) (struct ip_set *set); - - /* set flushing - reset all bits in the set, or something similar. - * There is no return value. - */ - void (*flush) (struct ip_set *set); - - /* Listing: size needed for header - */ - u_int32_t header_size; - - /* Listing: Get the header - * - * Fill in the information in "data". - * This function is always run after list_header_size() under a - * writelock on the set. Therefor is the length of "data" always - * correct. - */ - void (*list_header) (const struct ip_set *set, - void *data); - - /* Listing: Get the size for the set members - */ - int (*list_members_size) (const struct ip_set *set, char dont_align); - - /* Listing: Get the set members - * - * Fill in the information in "data". - * This function is always run after list_member_size() under a - * writelock on the set. Therefor is the length of "data" always - * correct. 
- */ - void (*list_members) (const struct ip_set *set, - void *data, char dont_align); - - char typename[IP_SET_MAXNAMELEN]; - unsigned char features; - int protocol_version; + struct list_head list; + + /* Typename */ + char name[IPSET_MAXNAMELEN]; + /* Protocol version */ + uint8_t protocol; + /* Set features to control swapping */ + uint8_t features; + /* Supported family: may be AF_UNSPEC for both AF_INET/AF_INET6 */ + uint8_t family; + /* Type revision */ + uint8_t revision; + + /* Create set */ + int (*create)(struct ip_set *set, + struct nlattr *head, int len, uint32_t flags); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; }; -extern int ip_set_register_set_type(struct ip_set_type *set_type); -extern void ip_set_unregister_set_type(struct ip_set_type *set_type); +extern int ip_set_type_register(struct ip_set_type *set_type); +extern void ip_set_type_unregister(struct ip_set_type *set_type); -/* A generic ipset */ +/* A generic IP set */ struct ip_set { - char name[IP_SET_MAXNAMELEN]; /* the name of the set */ - rwlock_t lock; /* lock for concurrency control */ - ip_set_id_t id; /* set id for swapping */ - atomic_t ref; /* in kernel and in hash references */ - struct ip_set_type *type; /* the set types */ - void *data; /* pooltype specific data */ + /* The name of the set */ + char name[IPSET_MAXNAMELEN]; + /* Lock protecting the set data */ + rwlock_t lock; + /* References to the set */ + atomic_t ref; + /* The core set type */ + const struct ip_set_type *type; + /* The type variant doing the real job */ + const struct ip_set_type_variant *variant; + /* The actual INET family */ + uint8_t family; + /* Set type flags, filled/modified by create/resize */ + uint8_t flags; + /* The type specific data */ + void *data; }; /* register and unregister set references */ -extern ip_set_id_t ip_set_get_byname(const char name[IP_SET_MAXNAMELEN]); -extern ip_set_id_t ip_set_get_byindex(ip_set_id_t index); +extern ip_set_id_t 
ip_set_get_byname(const char name[IPSET_MAXNAMELEN]); extern void ip_set_put_byindex(ip_set_id_t index); -extern ip_set_id_t ip_set_id(ip_set_id_t index); -extern ip_set_id_t __ip_set_get_byname(const char name[IP_SET_MAXNAMELEN], - struct ip_set **set); -extern void __ip_set_put_byindex(ip_set_id_t index); /* API for iptables set match, and SET target */ -extern int ip_set_addip_kernel(ip_set_id_t id, - const struct sk_buff *skb, - const u_int32_t *flags); -extern int ip_set_delip_kernel(ip_set_id_t id, - const struct sk_buff *skb, - const u_int32_t *flags); -extern int ip_set_testip_kernel(ip_set_id_t id, - const struct sk_buff *skb, - const u_int32_t *flags); - -/* Macros to generate functions */ - -#define STRUCT(pre, type) CONCAT2(pre, type) -#define CONCAT2(pre, type) struct pre##type - -#define FNAME(pre, mid, post) CONCAT3(pre, mid, post) -#define CONCAT3(pre, mid, post) pre##mid##post - -#define UADT0(type, adt, args...) \ -static int \ -FNAME(type,_u,adt)(struct ip_set *set, const void *data, u_int32_t size)\ -{ \ - const STRUCT(ip_set_req_,type) *req = data; \ - \ - return FNAME(type,_,adt)(set , ## args); \ +extern int ip_set_add(ip_set_id_t id, const struct sk_buff *skb, + uint8_t family, const uint8_t *flags); +extern int ip_set_del(ip_set_id_t id, const struct sk_buff *skb, + uint8_t family, const uint8_t *flags); +extern int ip_set_test(ip_set_id_t id, const struct sk_buff *skb, + uint8_t family, const uint8_t *flags); + +/* Allocate members */ +static inline void * +ip_set_alloc(size_t size, gfp_t gfp_mask, uint8_t *flags) +{ + void *members = kzalloc(size, gfp_mask); + + if (members) { + *flags &= ~IP_SET_FLAG_VMALLOC; + D("allocated with kmalloc %p", members); + return members; + } + + members = __vmalloc(size, gfp_mask | __GFP_ZERO, PAGE_KERNEL); + if (!members) + return NULL; + *flags |= IP_SET_FLAG_VMALLOC; + D("allocated with vmalloc %p", members); + + return members; +} + +static inline void +ip_set_free(void *members, uint8_t flags) +{ + 
D("free with %s %p", flags & IP_SET_FLAG_VMALLOC ? "vmalloc" : "kmalloc", + members); + if (flags & IP_SET_FLAG_VMALLOC) + vfree(members); + else + kfree(members); } -#define UADT(type, adt, args...) \ - UADT0(type, adt, req->ip , ## args) - -#define KADT(type, adt, getfn, args...) \ -static int \ -FNAME(type,_k,adt)(struct ip_set *set, \ - const struct sk_buff *skb, \ - const u_int32_t *flags) \ -{ \ - ip_set_ip_t ip = getfn(skb, flags); \ - \ - KADT_CONDITION \ - return FNAME(type,_,adt)(set, ip , ##args); \ +/* Useful converters */ +static inline uint32_t +ip_set_get_h32(const struct nlattr *attr) +{ + uint32_t value = nla_get_u32(attr); + + return attr->nla_type & NLA_F_NET_BYTEORDER ? ntohl(value) : value; } -#define REGISTER_MODULE(type) \ -static int __init ip_set_##type##_init(void) \ -{ \ - init_max_page_size(); \ - return ip_set_register_set_type(&ip_set_##type); \ -} \ - \ -static void __exit ip_set_##type##_fini(void) \ -{ \ - /* FIXME: possible race with ip_set_create() */ \ - ip_set_unregister_set_type(&ip_set_##type); \ -} \ - \ -module_init(ip_set_##type##_init); \ -module_exit(ip_set_##type##_fini); - -/* Common functions */ - -static inline ip_set_ip_t -ipaddr(const struct sk_buff *skb, const u_int32_t *flags) +static inline uint16_t +ip_set_get_h16(const struct nlattr *attr) { - return ntohl(flags[0] & IPSET_SRC ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr); + uint16_t value = nla_get_u16(attr); + + return attr->nla_type & NLA_F_NET_BYTEORDER ? ntohs(value) : value; } -#define jhash_ip(map, i, ip) jhash_1word(ip, *(map->initval + i)) +static inline uint32_t +ip_set_get_n32(const struct nlattr *attr) +{ + uint32_t value = nla_get_u32(attr); + + return attr->nla_type & NLA_F_NET_BYTEORDER ? value : htonl(value); +} + +static inline uint16_t +ip_set_get_n16(const struct nlattr *attr) +{ + uint16_t value = nla_get_u16(attr); + + return attr->nla_type & NLA_F_NET_BYTEORDER ? 
value : htons(value); +} + +#define ipset_nest_start(skb, attr) nla_nest_start(skb, attr | NLA_F_NESTED) +#define ipset_nest_end(skb, start) nla_nest_end(skb, start) + +#define NLA_PUT_NET32(skb, type, value) \ + NLA_PUT_BE32(skb, type | NLA_F_NET_BYTEORDER, value) + +#define NLA_PUT_NET16(skb, type, value) \ + NLA_PUT_BE16(skb, type | NLA_F_NET_BYTEORDER, value) + +/* Get address from skbuff */ +static inline uint32_t +ip4addr(const struct sk_buff *skb, const uint8_t *flags) +{ + return flags[0] & IPSET_SRC ? ip_hdr(skb)->saddr + : ip_hdr(skb)->daddr; +} + +static inline void +ip4addrptr(const struct sk_buff *skb, const uint8_t *flags, uint32_t *addr) +{ + *addr = flags[0] & IPSET_SRC ? ip_hdr(skb)->saddr + : ip_hdr(skb)->daddr; +} + +static inline void +ip6addrptr(const struct sk_buff *skb, const uint8_t *flags, + struct in6_addr *addr) +{ + memcpy(addr, flags[0] & IPSET_SRC ? &ipv6_hdr(skb)->saddr + : &ipv6_hdr(skb)->daddr, + sizeof(*addr)); +} #define pack_ip_port(map, ip, port) \ (port + ((ip - ((map)->first_ip)) << 16)) -#endif /* __KERNEL__ */ - -#define UNUSED __attribute__ ((unused)) +#endif /* __KERNEL__ */ -#endif /*_IP_SET_H*/ +#endif /*_IP_SET_H */ diff --git a/kernel/include/linux/netfilter/ip_set_bitmap.h b/kernel/include/linux/netfilter/ip_set_bitmap.h index da3493f..49d0f5c 100644 --- a/kernel/include/linux/netfilter/ip_set_bitmap.h +++ b/kernel/include/linux/netfilter/ip_set_bitmap.h @@ -1,120 +1,29 @@ -#ifndef __IP_SET_BITMAPS_H -#define __IP_SET_BITMAPS_H +#ifndef __IP_SET_BITMAP_H +#define __IP_SET_BITMAP_H -/* Macros to generate functions */ +/* Bitmap type specific error codes */ +enum { + IPSET_ERR_BITMAP_RANGE = IPSET_ERR_TYPE_SPECIFIC, + IPSET_ERR_BITMAP_RANGE_SIZE, +}; #ifdef __KERNEL__ -#define BITMAP_CREATE(type) \ -static int \ -type##_create(struct ip_set *set, const void *data, u_int32_t size) \ -{ \ - int newbytes; \ - const struct ip_set_req_##type##_create *req = data; \ - struct ip_set_##type *map; \ - \ - if (req->from > 
req->to) { \ - DP("bad range"); \ - return -ENOEXEC; \ - } \ - \ - map = kmalloc(sizeof(struct ip_set_##type), GFP_KERNEL); \ - if (!map) { \ - DP("out of memory for %zu bytes", \ - sizeof(struct ip_set_##type)); \ - return -ENOMEM; \ - } \ - map->first_ip = req->from; \ - map->last_ip = req->to; \ - \ - newbytes = __##type##_create(req, map); \ - if (newbytes < 0) { \ - kfree(map); \ - return newbytes; \ - } \ - \ - map->size = newbytes; \ - map->members = ip_set_malloc(newbytes); \ - if (!map->members) { \ - DP("out of memory for %i bytes", newbytes); \ - kfree(map); \ - return -ENOMEM; \ - } \ - memset(map->members, 0, newbytes); \ - \ - set->data = map; \ - return 0; \ -} - -#define BITMAP_DESTROY(type) \ -static void \ -type##_destroy(struct ip_set *set) \ -{ \ - struct ip_set_##type *map = set->data; \ - \ - ip_set_free(map->members, map->size); \ - kfree(map); \ - \ - set->data = NULL; \ +#define IPSET_BITMAP_MAX_RANGE 0x0000FFFF + +/* Common functions */ + +static inline uint32_t +range_to_mask(uint32_t from, uint32_t to, uint8_t *bits) +{ + uint32_t mask = 0xFFFFFFFE; + + *bits = 32; + while (--(*bits) > 0 && mask && (to & mask) != from) + mask <<= 1; + + return mask; } -#define BITMAP_FLUSH(type) \ -static void \ -type##_flush(struct ip_set *set) \ -{ \ - struct ip_set_##type *map = set->data; \ - memset(map->members, 0, map->size); \ -} - -#define BITMAP_LIST_HEADER(type) \ -static void \ -type##_list_header(const struct ip_set *set, void *data) \ -{ \ - const struct ip_set_##type *map = set->data; \ - struct ip_set_req_##type##_create *header = data; \ - \ - header->from = map->first_ip; \ - header->to = map->last_ip; \ - __##type##_list_header(map, header); \ -} - -#define BITMAP_LIST_MEMBERS_SIZE(type, dtype, sizeid, testfn) \ -static int \ -type##_list_members_size(const struct ip_set *set, char dont_align) \ -{ \ - const struct ip_set_##type *map = set->data; \ - ip_set_ip_t i, elements = 0; \ - \ - if (dont_align) \ - return map->size; \ - \ - for 
(i = 0; i < sizeid; i++) \ - if (testfn) \ - elements++; \ - \ - return elements * IPSET_ALIGN(sizeof(dtype)); \ -} - -#define IP_SET_TYPE(type, __features) \ -struct ip_set_type ip_set_##type = { \ - .typename = #type, \ - .features = __features, \ - .protocol_version = IP_SET_PROTOCOL_VERSION, \ - .create = &type##_create, \ - .destroy = &type##_destroy, \ - .flush = &type##_flush, \ - .reqsize = sizeof(struct ip_set_req_##type), \ - .addip = &type##_uadd, \ - .addip_kernel = &type##_kadd, \ - .delip = &type##_udel, \ - .delip_kernel = &type##_kdel, \ - .testip = &type##_utest, \ - .testip_kernel = &type##_ktest, \ - .header_size = sizeof(struct ip_set_req_##type##_create),\ - .list_header = &type##_list_header, \ - .list_members_size = &type##_list_members_size, \ - .list_members = &type##_list_members, \ - .me = THIS_MODULE, \ -}; -#endif /* __KERNEL */ - -#endif /* __IP_SET_BITMAPS_H */ +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_BITMAP_H */ diff --git a/kernel/include/linux/netfilter/ip_set_getport.h b/kernel/include/linux/netfilter/ip_set_getport.h index 18ed729..855f12a 100644 --- a/kernel/include/linux/netfilter/ip_set_getport.h +++ b/kernel/include/linux/netfilter/ip_set_getport.h @@ -2,45 +2,73 @@ #define _IP_SET_GETPORT_H #ifdef __KERNEL__ +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <net/ip.h> -#define INVALID_PORT (MAX_RANGE + 1) +#define IPSET_INVALID_PORT 65536 /* We must handle non-linear skbs */ -static inline ip_set_ip_t -get_port(const struct sk_buff *skb, const u_int32_t *flags) +static uint32_t +get_port(uint8_t pf, const struct sk_buff *skb, const uint8_t *flags) { - struct iphdr *iph = ip_hdr(skb); - u_int16_t offset = ntohs(iph->frag_off) & IP_OFFSET; - switch (iph->protocol) { - case IPPROTO_TCP: { - struct tcphdr tcph; + unsigned short protocol; + unsigned int protoff; + int fragoff; + + switch (pf) { + case AF_INET: { + const struct iphdr *iph = ip_hdr(skb); + + protocol = iph->protocol; + fragoff = ntohs(iph->frag_off) & 
IP_OFFSET; + protoff = ip_hdrlen(skb); + break; + } + case AF_INET6: { + int protohdr; + unsigned short frag_off; - /* See comments at tcp_match in ip_tables.c */ - if (offset) - return INVALID_PORT; + protohdr = ipv6_find_hdr(skb, &protoff, -1, &frag_off); + if (protohdr < 0) + return IPSET_INVALID_PORT; - if (skb_copy_bits(skb, ip_hdr(skb)->ihl*4, &tcph, sizeof(tcph)) < 0) + protocol = protohdr; + fragoff = frag_off; + break; + } + default: + return IPSET_INVALID_PORT; + } + + /* See comments at tcp_match in ip_tables.c */ + if (fragoff) + return IPSET_INVALID_PORT; + + switch (protocol) { + case IPPROTO_TCP: { + struct tcphdr _tcph; + const struct tcphdr *th; + + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) /* No choice either */ - return INVALID_PORT; + return IPSET_INVALID_PORT; - return ntohs(flags[0] & IPSET_SRC ? - tcph.source : tcph.dest); + return flags[0] & IPSET_SRC ? th->source : th->dest; } case IPPROTO_UDP: { - struct udphdr udph; - - if (offset) - return INVALID_PORT; + struct udphdr _udph; + const struct udphdr *uh; - if (skb_copy_bits(skb, ip_hdr(skb)->ihl*4, &udph, sizeof(udph)) < 0) + uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph); + if (uh == NULL) /* No choice either */ - return INVALID_PORT; + return IPSET_INVALID_PORT; - return ntohs(flags[0] & IPSET_SRC ? - udph.source : udph.dest); + return flags[0] & IPSET_SRC ? 
uh->source : uh->dest; } default: - return INVALID_PORT; + return IPSET_INVALID_PORT; } } #endif /* __KERNEL__ */ diff --git a/kernel/include/linux/netfilter/ip_set_hash.h b/kernel/include/linux/netfilter/ip_set_hash.h index 8eeced3..dd183b7 100644 --- a/kernel/include/linux/netfilter/ip_set_hash.h +++ b/kernel/include/linux/netfilter/ip_set_hash.h @@ -1,314 +1,21 @@ -#ifndef __IP_SET_HASHES_H -#define __IP_SET_HASHES_H +#ifndef __IP_SET_HASH_H +#define __IP_SET_HASH_H -#define initval_t uint32_t - -/* Macros to generate functions */ - -#ifdef __KERNEL__ -#define HASH_RETRY0(type, dtype, cond) \ -static int \ -type##_retry(struct ip_set *set) \ -{ \ - struct ip_set_##type *map = set->data, *tmp; \ - dtype *elem; \ - void *members; \ - u_int32_t i, hashsize = map->hashsize; \ - int res; \ - \ - if (map->resize == 0) \ - return -ERANGE; \ - \ - again: \ - res = 0; \ - \ - /* Calculate new hash size */ \ - hashsize += (hashsize * map->resize)/100; \ - if (hashsize == map->hashsize) \ - hashsize++; \ - \ - ip_set_printk("rehashing of set %s triggered: " \ - "hashsize grows from %lu to %lu", \ - set->name, \ - (long unsigned)map->hashsize, \ - (long unsigned)hashsize); \ - \ - tmp = kmalloc(sizeof(struct ip_set_##type) \ - + map->probes * sizeof(initval_t), GFP_ATOMIC); \ - if (!tmp) { \ - DP("out of memory for %zu bytes", \ - sizeof(struct ip_set_##type) \ - + map->probes * sizeof(initval_t)); \ - return -ENOMEM; \ - } \ - tmp->members = harray_malloc(hashsize, sizeof(dtype), GFP_ATOMIC);\ - if (!tmp->members) { \ - DP("out of memory for %zu bytes", hashsize * sizeof(dtype));\ - kfree(tmp); \ - return -ENOMEM; \ - } \ - tmp->hashsize = hashsize; \ - tmp->elements = 0; \ - tmp->probes = map->probes; \ - tmp->resize = map->resize; \ - memcpy(tmp->initval, map->initval, map->probes * sizeof(initval_t));\ - __##type##_retry(tmp, map); \ - \ - write_lock_bh(&set->lock); \ - map = set->data; /* Play safe */ \ - for (i = 0; i < map->hashsize && res == 0; i++) { \ - elem = 
HARRAY_ELEM(map->members, dtype *, i); \ - if (cond) \ - res = __##type##_add(tmp, elem); \ - } \ - if (res) { \ - /* Failure, try again */ \ - write_unlock_bh(&set->lock); \ - harray_free(tmp->members); \ - kfree(tmp); \ - goto again; \ - } \ - \ - /* Success at resizing! */ \ - members = map->members; \ - \ - map->hashsize = tmp->hashsize; \ - map->members = tmp->members; \ - write_unlock_bh(&set->lock); \ - \ - harray_free(members); \ - kfree(tmp); \ - \ - return 0; \ -} - -#define HASH_RETRY(type, dtype) \ - HASH_RETRY0(type, dtype, *elem) - -#define HASH_RETRY2(type, dtype) \ - HASH_RETRY0(type, dtype, elem->ip || elem->ip1) - -#define HASH_CREATE(type, dtype) \ -static int \ -type##_create(struct ip_set *set, const void *data, u_int32_t size) \ -{ \ - const struct ip_set_req_##type##_create *req = data; \ - struct ip_set_##type *map; \ - uint16_t i; \ - \ - if (req->hashsize < 1) { \ - ip_set_printk("hashsize too small"); \ - return -ENOEXEC; \ - } \ - \ - if (req->probes < 1) { \ - ip_set_printk("probes too small"); \ - return -ENOEXEC; \ - } \ - \ - map = kmalloc(sizeof(struct ip_set_##type) \ - + req->probes * sizeof(initval_t), GFP_KERNEL); \ - if (!map) { \ - DP("out of memory for %zu bytes", \ - sizeof(struct ip_set_##type) \ - + req->probes * sizeof(initval_t)); \ - return -ENOMEM; \ - } \ - for (i = 0; i < req->probes; i++) \ - get_random_bytes(((initval_t *) map->initval)+i, 4); \ - map->elements = 0; \ - map->hashsize = req->hashsize; \ - map->probes = req->probes; \ - map->resize = req->resize; \ - if (__##type##_create(req, map)) { \ - kfree(map); \ - return -ENOEXEC; \ - } \ - map->members = harray_malloc(map->hashsize, sizeof(dtype), GFP_KERNEL);\ - if (!map->members) { \ - DP("out of memory for %zu bytes", map->hashsize * sizeof(dtype));\ - kfree(map); \ - return -ENOMEM; \ - } \ - \ - set->data = map; \ - return 0; \ -} - -#define HASH_DESTROY(type) \ -static void \ -type##_destroy(struct ip_set *set) \ -{ \ - struct ip_set_##type *map = 
set->data; \ - \ - harray_free(map->members); \ - kfree(map); \ - \ - set->data = NULL; \ -} - -#define HASH_FLUSH(type, dtype) \ -static void \ -type##_flush(struct ip_set *set) \ -{ \ - struct ip_set_##type *map = set->data; \ - harray_flush(map->members, map->hashsize, sizeof(dtype)); \ - map->elements = 0; \ -} - -#define HASH_FLUSH_CIDR(type, dtype) \ -static void \ -type##_flush(struct ip_set *set) \ -{ \ - struct ip_set_##type *map = set->data; \ - harray_flush(map->members, map->hashsize, sizeof(dtype)); \ - memset(map->cidr, 0, sizeof(map->cidr)); \ - memset(map->nets, 0, sizeof(map->nets)); \ - map->elements = 0; \ -} - -#define HASH_LIST_HEADER(type) \ -static void \ -type##_list_header(const struct ip_set *set, void *data) \ -{ \ - const struct ip_set_##type *map = set->data; \ - struct ip_set_req_##type##_create *header = data; \ - \ - header->hashsize = map->hashsize; \ - header->probes = map->probes; \ - header->resize = map->resize; \ - __##type##_list_header(map, header); \ -} - -#define HASH_LIST_MEMBERS_SIZE(type, dtype) \ -static int \ -type##_list_members_size(const struct ip_set *set, char dont_align) \ -{ \ - const struct ip_set_##type *map = set->data; \ - \ - return (map->elements * IPSET_VALIGN(sizeof(dtype), dont_align));\ -} - -#define HASH_LIST_MEMBERS(type, dtype) \ -static void \ -type##_list_members(const struct ip_set *set, void *data, char dont_align)\ -{ \ - const struct ip_set_##type *map = set->data; \ - dtype *elem, *d; \ - uint32_t i, n = 0; \ - \ - for (i = 0; i < map->hashsize; i++) { \ - elem = HARRAY_ELEM(map->members, dtype *, i); \ - if (*elem) { \ - d = data + n * IPSET_VALIGN(sizeof(dtype), dont_align);\ - *d = *elem; \ - n++; \ - } \ - } \ -} - -#define HASH_LIST_MEMBERS_MEMCPY(type, dtype, nonzero) \ -static void \ -type##_list_members(const struct ip_set *set, void *data, char dont_align)\ -{ \ - const struct ip_set_##type *map = set->data; \ - dtype *elem; \ - uint32_t i, n = 0; \ - \ - for (i = 0; i < 
map->hashsize; i++) { \ - elem = HARRAY_ELEM(map->members, dtype *, i); \ - if (nonzero) { \ - memcpy(data + n * IPSET_VALIGN(sizeof(dtype), dont_align),\ - elem, sizeof(dtype)); \ - n++; \ - } \ - } \ -} - -#define IP_SET_RTYPE(type, __features) \ -struct ip_set_type ip_set_##type = { \ - .typename = #type, \ - .features = __features, \ - .protocol_version = IP_SET_PROTOCOL_VERSION, \ - .create = &type##_create, \ - .retry = &type##_retry, \ - .destroy = &type##_destroy, \ - .flush = &type##_flush, \ - .reqsize = sizeof(struct ip_set_req_##type), \ - .addip = &type##_uadd, \ - .addip_kernel = &type##_kadd, \ - .delip = &type##_udel, \ - .delip_kernel = &type##_kdel, \ - .testip = &type##_utest, \ - .testip_kernel = &type##_ktest, \ - .header_size = sizeof(struct ip_set_req_##type##_create),\ - .list_header = &type##_list_header, \ - .list_members_size = &type##_list_members_size, \ - .list_members = &type##_list_members, \ - .me = THIS_MODULE, \ +/* Bitmap type specific error codes */ +enum { + IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC, + IPSET_ERR_HASH_ELEM, }; -/* Helper functions */ -static inline void -add_cidr_size(uint8_t *cidr, uint8_t size) -{ - uint8_t next; - int i; - - for (i = 0; i < 30 && cidr[i]; i++) { - if (cidr[i] < size) { - next = cidr[i]; - cidr[i] = size; - size = next; - } - } - if (i < 30) - cidr[i] = size; -} - -static inline void -del_cidr_size(uint8_t *cidr, uint8_t size) -{ - int i; - - for (i = 0; i < 29 && cidr[i]; i++) { - if (cidr[i] == size) - cidr[i] = size = cidr[i+1]; - } - cidr[29] = 0; -} -#else -#include <arpa/inet.h> -#endif /* __KERNEL */ - -#ifndef UINT16_MAX -#define UINT16_MAX 65535 -#endif - -static unsigned char shifts[] = {255, 253, 249, 241, 225, 193, 129, 1}; - -static inline ip_set_ip_t -pack_ip_cidr(ip_set_ip_t ip, unsigned char cidr) -{ - ip_set_ip_t addr, *paddr = &addr; - unsigned char n, t, *a; - - addr = htonl(ip & (0xFFFFFFFF << (32 - (cidr)))); #ifdef __KERNEL__ - DP("ip:%u.%u.%u.%u/%u", NIPQUAD(addr), 
cidr); -#endif - n = cidr / 8; - t = cidr % 8; - a = &((unsigned char *)paddr)[n]; - *a = *a /(1 << (8 - t)) + shifts[t]; -#ifdef __KERNEL__ - DP("n: %u, t: %u, a: %u", n, t, *a); - DP("ip:%u.%u.%u.%u/%u, %u.%u.%u.%u", - HIPQUAD(ip), cidr, NIPQUAD(addr)); -#endif - return ntohl(addr); -} +#define initval_t uint32_t +#define IPSET_DEFAULT_HASHSIZE 1024 +#define IPSET_DEFAULT_MAXELEM 65536 +#define IPSET_DEFAULT_PROBES 4 +#define IPSET_DEFAULT_RESIZE 50 -#endif /* __IP_SET_HASHES_H */ +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_HASH_H */ diff --git a/kernel/include/linux/netfilter/ip_set_jhash.h b/kernel/include/linux/netfilter/ip_set_jhash.h index 2000b9f..90bfcc3 100644 --- a/kernel/include/linux/netfilter/ip_set_jhash.h +++ b/kernel/include/linux/netfilter/ip_set_jhash.h @@ -26,30 +26,30 @@ #define __rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) /* __jhash_mix - mix 3 32-bit values reversibly. */ -#define __jhash_mix(a,b,c) \ -{ \ - a -= c; a ^= __rot(c, 4); c += b; \ - b -= a; b ^= __rot(a, 6); a += c; \ - c -= b; c ^= __rot(b, 8); b += a; \ - a -= c; a ^= __rot(c,16); c += b; \ - b -= a; b ^= __rot(a,19); a += c; \ - c -= b; c ^= __rot(b, 4); b += a; \ +#define __jhash_mix(a,b,c) \ +{ \ + a -= c; a ^= __rot(c, 4); c += b; \ + b -= a; b ^= __rot(a, 6); a += c; \ + c -= b; c ^= __rot(b, 8); b += a; \ + a -= c; a ^= __rot(c,16); c += b; \ + b -= a; b ^= __rot(a,19); a += c; \ + c -= b; c ^= __rot(b, 4); b += a; \ } /* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ -#define __jhash_final(a,b,c) \ -{ \ - c ^= b; c -= __rot(b,14); \ - a ^= c; a -= __rot(c,11); \ - b ^= a; b -= __rot(a,25); \ - c ^= b; c -= __rot(b,16); \ - a ^= c; a -= __rot(c,4); \ - b ^= a; b -= __rot(a,14); \ - c ^= b; c -= __rot(b,24); \ +#define __jhash_final(a,b,c) \ +{ \ + c ^= b; c -= __rot(b,14); \ + a ^= c; a -= __rot(c,11); \ + b ^= a; b -= __rot(a,25); \ + c ^= b; c -= __rot(b,16); \ + a ^= c; a -= __rot(c,4); \ + b ^= a; b -= __rot(a,14); \ + c ^= b; c -= __rot(b,24); \ } 
-/* The golden ration: an arbitrary value */ -#define JHASH_GOLDEN_RATIO 0xdeadbeef +/* An arbitrary value */ +#define JHASH_RANDOM_PARAM 0xdeadbeef /* The most generic version, hashes an arbitrary sequence * of bytes. No alignment or length assumptions are made about @@ -61,7 +61,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) const u8 *k = key; /* Set up the internal state */ - a = b = c = JHASH_GOLDEN_RATIO + length + initval; + a = b = c = JHASH_RANDOM_PARAM + length + initval; /* all but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { @@ -104,7 +104,7 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) u32 a, b, c; /* Set up the internal state */ - a = b = c = JHASH_GOLDEN_RATIO + (length<<2) + initval; + a = b = c = JHASH_RANDOM_PARAM + (length<<2) + initval; /* handle most of the key */ while (length > 3) { @@ -135,9 +135,9 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) */ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) { - a += JHASH_GOLDEN_RATIO + initval; - b += JHASH_GOLDEN_RATIO + initval; - c += JHASH_GOLDEN_RATIO + initval; + a += JHASH_RANDOM_PARAM + initval; + b += JHASH_RANDOM_PARAM + initval; + c += JHASH_RANDOM_PARAM + initval; __jhash_final(a, b, c); diff --git a/kernel/ip_set.c b/kernel/ip_set.c index 0ce9d3f..3af8fce 100644 --- a/kernel/ip_set.c +++ b/kernel/ip_set.c @@ -1,6 +1,6 @@ /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> - * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,55 +9,65 @@ /* Kernel module for IP set management */ -#include <linux/version.h> -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) -#include <linux/config.h> -#endif +#include <linux/init.h> 
#include <linux/module.h> #include <linux/moduleparam.h> -#include <linux/kmod.h> +#include <linux/kernel.h> #include <linux/ip.h> #include <linux/skbuff.h> -#include <linux/random.h> -#include <linux/netfilter_ipv4/ip_set_jhash.h> -#include <linux/errno.h> -#include <linux/capability.h> -#include <asm/uaccess.h> -#include <asm/bitops.h> -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) -#include <asm/semaphore.h> -#else -#include <linux/semaphore.h> -#endif #include <linux/spinlock.h> +#include <linux/netlink.h> +#include <net/netlink.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) #include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_set.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/ip_set.h> +#include <linux/netfilter/ip_set_jhash.h> -static struct list_head set_type_list; /* all registered sets */ +static struct list_head ip_set_type_list; /* all registered sets */ static struct ip_set **ip_set_list; /* all individual sets */ -static DEFINE_RWLOCK(ip_set_lock); /* protects the lists and the hash */ -static struct semaphore ip_set_app_mutex; /* serializes user access */ -static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX; -static int protocol_version = IP_SET_PROTOCOL_VERSION; +static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_lists */ +static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; -#define STREQ(a,b) (strncmp(a,b,IP_SET_MAXNAMELEN) == 0) -#define DONT_ALIGN (protocol_version == IP_SET_PROTOCOL_UNALIGNED) -#define ALIGNED(len) IPSET_VALIGN(len, DONT_ALIGN) +#define STREQ(a,b) (strncmp(a,b,IPSET_MAXNAMELEN) == 0) + +static int max_sets; + +module_param(max_sets, int, 0600); +MODULE_PARM_DESC(max_sets, "maximal number of sets"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +MODULE_DESCRIPTION("core IP set support"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* - * Sets are identified either by the index in ip_set_list or by id. 
- * The id never changes. The index may change by swapping and used - * by external references (set/SET netfilter modules, etc.) + * The set types are implemented in modules and registered set types + * can be found in ip_set_type_list. Adding/deleting types is + * serialized by ip_set_type_list_lock/ip_set_type_list_unlock. + */ + +static inline void +ip_set_type_list_lock(void) +{ + mutex_lock(&ip_set_type_mutex); +} + +static inline void +ip_set_type_list_unlock(void) +{ + mutex_unlock(&ip_set_type_mutex); +} + +/* + * Creating/destroying/renaming/swapping affect the existence and + * integrity of a set. All of these can be executed from userspace only + * and serialized by nfnl_lock/nfnl_unlock indirectly from nfnetlink. + * + * Sets are identified by their index in ip_set_list and the index + * is used by the external references (set/SET netfilter modules). * - * Userspace requests are serialized by ip_set_mutex and sets can - * be deleted only from userspace. Therefore ip_set_list locking - * must obey the following rules: + * The set behind an index may change by swapping. 
* - * - kernel requests: read and write locking mandatory - * - user requests: read locking optional, write locking mandatory */ static inline void @@ -75,227 +85,166 @@ __ip_set_put(ip_set_id_t index) /* Add, del and test set entries from kernel */ int -ip_set_testip_kernel(ip_set_id_t index, - const struct sk_buff *skb, - const u_int32_t *flags) +ip_set_test(ip_set_id_t index, const struct sk_buff *skb, + uint8_t family, const uint8_t *flags) { struct ip_set *set; - int res; + int ret = 0; - read_lock_bh(&ip_set_lock); - set = ip_set_list[index]; - IP_SET_ASSERT(set); - DP("set %s, index %u", set->name, index); + rcu_read_lock(); + set = rcu_dereference(ip_set_list[index]); + D("set %s, index %u", set->name, index); read_lock_bh(&set->lock); - res = set->type->testip_kernel(set, skb, flags); + ret = set->variant->kadt(set, skb, IPSET_TEST, family, flags); read_unlock_bh(&set->lock); - read_unlock_bh(&ip_set_lock); + if (ret == -EAGAIN) { + /* Type requests element to be re-added */ + write_lock_bh(&set->lock); + set->variant->kadt(set, skb, IPSET_ADD, family, flags); + write_unlock_bh(&set->lock); + ret = 1; + } + + rcu_read_unlock(); - return (res < 0 ? 0 : res); + return (ret < 0 ? 
0 : ret); } int -ip_set_addip_kernel(ip_set_id_t index, - const struct sk_buff *skb, - const u_int32_t *flags) +ip_set_add(ip_set_id_t index, const struct sk_buff *skb, + uint8_t family, const uint8_t *flags) { struct ip_set *set; - int res; + int ret = 0, retried = 0; - retry: - read_lock_bh(&ip_set_lock); - set = ip_set_list[index]; - IP_SET_ASSERT(set); - DP("set %s, index %u", set->name, index); +retry: + rcu_read_lock(); + set = rcu_dereference(ip_set_list[index]); + D("set %s, index %u", set->name, index); write_lock_bh(&set->lock); - res = set->type->addip_kernel(set, skb, flags); + ret = set->variant->kadt(set, skb, IPSET_ADD, family, flags); write_unlock_bh(&set->lock); - read_unlock_bh(&ip_set_lock); - /* Retry function called without holding any lock */ - if (res == -EAGAIN - && set->type->retry - && (res = set->type->retry(set)) == 0) + rcu_read_unlock(); + /* Retry function must be called without holding any lock */ + if (ret == -EAGAIN + && set->variant->resize + && (ret = set->variant->resize(set, retried++)) == 0) goto retry; - return res; + return ret; } int -ip_set_delip_kernel(ip_set_id_t index, - const struct sk_buff *skb, - const u_int32_t *flags) +ip_set_del(ip_set_id_t index, const struct sk_buff *skb, + uint8_t family, const uint8_t *flags) { struct ip_set *set; - int res; + int ret = 0; - read_lock_bh(&ip_set_lock); - set = ip_set_list[index]; - IP_SET_ASSERT(set); - DP("set %s, index %u", set->name, index); + rcu_read_lock(); + set = rcu_dereference(ip_set_list[index]); + D("set %s, index %u", set->name, index); write_lock_bh(&set->lock); - res = set->type->delip_kernel(set, skb, flags); + ret = set->variant->kadt(set, skb, IPSET_DEL, family, flags); write_unlock_bh(&set->lock); - read_unlock_bh(&ip_set_lock); + rcu_read_unlock(); - return res; + return ret; } /* Register and deregister settype */ +#define family_name(f) ((f) == AF_INET ? "inet" : \ + (f) == AF_INET6 ? 
"inet6" : "any") + static inline struct ip_set_type * -find_set_type(const char *name) +find_set_type(const char *name, uint8_t family, uint8_t revision) { - struct ip_set_type *set_type; + struct ip_set_type *type; - list_for_each_entry(set_type, &set_type_list, list) - if (STREQ(set_type->typename, name)) - return set_type; + list_for_each_entry(type, &ip_set_type_list, list) + if (STREQ(type->name, name) + && (type->family == family || type->family == AF_UNSPEC) + && type->revision == revision) + return type; return NULL; } int -ip_set_register_set_type(struct ip_set_type *set_type) +ip_set_type_register(struct ip_set_type *type) { int ret = 0; - if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) { - ip_set_printk("'%s' uses wrong protocol version %u (want %u)", - set_type->typename, - set_type->protocol_version, - IP_SET_PROTOCOL_VERSION); + if (type->protocol != IPSET_PROTOCOL) { + printk("set type %s, family %s, revision %u uses " + "wrong protocol version %u (want %u)\n", + type->name, family_name(type->family), type->revision, + type->protocol, IPSET_PROTOCOL); return -EINVAL; } - write_lock_bh(&ip_set_lock); - if (find_set_type(set_type->typename)) { + ip_set_type_list_lock(); + if (find_set_type(type->name, type->family, type->revision)) { /* Duplicate! 
*/ - ip_set_printk("'%s' already registered!", - set_type->typename); + printk("type %s, family %s, revision %u already registered!\n", + type->name, family_name(type->family), type->revision); ret = -EINVAL; goto unlock; } - if (!try_module_get(THIS_MODULE)) { - ret = -EFAULT; - goto unlock; - } - list_add(&set_type->list, &set_type_list); - DP("'%s' registered.", set_type->typename); - unlock: - write_unlock_bh(&ip_set_lock); + list_add(&type->list, &ip_set_type_list); + D("type %s, family %s, revision %u registered.", + type->name, family_name(type->family), type->revision); +unlock: + ip_set_type_list_unlock(); return ret; } void -ip_set_unregister_set_type(struct ip_set_type *set_type) +ip_set_type_unregister(struct ip_set_type *type) { - write_lock_bh(&ip_set_lock); - if (!find_set_type(set_type->typename)) { - ip_set_printk("'%s' not registered?", - set_type->typename); + ip_set_type_list_lock(); + if (!find_set_type(type->name, type->family, type->revision)) { + printk("type %s, family %s, revision %u not registered\n", + type->name, family_name(type->family), type->revision); goto unlock; } - list_del(&set_type->list); - module_put(THIS_MODULE); - DP("'%s' unregistered.", set_type->typename); - unlock: - write_unlock_bh(&ip_set_lock); - + list_del(&type->list); + D("type %s, family %s, revision %u unregistered.", + type->name, family_name(type->family), type->revision); +unlock: + ip_set_type_list_unlock(); } -ip_set_id_t -__ip_set_get_byname(const char *name, struct ip_set **set) -{ - ip_set_id_t i, index = IP_SET_INVALID_ID; - - for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL - && STREQ(ip_set_list[i]->name, name)) { - __ip_set_get(i); - index = i; - *set = ip_set_list[i]; - break; - } - } - return index; -} - -void -__ip_set_put_byindex(ip_set_id_t index) -{ - if (ip_set_list[index]) - __ip_set_put(index); -} - -/* - * Userspace routines - */ +/* Get/put a set with referencing */ /* * Find set by name, reference it once. 
The reference makes sure the * thing pointed to, does not go away under our feet. Drop the reference - * later, using ip_set_put(). + * later, using ip_set_put*(). */ ip_set_id_t ip_set_get_byname(const char *name) { - ip_set_id_t i, index = IP_SET_INVALID_ID; + ip_set_id_t i, index = IPSET_INVALID_ID; - down(&ip_set_app_mutex); - for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL - && STREQ(ip_set_list[i]->name, name)) { + nfnl_lock(); + for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) + if (STREQ(ip_set_list[i]->name, name)) { __ip_set_get(i); index = i; - break; } - } - up(&ip_set_app_mutex); - return index; -} - -/* - * Find set by index, reference it once. The reference makes sure the - * thing pointed to, does not go away under our feet. Drop the reference - * later, using ip_set_put(). - */ -ip_set_id_t -ip_set_get_byindex(ip_set_id_t index) -{ - down(&ip_set_app_mutex); + nfnl_unlock(); - if (index >= ip_set_max) - return IP_SET_INVALID_ID; - - if (ip_set_list[index]) - __ip_set_get(index); - else - index = IP_SET_INVALID_ID; - - up(&ip_set_app_mutex); return index; } /* - * Find the set id belonging to the index. - * We are protected by the mutex, so we do not need to use - * ip_set_lock. There is no need to reference the sets either. - */ -ip_set_id_t -ip_set_id(ip_set_id_t index) -{ - if (index >= ip_set_max || !ip_set_list[index]) - return IP_SET_INVALID_ID; - - return ip_set_list[index]->id; -} - -/* * If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. 
@@ -303,1227 +252,1047 @@ ip_set_id(ip_set_id_t index) void ip_set_put_byindex(ip_set_id_t index) { - down(&ip_set_app_mutex); + nfnl_lock(); if (ip_set_list[index]) __ip_set_put(index); - up(&ip_set_app_mutex); + nfnl_unlock(); } -/* Find a set by name or index */ static ip_set_id_t -ip_set_find_byname(const char *name) +find_set_id(const char *name) { - ip_set_id_t i, index = IP_SET_INVALID_ID; + ip_set_id_t i, index = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { + for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { if (ip_set_list[i] != NULL - && STREQ(ip_set_list[i]->name, name)) { + && STREQ(ip_set_list[i]->name, name)) index = i; - break; - } } return index; } static ip_set_id_t -ip_set_find_byindex(ip_set_id_t index) +find_set_id_rcu(const char *name) { - if (index >= ip_set_max || ip_set_list[index] == NULL) - index = IP_SET_INVALID_ID; + ip_set_id_t i, index = IPSET_INVALID_ID; + struct ip_set *set; + for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { + set = rcu_dereference(ip_set_list[i]); + if (set != NULL && STREQ(set->name, name)) + index = i; + } return index; } -/* - * Add, del and test - */ - -static int -ip_set_addip(struct ip_set *set, const void *data, u_int32_t size) +static struct ip_set * +find_set(const char *name) { - int res; - - IP_SET_ASSERT(set); - do { - write_lock_bh(&set->lock); - res = set->type->addip(set, data, size); - write_unlock_bh(&set->lock); - } while (res == -EAGAIN - && set->type->retry - && (res = set->type->retry(set)) == 0); + ip_set_id_t index = find_set_id(name); - return res; + return index == IPSET_INVALID_ID ? 
NULL : ip_set_list[index]; } -static int -ip_set_delip(struct ip_set *set, const void *data, u_int32_t size) -{ - int res; - - IP_SET_ASSERT(set); - - write_lock_bh(&set->lock); - res = set->type->delip(set, data, size); - write_unlock_bh(&set->lock); +/* Communication protocol with userspace over netlink */ + +/* Create a set */ + +static const struct nla_policy +ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_TYPENAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, + [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, +}; - return res; +static inline bool +protocol_failed(const struct nlattr * const tb[]) +{ + return !tb[IPSET_ATTR_PROTOCOL] + || nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; } -static int -ip_set_testip(struct ip_set *set, const void *data, u_int32_t size) +static inline uint32_t +flag_exist(const struct nlmsghdr *nlh) { - int res; - - IP_SET_ASSERT(set); - - read_lock_bh(&set->lock); - res = set->type->testip(set, data, size); - read_unlock_bh(&set->lock); + return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST; +} - return (res > 0 ? 
-EEXIST : res); +static inline bool +flag_nested(const struct nlattr *nla) +{ + return nla->nla_type & NLA_F_NESTED; } static struct ip_set_type * -find_set_type_rlock(const char *typename) +find_set_type_lock(const char *name, uint8_t family, uint8_t revision) { struct ip_set_type *type; - read_lock_bh(&ip_set_lock); - type = find_set_type(typename); + ip_set_type_list_lock(); + type = find_set_type(name, family, revision); if (type == NULL) - read_unlock_bh(&ip_set_lock); + ip_set_type_list_unlock(); return type; } static int -find_free_id(const char *name, - ip_set_id_t *index, - ip_set_id_t *id) +find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) { ip_set_id_t i; - *id = IP_SET_INVALID_ID; + *index = IPSET_INVALID_ID; for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] == NULL) { - if (*id == IP_SET_INVALID_ID) - *id = *index = i; - } else if (STREQ(name, ip_set_list[i]->name)) + if (*index == IPSET_INVALID_ID) + *index = i; + } else if (STREQ(name, ip_set_list[i]->name)) { /* Name clash */ + *set = ip_set_list[i]; return -EEXIST; - } - if (*id == IP_SET_INVALID_ID) - /* No free slot remained */ - return -ERANGE; - /* Check that index is usable as id (swapping) */ - check: - for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL - && ip_set_list[i]->id == *id) { - *id = i; - goto check; } } + if (*index == IPSET_INVALID_ID) + /* No free slot remained */ + return -IPSET_ERR_MAX_SETS; return 0; } -/* - * Create a set - */ -static int -ip_set_create(const char *name, - const char *typename, - ip_set_id_t restore, - const void *data, - u_int32_t size) +static struct nlmsghdr * +start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, + enum ipset_cmd cmd) { - struct ip_set *set; - ip_set_id_t index = 0, id; - int res = 0; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + sizeof(*nfmsg), flags); + if (nlh == NULL) + return NULL; + + nfmsg = 
nlmsg_data(nlh); + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + return nlh; +} + +static inline void +load_type_module(const char *typename) +{ + D("try to load ip_set_%s", typename); + request_module("ip_set_%s", typename); +} - DP("setname: %s, typename: %s, id: %u", name, typename, restore); +static int +ip_set_create(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set, *clash; + ip_set_id_t index = IPSET_INVALID_ID; + const char *name, *typename; + uint8_t family, revision; + uint32_t flags = flag_exist(nlh); + int ret = 0, len; + + if (unlikely(protocol_failed(attr) + || attr[IPSET_ATTR_SETNAME] == NULL + || attr[IPSET_ATTR_TYPENAME] == NULL + || attr[IPSET_ATTR_REVISION] == NULL + || attr[IPSET_ATTR_FAMILY] == NULL + || (attr[IPSET_ATTR_DATA] != NULL + && !flag_nested(attr[IPSET_ATTR_DATA])))) + return -IPSET_ERR_PROTOCOL; + + name = nla_data(attr[IPSET_ATTR_SETNAME]); + typename = nla_data(attr[IPSET_ATTR_TYPENAME]); + family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); + revision = nla_get_u8(attr[IPSET_ATTR_REVISION]); + D("setname: %s, typename: %s, family: %s, revision: %u", + name, typename, family_name(family), revision); /* * First, and without any locks, allocate and initialize * a normal base set structure. */ - set = kmalloc(sizeof(struct ip_set), GFP_KERNEL); + set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); if (!set) return -ENOMEM; rwlock_init(&set->lock); - strncpy(set->name, name, IP_SET_MAXNAMELEN); + strncpy(set->name, name, IPSET_MAXNAMELEN); atomic_set(&set->ref, 0); /* - * Next, take the &ip_set_lock, check that we know the type, - * and take a reference on the type, to make sure it - * stays available while constructing our new set. + * Next, check that we know the type, and take + * a reference on the type, to make sure it stays available + * while constructing our new set. 
* - * After referencing the type, we drop the &ip_set_lock, - * and let the new set construction run without locks. + * After referencing the type, we try to create the type + * specific part of the set without holding any locks. */ - set->type = find_set_type_rlock(typename); + set->type = find_set_type_lock(typename, family, revision); if (set->type == NULL) { /* Try loading the module */ - char modulename[IP_SET_MAXNAMELEN + strlen("ip_set_") + 1]; - strcpy(modulename, "ip_set_"); - strcat(modulename, typename); - DP("try to load %s", modulename); - request_module(modulename); - set->type = find_set_type_rlock(typename); - } - if (set->type == NULL) { - ip_set_printk("no set type '%s', set '%s' not created", - typename, name); - res = -ENOENT; - goto out; + load_type_module(typename); + set->type = find_set_type_lock(typename, family, revision); + if (set->type == NULL) { + printk("Can't find type %s, family %s, revision %u:" + " set '%s' not created", + typename, family_name(family), revision, name); + ret = -IPSET_ERR_FIND_TYPE; + goto out; + } } if (!try_module_get(set->type->me)) { - read_unlock_bh(&ip_set_lock); - res = -EFAULT; + ip_set_type_list_unlock(); + ret = -EFAULT; goto out; } - read_unlock_bh(&ip_set_lock); - - /* Check request size */ - if (size != set->type->header_size) { - ip_set_printk("data length wrong (want %lu, have %lu)", - (long unsigned)set->type->header_size, - (long unsigned)size); - goto put_out; - } + ip_set_type_list_unlock(); /* * Without holding any locks, create private part. */ - res = set->type->create(set, data, size); - if (res != 0) + len = attr[IPSET_ATTR_DATA] ? nla_len(attr[IPSET_ATTR_DATA]) : 0; + D("data len: %u", len); + ret = set->type->create(set, attr[IPSET_ATTR_DATA] ? + nla_data(attr[IPSET_ATTR_DATA]) : NULL, len, + flags); + if (ret != 0) goto put_out; - /* BTW, res==0 here. */ + /* BTW, ret==0 here. */ /* - * Here, we have a valid, constructed set. 
&ip_set_lock again, - * find free id/index and check that it is not already in - * ip_set_list. + * Here, we have a valid, constructed set and we are protected + * by nfnl_lock. Find the first free index in ip_set_list and + * check clashing. */ - write_lock_bh(&ip_set_lock); - if ((res = find_free_id(set->name, &index, &id)) != 0) { - DP("no free id!"); + if ((ret = find_free_id(set->name, &index, &clash)) != 0) { + /* If this is the same set and requested, ignore error */ + if (ret == -EEXIST + && (flags & IPSET_FLAG_EXIST) + && STREQ(set->type->name, clash->type->name) + && set->type->family == clash->type->family + && set->type->revision == clash->type->revision) + ret = 0; goto cleanup; } - /* Make sure restore gets the same index */ - if (restore != IP_SET_INVALID_ID && index != restore) { - DP("Can't restore, sets are screwed up"); - res = -ERANGE; - goto cleanup; - } - /* * Finally! Add our shiny new set to the list, and be done. */ - DP("create: '%s' created with index %u, id %u!", set->name, index, id); - set->id = id; + D("create: '%s' created with index %u!", set->name, index); ip_set_list[index] = set; - write_unlock_bh(&ip_set_lock); - return res; + + return ret; - cleanup: - write_unlock_bh(&ip_set_lock); - set->type->destroy(set); - put_out: +cleanup: + set->variant->destroy(set); +put_out: module_put(set->type->me); - out: +out: kfree(set); - return res; + return ret; } -/* - * Destroy a given existing set - */ -static void +/* Destroy sets */ + +static const struct nla_policy +ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, +}; + +static inline void ip_set_destroy_set(ip_set_id_t index) { struct ip_set *set = ip_set_list[index]; - IP_SET_ASSERT(set); - DP("set: %s", set->name); - write_lock_bh(&ip_set_lock); + D("set: %s", set->name); ip_set_list[index] = NULL; - write_unlock_bh(&ip_set_lock); /* Must call it 
without holding any lock */ - set->type->destroy(set); + set->variant->destroy(set); module_put(set->type->me); kfree(set); } -/* - * Destroy a set - or all sets - * Sets must not be referenced/used. - */ static int -ip_set_destroy(ip_set_id_t index) +ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { ip_set_id_t i; + + if (unlikely(protocol_failed(attr))) + return -IPSET_ERR_PROTOCOL; - /* ref modification always protected by the mutex */ - if (index != IP_SET_INVALID_ID) { - if (atomic_read(&ip_set_list[index]->ref)) - return -EBUSY; - ip_set_destroy_set(index); - } else { + /* References are protected by the nfnl mutex */ + if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && (atomic_read(&ip_set_list[i]->ref))) - return -EBUSY; + return -IPSET_ERR_BUSY; } - for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL) ip_set_destroy_set(i); } + } else { + i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (i == IPSET_INVALID_ID) + return -EEXIST; + else if (atomic_read(&ip_set_list[i]->ref)) + return -IPSET_ERR_BUSY; + + ip_set_destroy_set(i); } return 0; } -static void +/* Flush sets */ + +static inline void ip_set_flush_set(struct ip_set *set) { - DP("set: %s %u", set->name, set->id); + D("set: %s", set->name); write_lock_bh(&set->lock); - set->type->flush(set); + set->variant->flush(set); write_unlock_bh(&set->lock); } -/* - * Flush data in a set - or in all sets - */ static int -ip_set_flush(ip_set_id_t index) +ip_set_flush(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - if (index != IP_SET_INVALID_ID) { - IP_SET_ASSERT(ip_set_list[index]); - ip_set_flush_set(ip_set_list[index]); - } else { - ip_set_id_t i; - + ip_set_id_t i; + + if (unlikely(protocol_failed(attr))) + return -EPROTO; + + if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) if 
(ip_set_list[i] != NULL) ip_set_flush_set(ip_set_list[i]); + } else { + i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (i == IPSET_INVALID_ID) + return -EEXIST; + + ip_set_flush_set(ip_set_list[i]); } return 0; } /* Rename a set */ + +static const struct nla_policy +ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_SETNAME2] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, +}; + static int -ip_set_rename(ip_set_id_t index, const char *name) +ip_set_rename(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set *set = ip_set_list[index]; + struct ip_set *set; + const char *name2; ip_set_id_t i; - int res = 0; - DP("set: %s to %s", set->name, name); - write_lock_bh(&ip_set_lock); + if (unlikely(protocol_failed(attr) + || attr[IPSET_ATTR_SETNAME] == NULL + || attr[IPSET_ATTR_SETNAME2] == NULL)) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -EEXIST; + + name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL - && STREQ(ip_set_list[i]->name, name)) { - res = -EEXIST; - goto unlock; - } + && STREQ(ip_set_list[i]->name, name2)) + return -IPSET_ERR_EXIST_SETNAME2; } - strncpy(set->name, name, IP_SET_MAXNAMELEN); - unlock: - write_unlock_bh(&ip_set_lock); - return res; + strncpy(set->name, name2, IPSET_MAXNAMELEN); + + return 0; } -/* - * Swap two sets so that name/index points to the other. - * References are also swapped. - */ +/* Swap two sets so that name/index points to the other. + * References are also swapped. 
*/ + static int -ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index) +ip_set_swap(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set *from = ip_set_list[from_index]; - struct ip_set *to = ip_set_list[to_index]; - char from_name[IP_SET_MAXNAMELEN]; - u_int32_t from_ref; + struct ip_set *from, *to; + ip_set_id_t from_id, to_id; + char from_name[IPSET_MAXNAMELEN]; + uint32_t from_ref; + + if (unlikely(protocol_failed(attr) + || attr[IPSET_ATTR_SETNAME] == NULL + || attr[IPSET_ATTR_SETNAME2] == NULL)) + return -IPSET_ERR_PROTOCOL; - DP("set: %s to %s", from->name, to->name); - /* Features must not change. + from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (from_id == IPSET_INVALID_ID) + return -EEXIST; + + to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2])); + if (to_id == IPSET_INVALID_ID) + return -IPSET_ERR_EXIST_SETNAME2; + + from = ip_set_list[from_id]; + to = ip_set_list[to_id]; + + /* Features must not change. * Not an artifical restriction anymore, as we must prevent * possible loops created by swapping in setlist type of sets. 
*/ - if (from->type->features != to->type->features) - return -ENOEXEC; + if (!(from->type->features == to->type->features + && from->type->family == to->type->family)) + return -IPSET_ERR_TYPE_MISMATCH; /* No magic here: ref munging protected by the mutex */ - write_lock_bh(&ip_set_lock); - strncpy(from_name, from->name, IP_SET_MAXNAMELEN); + strncpy(from_name, from->name, IPSET_MAXNAMELEN); from_ref = atomic_read(&from->ref); - strncpy(from->name, to->name, IP_SET_MAXNAMELEN); + strncpy(from->name, to->name, IPSET_MAXNAMELEN); atomic_set(&from->ref, atomic_read(&to->ref)); - strncpy(to->name, from_name, IP_SET_MAXNAMELEN); + strncpy(to->name, from_name, IPSET_MAXNAMELEN); atomic_set(&to->ref, from_ref); - ip_set_list[from_index] = to; - ip_set_list[to_index] = from; - - write_unlock_bh(&ip_set_lock); + rcu_assign_pointer(ip_set_list[from_id], to); + rcu_assign_pointer(ip_set_list[to_id], from); + synchronize_rcu(); + return 0; } -/* - * List set data - */ +/* List/save set data */ static int -ip_set_list_set(ip_set_id_t index, void *data, int *used, int len) +ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set *set = ip_set_list[index]; - struct ip_set_list *set_list; - - /* Pointer to our header */ - set_list = data + *used; - - DP("set: %s, used: %d len %u %p %p", set->name, *used, len, data, data + *used); - - /* Get and ensure header size */ - if (*used + ALIGNED(sizeof(struct ip_set_list)) > len) - goto not_enough_mem; - *used += ALIGNED(sizeof(struct ip_set_list)); - - read_lock_bh(&set->lock); - /* Get and ensure set specific header size */ - set_list->header_size = ALIGNED(set->type->header_size); - if (*used + set_list->header_size > len) - goto unlock_set; - - /* Fill in the header */ - set_list->index = index; - set_list->binding = IP_SET_INVALID_ID; - set_list->ref = atomic_read(&set->ref); - - /* Fill in set spefific header data */ - set->type->list_header(set, data + *used); - *used += set_list->header_size; - - /* Get and ensure set 
specific members size */ - set_list->members_size = set->type->list_members_size(set, DONT_ALIGN); - if (*used + set_list->members_size > len) - goto unlock_set; - - /* Fill in set spefific members data */ - set->type->list_members(set, data + *used, DONT_ALIGN); - *used += set_list->members_size; - read_unlock_bh(&set->lock); - - /* Bindings */ - set_list->bindings_size = 0; - + if (cb->args[2]) + __ip_set_put((ip_set_id_t) cb->args[1]); return 0; +} - unlock_set: - read_unlock_bh(&set->lock); - not_enough_mem: - DP("not enough mem, try again"); - return -EAGAIN; +static inline void +dump_attrs(struct nlmsghdr *nlh) +{ + struct nlattr *attr; + int rem; + + D("dump nlmsg"); + nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) { + D("type: %u, len %u", nla_type(attr), attr->nla_len); + } } -/* - * Save sets - */ -static inline int -ip_set_save_marker(void *data, int *used, int len) +static int +ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) { - struct ip_set_save *set_save; + ip_set_id_t index = IPSET_INVALID_ID, max; + struct ip_set *set = NULL; + struct nlmsghdr *nlh = NULL; + unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + int ret = 0; - DP("used %u, len %u", *used, len); - /* Get and ensure header size */ - if (*used + ALIGNED(sizeof(struct ip_set_save)) > len) - return -ENOMEM; + max = cb->args[0] ? 
cb->args[1] + 1 : ip_set_max; + rcu_read_lock(); + for (; cb->args[1] < max; cb->args[1]++) { + index = (ip_set_id_t) cb->args[1]; + set = rcu_dereference(ip_set_list[index]); + if (set == NULL) { + if (cb->args[0]) { + ret = -EEXIST; + goto unlock; + } + continue; + } + D("List set: %s", set->name); + if (!cb->args[2]) { + /* Start listing: make sure set won't be destroyed */ + D("reference set"); + __ip_set_get(index); + } + nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, flags, + IPSET_CMD_LIST); + if (!nlh) { + ret = -EFAULT; + goto release_refcount; + } + NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name); + switch (cb->args[2]) { + case 0: + /* Core header data */ + NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME, + set->type->name); + NLA_PUT_U8(skb, IPSET_ATTR_FAMILY, + set->type->family); + NLA_PUT_U8(skb, IPSET_ATTR_REVISION, + set->type->revision); + ret = set->variant->head(set, skb); + if (ret < 0) + goto release_refcount; + /* Fall through and add elements */ + default: + read_lock_bh(&set->lock); + ret = set->variant->list(set, skb, cb); + read_unlock_bh(&set->lock); + if (!cb->args[2]) + /* Set is done, proceed with next one */ + cb->args[1]++; + goto release_refcount; + } + } + goto unlock; + +nla_put_failure: + ret = -EFAULT; +release_refcount: + /* If there was an error or set is done, release set */ + if (ret || !cb->args[2]) { + D("release set"); + __ip_set_put(index); + } +unlock: + rcu_read_unlock(); - /* Marker: just for backward compatibility */ - set_save = data + *used; - set_save->index = IP_SET_INVALID_ID; - set_save->header_size = 0; - set_save->members_size = 0; - *used += ALIGNED(sizeof(struct ip_set_save)); + if (nlh) { + nlmsg_end(skb, nlh); + D("nlmsg_len: %u", nlh->nlmsg_len); + dump_attrs(nlh); + } - return 0; + return ret < 0 ? 
ret : skb->len; } static int -ip_set_save_set(ip_set_id_t index, void *data, int *used, int len) +ip_set_dump(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set *set; - struct ip_set_save *set_save; - - /* Pointer to our header */ - set_save = data + *used; + ip_set_id_t index; + + if (unlikely(protocol_failed(attr))) + return -IPSET_ERR_PROTOCOL; + + if (!attr[IPSET_ATTR_SETNAME]) + return netlink_dump_start(ctnl, skb, nlh, + ip_set_dump_start, + ip_set_dump_done); + + rcu_read_lock(); + index = find_set_id_rcu(nla_data(attr[IPSET_ATTR_SETNAME])); + if (index == IPSET_INVALID_ID) { + rcu_read_unlock(); + return -EEXIST; + } + rcu_read_unlock(); - /* Get and ensure header size */ - if (*used + ALIGNED(sizeof(struct ip_set_save)) > len) - goto not_enough_mem; - *used += ALIGNED(sizeof(struct ip_set_save)); + /* cb->args[0] : 1 => dump single set, + * : 0 => dump all sets + * [1] : set index + * [..]: type specific + */ + return netlink_dump_init(ctnl, skb, nlh, + ip_set_dump_start, + ip_set_dump_done, + 2, 1, index); +} - set = ip_set_list[index]; - DP("set: %s, used: %d(%d) %p %p", set->name, *used, len, - data, data + *used); +/* Add, del and test */ - read_lock_bh(&set->lock); - /* Get and ensure set specific header size */ - set_save->header_size = ALIGNED(set->type->header_size); - if (*used + set_save->header_size > len) - goto unlock_set; - - /* Fill in the header */ - set_save->index = index; - set_save->binding = IP_SET_INVALID_ID; - - /* Fill in set spefific header data */ - set->type->list_header(set, data + *used); - *used += set_save->header_size; - - DP("set header filled: %s, used: %d(%lu) %p %p", set->name, *used, - (unsigned long)set_save->header_size, data, data + *used); - /* Get and ensure set specific members size */ - set_save->members_size = set->type->list_members_size(set, DONT_ALIGN); - if (*used + set_save->members_size > len) - goto unlock_set; - - /* Fill in set 
spefific members data */ - set->type->list_members(set, data + *used, DONT_ALIGN); - *used += set_save->members_size; - read_unlock_bh(&set->lock); - DP("set members filled: %s, used: %d(%lu) %p %p", set->name, *used, - (unsigned long)set_save->members_size, data, data + *used); - return 0; +static const struct nla_policy +ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, + [IPSET_ATTR_ADT] = { .type = NLA_NESTED }, +}; - unlock_set: - read_unlock_bh(&set->lock); - not_enough_mem: - DP("not enough mem, try again"); - return -EAGAIN; +static int +call_ad(struct sock *ctnl, struct sk_buff *skb, + const struct nlattr * const attr[], + struct ip_set *set, const struct nlattr *nla, + enum ipset_adt adt, uint32_t flags) +{ + struct nlattr *head = nla_data(nla); + int ret, len = nla_len(nla), retried = 0; + uint32_t lineno = 0; + bool eexist = flags & IPSET_FLAG_EXIST; + + do { + write_lock_bh(&set->lock); + ret = set->variant->uadt(set, head, len, adt, + &lineno, flags); + write_unlock_bh(&set->lock); + } while (ret == -EAGAIN + && set->variant->resize + && (ret = set->variant->resize(set, retried++)) == 0); + + if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) + return 0; + if (lineno && attr[IPSET_ATTR_LINENO]) { + /* Error in restore/batch mode: send back lineno */ + uint32_t *errline = nla_data(attr[IPSET_ATTR_LINENO]); + + *errline = lineno; + } + + return ret; } -/* - * Restore sets - */ static int -ip_set_restore(void *data, int len) +ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - int res = 0; - int line = 0, used = 0, members_size; struct ip_set *set; - struct ip_set_restore *set_restore; - ip_set_id_t index; + const struct nlattr *nla; + uint32_t flags = 
flag_exist(nlh); + int ret = 0; - /* Loop to restore sets */ - while (1) { - line++; - - DP("%d %zu %d", used, ALIGNED(sizeof(struct ip_set_restore)), len); - /* Get and ensure header size */ - if (used + ALIGNED(sizeof(struct ip_set_restore)) > len) - return line; - set_restore = data + used; - used += ALIGNED(sizeof(struct ip_set_restore)); - - /* Ensure data size */ - if (used - + set_restore->header_size - + set_restore->members_size > len) - return line; - - /* Check marker */ - if (set_restore->index == IP_SET_INVALID_ID) { - line--; - goto finish; - } - - /* Try to create the set */ - DP("restore %s %s", set_restore->name, set_restore->typename); - res = ip_set_create(set_restore->name, - set_restore->typename, - set_restore->index, - data + used, - set_restore->header_size); + if (unlikely(protocol_failed(attr) + || attr[IPSET_ATTR_SETNAME] == NULL + || !((attr[IPSET_ATTR_DATA] != NULL) ^ + (attr[IPSET_ATTR_ADT] != NULL)) + || (attr[IPSET_ATTR_DATA] != NULL + && !flag_nested(attr[IPSET_ATTR_DATA])) + || (attr[IPSET_ATTR_ADT] != NULL + && (!flag_nested(attr[IPSET_ATTR_ADT]) + || attr[IPSET_ATTR_LINENO] == NULL)))) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -EEXIST; + + if (attr[IPSET_ATTR_DATA]) { + ret = call_ad(ctnl, skb, attr, + set, attr[IPSET_ATTR_DATA], IPSET_ADD, flags); + } else { + int nla_rem; - if (res != 0) - return line; - used += ALIGNED(set_restore->header_size); - - index = ip_set_find_byindex(set_restore->index); - DP("index %u, restore_index %u", index, set_restore->index); - if (index != set_restore->index) - return line; - /* Try to restore members data */ - set = ip_set_list[index]; - members_size = 0; - DP("members_size %lu reqsize %lu", - (unsigned long)set_restore->members_size, - (unsigned long)set->type->reqsize); - while (members_size + ALIGNED(set->type->reqsize) <= - set_restore->members_size) { - line++; - DP("members: %d, line %d", members_size, line); - res 
= ip_set_addip(set, - data + used + members_size, - set->type->reqsize); - if (!(res == 0 || res == -EEXIST)) - return line; - members_size += ALIGNED(set->type->reqsize); + nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { + if (nla_type(nla) != IPSET_ATTR_DATA + || !flag_nested(nla)) + return -IPSET_ERR_PROTOCOL; + ret = call_ad(ctnl, skb, attr, + set, nla, IPSET_ADD, flags); + if (ret < 0) + return ret; } - - DP("members_size %lu %d", - (unsigned long)set_restore->members_size, members_size); - if (members_size != set_restore->members_size) - return line++; - used += set_restore->members_size; } - - finish: - if (used != len) - return line; - - return 0; + return ret; } static int -ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len) +ip_set_udel(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - void *data; - int res = 0; /* Assume OK */ - size_t offset; - unsigned *op; - struct ip_set_req_adt *req_adt; - ip_set_id_t index = IP_SET_INVALID_ID; - int (*adtfn)(struct ip_set *set, - const void *data, u_int32_t size); - struct fn_table { - int (*fn)(struct ip_set *set, - const void *data, u_int32_t size); - } adtfn_table[] = - { { ip_set_addip }, { ip_set_delip }, { ip_set_testip}, - }; - - DP("optval=%d, user=%p, len=%d", optval, user, len); - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (optval != SO_IP_SET) - return -EBADF; - if (len <= sizeof(unsigned)) { - ip_set_printk("short userdata (want >%zu, got %u)", - sizeof(unsigned), len); - return -EINVAL; - } - data = vmalloc(len); - if (!data) { - DP("out of mem for %u bytes", len); - return -ENOMEM; - } - if (copy_from_user(data, user, len) != 0) { - res = -EFAULT; - goto done; - } - if (down_interruptible(&ip_set_app_mutex)) { - res = -EINTR; - goto done; - } + struct ip_set *set; + const struct nlattr *nla; + uint32_t flags = flag_exist(nlh); + int ret = 0; - op = (unsigned *)data; - DP("op=%x", *op); + if 
(unlikely(protocol_failed(attr) + || attr[IPSET_ATTR_SETNAME] == NULL + || !((attr[IPSET_ATTR_DATA] != NULL) ^ + (attr[IPSET_ATTR_ADT] != NULL)) + || (attr[IPSET_ATTR_DATA] != NULL + && !flag_nested(attr[IPSET_ATTR_DATA])) + || (attr[IPSET_ATTR_ADT] != NULL + && (!flag_nested(attr[IPSET_ATTR_ADT]) + || attr[IPSET_ATTR_LINENO] == NULL)))) + return -IPSET_ERR_PROTOCOL; - if (*op < IP_SET_OP_VERSION) { - /* Check the version at the beginning of operations */ - struct ip_set_req_version *req_version = data; - if (!(req_version->version == IP_SET_PROTOCOL_UNALIGNED - || req_version->version == IP_SET_PROTOCOL_VERSION)) { - res = -EPROTO; - goto done; - } - protocol_version = req_version->version; - } - - switch (*op) { - case IP_SET_OP_CREATE:{ - struct ip_set_req_create *req_create = data; - offset = ALIGNED(sizeof(struct ip_set_req_create)); - - if (len < offset) { - ip_set_printk("short CREATE data (want >=%zu, got %u)", - offset, len); - res = -EINVAL; - goto done; - } - req_create->name[IP_SET_MAXNAMELEN - 1] = '\0'; - req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0'; - res = ip_set_create(req_create->name, - req_create->typename, - IP_SET_INVALID_ID, - data + offset, - len - offset); - goto done; - } - case IP_SET_OP_DESTROY:{ - struct ip_set_req_std *req_destroy = data; + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -EEXIST; + + if (attr[IPSET_ATTR_DATA]) { + ret = call_ad(ctnl, skb, attr, + set, attr[IPSET_ATTR_DATA], IPSET_DEL, flags); + } else { + int nla_rem; - if (len != sizeof(struct ip_set_req_std)) { - ip_set_printk("invalid DESTROY data (want %zu, got %u)", - sizeof(struct ip_set_req_std), len); - res = -EINVAL; - goto done; - } - if (STREQ(req_destroy->name, IPSET_TOKEN_ALL)) { - /* Destroy all sets */ - index = IP_SET_INVALID_ID; - } else { - req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0'; - index = ip_set_find_byname(req_destroy->name); - - if (index == IP_SET_INVALID_ID) { - res = -ENOENT; - goto done; - } - } - 
- res = ip_set_destroy(index); - goto done; - } - case IP_SET_OP_FLUSH:{ - struct ip_set_req_std *req_flush = data; - - if (len != sizeof(struct ip_set_req_std)) { - ip_set_printk("invalid FLUSH data (want %zu, got %u)", - sizeof(struct ip_set_req_std), len); - res = -EINVAL; - goto done; + nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { + if (nla_type(nla) != IPSET_ATTR_DATA + || !flag_nested(nla)) + return -IPSET_ERR_PROTOCOL; + ret = call_ad(ctnl, skb, attr, + set, nla, IPSET_DEL, flags); + if (ret < 0) + return ret; } - if (STREQ(req_flush->name, IPSET_TOKEN_ALL)) { - /* Flush all sets */ - index = IP_SET_INVALID_ID; - } else { - req_flush->name[IP_SET_MAXNAMELEN - 1] = '\0'; - index = ip_set_find_byname(req_flush->name); - - if (index == IP_SET_INVALID_ID) { - res = -ENOENT; - goto done; - } - } - res = ip_set_flush(index); - goto done; } - case IP_SET_OP_RENAME:{ - struct ip_set_req_create *req_rename = data; - - if (len != sizeof(struct ip_set_req_create)) { - ip_set_printk("invalid RENAME data (want %zu, got %u)", - sizeof(struct ip_set_req_create), len); - res = -EINVAL; - goto done; - } + return ret; +} - req_rename->name[IP_SET_MAXNAMELEN - 1] = '\0'; - req_rename->typename[IP_SET_MAXNAMELEN - 1] = '\0'; - - index = ip_set_find_byname(req_rename->name); - if (index == IP_SET_INVALID_ID) { - res = -ENOENT; - goto done; - } - res = ip_set_rename(index, req_rename->typename); - goto done; - } - case IP_SET_OP_SWAP:{ - struct ip_set_req_create *req_swap = data; - ip_set_id_t to_index; - - if (len != sizeof(struct ip_set_req_create)) { - ip_set_printk("invalid SWAP data (want %zu, got %u)", - sizeof(struct ip_set_req_create), len); - res = -EINVAL; - goto done; - } +static int +ip_set_utest(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + int ret = 0; - req_swap->name[IP_SET_MAXNAMELEN - 1] = '\0'; - req_swap->typename[IP_SET_MAXNAMELEN - 1] = '\0'; + if 
(unlikely(protocol_failed(attr) + || attr[IPSET_ATTR_SETNAME] == NULL + || attr[IPSET_ATTR_DATA] == NULL + || !flag_nested(attr[IPSET_ATTR_DATA]))) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -EEXIST; + + read_lock_bh(&set->lock); + ret = set->variant->uadt(set, + nla_data(attr[IPSET_ATTR_DATA]), + nla_len(attr[IPSET_ATTR_DATA]), + IPSET_TEST, NULL, 0); + read_unlock_bh(&set->lock); + /* Userspace can't trigger element to be re-added */ + if (ret == -EAGAIN) + ret = 1; + + return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST; +} - index = ip_set_find_byname(req_swap->name); - if (index == IP_SET_INVALID_ID) { - res = -ENOENT; - goto done; - } - to_index = ip_set_find_byname(req_swap->typename); - if (to_index == IP_SET_INVALID_ID) { - res = -ENOENT; - goto done; - } - res = ip_set_swap(index, to_index); - goto done; - } - default: - break; /* Set identified by id */ - } +/* Get headed data of a set */ + +static int +ip_set_header(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t index; + int ret = 0; + + if (unlikely(protocol_failed(attr) + || attr[IPSET_ATTR_SETNAME] == NULL)) + return -IPSET_ERR_PROTOCOL; - /* There we may have add/del/test/bind/unbind/test_bind operations */ - if (*op < IP_SET_OP_ADD_IP || *op > IP_SET_OP_TEST_IP) { - res = -EBADMSG; - goto done; - } - adtfn = adtfn_table[*op - IP_SET_OP_ADD_IP].fn; + index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (index == IPSET_INVALID_ID) + return -EEXIST; + set = ip_set_list[index]; - if (len < ALIGNED(sizeof(struct ip_set_req_adt))) { - ip_set_printk("short data in adt request (want >=%zu, got %u)", - ALIGNED(sizeof(struct ip_set_req_adt)), len); - res = -EINVAL; - goto done; - } - req_adt = data; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return 
-ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_HEADER); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name); + NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name); + NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->type->family); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision); + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return -EFAULT; + + return 0; - index = ip_set_find_byindex(req_adt->index); - if (index == IP_SET_INVALID_ID) { - res = -ENOENT; - goto done; - } - do { - struct ip_set *set = ip_set_list[index]; - size_t offset = ALIGNED(sizeof(struct ip_set_req_adt)); +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EFAULT; +} - IP_SET_ASSERT(set); +/* Get type data */ - if (len - offset != set->type->reqsize) { - ip_set_printk("data length wrong (want %lu, have %zu)", - (long unsigned)set->type->reqsize, - len - offset); - res = -EINVAL; - goto done; +static const struct nla_policy +ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_TYPENAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, +}; + +static bool +find_set_type_minmax(const char *name, uint8_t family, + uint8_t *min, uint8_t *max) +{ + struct ip_set_type *type; + bool ret = false; + + *min = *max = 0; + ip_set_type_list_lock(); + list_for_each_entry(type, &ip_set_type_list, list) + if (STREQ(type->name, name) + && (type->family == family || type->family == AF_UNSPEC)) { + ret = true; + if (type->revision < *min) + *min = type->revision; + else if (type->revision > *max) + *max = type->revision; } - res = adtfn(set, data + offset, len - offset); - } while (0); - - done: - up(&ip_set_app_mutex); - vfree(data); - if (res > 0) - 
res = 0; - DP("final result %d", res); - return res; + ip_set_type_list_unlock(); + + return ret; } static int -ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len) +ip_set_type(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - int res = 0; - unsigned *op; - ip_set_id_t index = IP_SET_INVALID_ID; - void *data; - int copylen = *len; - - DP("optval=%d, user=%p, len=%d", optval, user, *len); - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (optval != SO_IP_SET) - return -EBADF; - if (*len < sizeof(unsigned)) { - ip_set_printk("short userdata (want >=%zu, got %d)", - sizeof(unsigned), *len); - return -EINVAL; - } - data = vmalloc(*len); - if (!data) { - DP("out of mem for %d bytes", *len); - return -ENOMEM; - } - if (copy_from_user(data, user, *len) != 0) { - res = -EFAULT; - goto done; - } - if (down_interruptible(&ip_set_app_mutex)) { - res = -EINTR; - goto done; - } - - op = (unsigned *) data; - DP("op=%x", *op); + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + uint8_t family, min, max; + const char *typename; + int ret = 0; - if (*op < IP_SET_OP_VERSION) { - /* Check the version at the beginning of operations */ - struct ip_set_req_version *req_version = data; - if (!(req_version->version == IP_SET_PROTOCOL_UNALIGNED - || req_version->version == IP_SET_PROTOCOL_VERSION)) { - res = -EPROTO; - goto done; + if (unlikely(protocol_failed(attr) + || attr[IPSET_ATTR_TYPENAME] == NULL + || attr[IPSET_ATTR_FAMILY] == NULL)) + return -IPSET_ERR_PROTOCOL; + + family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); + typename = nla_data(attr[IPSET_ATTR_TYPENAME]); + if (!find_set_type_minmax(typename, family, &min, &max)) { + /* Try to load in the type module */ + load_type_module(typename); + if (!find_set_type_minmax(typename, family, &min, &max)) { + D("can't find: %s, family: %u", typename, family); + return -EEXIST; } - protocol_version = req_version->version; } - switch (*op) { - case 
IP_SET_OP_VERSION: { - struct ip_set_req_version *req_version = data; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_TYPE); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename); + NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min); + nlmsg_end(skb2, nlh2); + + D("Send TYPE, nlmsg_len: %u", nlh2->nlmsg_len); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return -EFAULT; + + return 0; - if (*len != sizeof(struct ip_set_req_version)) { - ip_set_printk("invalid VERSION (want %zu, got %d)", - sizeof(struct ip_set_req_version), - *len); - res = -EINVAL; - goto done; - } +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EFAULT; +} - req_version->version = IP_SET_PROTOCOL_VERSION; - res = copy_to_user(user, req_version, - sizeof(struct ip_set_req_version)); - goto done; - } - case IP_SET_OP_GET_BYNAME: { - struct ip_set_req_get_set *req_get = data; - - if (*len != sizeof(struct ip_set_req_get_set)) { - ip_set_printk("invalid GET_BYNAME (want %zu, got %d)", - sizeof(struct ip_set_req_get_set), *len); - res = -EINVAL; - goto done; - } - req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; - index = ip_set_find_byname(req_get->set.name); - req_get->set.index = index; - goto copy; - } - case IP_SET_OP_GET_BYINDEX: { - struct ip_set_req_get_set *req_get = data; - - if (*len != sizeof(struct ip_set_req_get_set)) { - ip_set_printk("invalid GET_BYINDEX (want %zu, got %d)", - sizeof(struct ip_set_req_get_set), *len); - res = -EINVAL; - goto done; - } - req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; - index = ip_set_find_byindex(req_get->set.index); - strncpy(req_get->set.name, - index == 
IP_SET_INVALID_ID ? "" - : ip_set_list[index]->name, IP_SET_MAXNAMELEN); - goto copy; - } - case IP_SET_OP_ADT_GET: { - struct ip_set_req_adt_get *req_get = data; - - if (*len != sizeof(struct ip_set_req_adt_get)) { - ip_set_printk("invalid ADT_GET (want %zu, got %d)", - sizeof(struct ip_set_req_adt_get), *len); - res = -EINVAL; - goto done; - } - req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; - index = ip_set_find_byname(req_get->set.name); - if (index != IP_SET_INVALID_ID) { - req_get->set.index = index; - strncpy(req_get->typename, - ip_set_list[index]->type->typename, - IP_SET_MAXNAMELEN - 1); - } else { - res = -ENOENT; - goto done; - } - goto copy; - } - case IP_SET_OP_MAX_SETS: { - struct ip_set_req_max_sets *req_max_sets = data; - ip_set_id_t i; - - if (*len != sizeof(struct ip_set_req_max_sets)) { - ip_set_printk("invalid MAX_SETS (want %zu, got %d)", - sizeof(struct ip_set_req_max_sets), *len); - res = -EINVAL; - goto done; - } +/* Get protocol version */ - if (STREQ(req_max_sets->set.name, IPSET_TOKEN_ALL)) { - req_max_sets->set.index = IP_SET_INVALID_ID; - } else { - req_max_sets->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; - req_max_sets->set.index = - ip_set_find_byname(req_max_sets->set.name); - if (req_max_sets->set.index == IP_SET_INVALID_ID) { - res = -ENOENT; - goto done; - } - } - req_max_sets->max_sets = ip_set_max; - req_max_sets->sets = 0; - for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL) - req_max_sets->sets++; - } - goto copy; - } - case IP_SET_OP_LIST_SIZE: - case IP_SET_OP_SAVE_SIZE: { - struct ip_set_req_setnames *req_setnames = data; - struct ip_set_name_list *name_list; - struct ip_set *set; - ip_set_id_t i; - int used; - - if (*len < ALIGNED(sizeof(struct ip_set_req_setnames))) { - ip_set_printk("short LIST_SIZE (want >=%zu, got %d)", - ALIGNED(sizeof(struct ip_set_req_setnames)), - *len); - res = -EINVAL; - goto done; - } +static const struct nla_policy +ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] __read_mostly = { 
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, +}; - req_setnames->size = 0; - used = ALIGNED(sizeof(struct ip_set_req_setnames)); - for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] == NULL) - continue; - name_list = data + used; - used += ALIGNED(sizeof(struct ip_set_name_list)); - if (used > copylen) { - res = -EAGAIN; - goto done; - } - set = ip_set_list[i]; - /* Fill in index, name, etc. */ - name_list->index = i; - name_list->id = set->id; - strncpy(name_list->name, - set->name, - IP_SET_MAXNAMELEN - 1); - strncpy(name_list->typename, - set->type->typename, - IP_SET_MAXNAMELEN - 1); - DP("filled %s of type %s, index %u\n", - name_list->name, name_list->typename, - name_list->index); - if (!(req_setnames->index == IP_SET_INVALID_ID - || req_setnames->index == i)) - continue; - /* Update size */ - req_setnames->size += - (*op == IP_SET_OP_LIST_SIZE ? - ALIGNED(sizeof(struct ip_set_list)) : - ALIGNED(sizeof(struct ip_set_save))) - + ALIGNED(set->type->header_size) - + set->type->list_members_size(set, DONT_ALIGN); - } - if (copylen != used) { - res = -EAGAIN; - goto done; - } - goto copy; - } - case IP_SET_OP_LIST: { - struct ip_set_req_list *req_list = data; - ip_set_id_t i; - int used; - - if (*len < sizeof(struct ip_set_req_list)) { - ip_set_printk("short LIST (want >=%zu, got %d)", - sizeof(struct ip_set_req_list), *len); - res = -EINVAL; - goto done; - } - index = req_list->index; - if (index != IP_SET_INVALID_ID - && ip_set_find_byindex(index) != index) { - res = -ENOENT; - goto done; - } - used = 0; - if (index == IP_SET_INVALID_ID) { - /* List all sets */ - for (i = 0; i < ip_set_max && res == 0; i++) { - if (ip_set_list[i] != NULL) - res = ip_set_list_set(i, data, &used, *len); - } - } else { - /* List an individual set */ - res = ip_set_list_set(index, data, &used, *len); - } - if (res != 0) - goto done; - else if (copylen != used) { - res = -EAGAIN; - goto done; - } - goto copy; - } - case IP_SET_OP_SAVE: { - struct ip_set_req_list *req_save = 
data; - ip_set_id_t i; - int used; - - if (*len < sizeof(struct ip_set_req_list)) { - ip_set_printk("short SAVE (want >=%zu, got %d)", - sizeof(struct ip_set_req_list), *len); - res = -EINVAL; - goto done; - } - index = req_save->index; - if (index != IP_SET_INVALID_ID - && ip_set_find_byindex(index) != index) { - res = -ENOENT; - goto done; - } +static int +ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + int ret = 0; -#define SETLIST(set) (strcmp(set->type->typename, "setlist") == 0) - - used = 0; - if (index == IP_SET_INVALID_ID) { - /* Save all sets: ugly setlist type dependency */ - int setlist = 0; - setlists: - for (i = 0; i < ip_set_max && res == 0; i++) { - if (ip_set_list[i] != NULL - && !(setlist ^ SETLIST(ip_set_list[i]))) - res = ip_set_save_set(i, data, &used, *len); - } - if (!setlist) { - setlist = 1; - goto setlists; - } - } else { - /* Save an individual set */ - res = ip_set_save_set(index, data, &used, *len); - } - if (res == 0) - res = ip_set_save_marker(data, &used, *len); - - if (res != 0) - goto done; - else if (copylen != used) { - res = -EAGAIN; - goto done; - } - goto copy; - } - case IP_SET_OP_RESTORE: { - struct ip_set_req_setnames *req_restore = data; - size_t offset = ALIGNED(sizeof(struct ip_set_req_setnames)); - int line; - - if (*len < offset || *len != req_restore->size) { - ip_set_printk("invalid RESTORE (want =%lu, got %d)", - (long unsigned)req_restore->size, *len); - res = -EINVAL; - goto done; - } - line = ip_set_restore(data + offset, req_restore->size - offset); - DP("ip_set_restore: %d", line); - if (line != 0) { - res = -EAGAIN; - req_restore->size = line; - copylen = sizeof(struct ip_set_req_setnames); - goto copy; - } - goto done; - } - default: - res = -EBADMSG; - goto done; - } /* end of switch(op) */ - - copy: - DP("set %s, copylen %d", index != IP_SET_INVALID_ID - && ip_set_list[index] - ? 
ip_set_list[index]->name - : ":all:", copylen); - res = copy_to_user(user, data, copylen); - - done: - up(&ip_set_app_mutex); - vfree(data); - if (res > 0) - res = 0; - DP("final result %d", res); - return res; + if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) + return -IPSET_ERR_PROTOCOL; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_PROTOCOL); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return -EFAULT; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EFAULT; } -static struct nf_sockopt_ops so_set = { - .pf = PF_INET, - .set_optmin = SO_IP_SET, - .set_optmax = SO_IP_SET + 1, - .set = &ip_set_sockfn_set, - .get_optmin = SO_IP_SET, - .get_optmax = SO_IP_SET + 1, - .get = &ip_set_sockfn_get, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - .use = 0, -#else - .owner = THIS_MODULE, -#endif +static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { + [IPSET_CMD_CREATE] = { + .call = ip_set_create, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_create_policy, + }, + [IPSET_CMD_DESTROY] = { + .call = ip_set_destroy, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_FLUSH] = { + .call = ip_set_flush, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_RENAME] = { + .call = ip_set_rename, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname2_policy, + }, + [IPSET_CMD_SWAP] = { + .call = ip_set_swap, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname2_policy, + }, + [IPSET_CMD_LIST] = { + .call = ip_set_dump, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_SAVE] = { + 
.call = ip_set_dump, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_ADD] = { + .call = ip_set_uadd, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_DEL] = { + .call = ip_set_udel, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_TEST] = { + .call = ip_set_utest, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_HEADER] = { + .call = ip_set_header, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_TYPE] = { + .call = ip_set_type, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_type_policy, + }, + [IPSET_CMD_PROTOCOL] = { + .call = ip_set_protocol, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_protocol_policy, + }, }; -static int max_sets; - -module_param(max_sets, int, 0600); -MODULE_PARM_DESC(max_sets, "maximal number of sets"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("module implementing core IP set support"); +static struct nfnetlink_subsystem ip_set_netlink_subsys = { + .name = "ip_set", + .subsys_id = NFNL_SUBSYS_IPSET, + .cb_count = IPSET_MSG_MAX, + .cb = ip_set_netlink_subsys_cb, +}; static int __init ip_set_init(void) { - int res; - - /* For the -rt branch, DECLARE_MUTEX/init_MUTEX avoided */ - sema_init(&ip_set_app_mutex, 1); + int ret; if (max_sets) ip_set_max = max_sets; - if (ip_set_max >= IP_SET_INVALID_ID) - ip_set_max = IP_SET_INVALID_ID - 1; + if (ip_set_max >= IPSET_INVALID_ID) + ip_set_max = IPSET_INVALID_ID - 1; - ip_set_list = vmalloc(sizeof(struct ip_set *) * ip_set_max); + ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); if (!ip_set_list) { printk(KERN_ERR "Unable to create ip_set_list\n"); return -ENOMEM; } - memset(ip_set_list, 0, sizeof(struct ip_set *) * ip_set_max); - INIT_LIST_HEAD(&set_type_list); + INIT_LIST_HEAD(&ip_set_type_list); - res = 
nf_register_sockopt(&so_set); - if (res != 0) { - ip_set_printk("SO_SET registry failed: %d", res); - vfree(ip_set_list); - return res; + ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + if (ret != 0) { + printk("ip_set_init: cannot register with nfnetlink.\n"); + kfree(ip_set_list); + return ret; } - printk("ip_set version %u loaded\n", IP_SET_PROTOCOL_VERSION); + printk("ip_set with protocol version %u loaded\n", IPSET_PROTOCOL); return 0; } static void __exit ip_set_fini(void) { - /* There can't be any existing set or binding */ - nf_unregister_sockopt(&so_set); - vfree(ip_set_list); - DP("these are the famous last words"); + /* There can't be any existing set */ + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + kfree(ip_set_list); + D("these are the famous last words"); } -EXPORT_SYMBOL(ip_set_register_set_type); -EXPORT_SYMBOL(ip_set_unregister_set_type); +EXPORT_SYMBOL(ip_set_type_register); +EXPORT_SYMBOL(ip_set_type_unregister); EXPORT_SYMBOL(ip_set_get_byname); -EXPORT_SYMBOL(ip_set_get_byindex); EXPORT_SYMBOL(ip_set_put_byindex); -EXPORT_SYMBOL(ip_set_id); -EXPORT_SYMBOL(__ip_set_get_byname); -EXPORT_SYMBOL(__ip_set_put_byindex); -EXPORT_SYMBOL(ip_set_addip_kernel); -EXPORT_SYMBOL(ip_set_delip_kernel); -EXPORT_SYMBOL(ip_set_testip_kernel); +EXPORT_SYMBOL(ip_set_add); +EXPORT_SYMBOL(ip_set_del); +EXPORT_SYMBOL(ip_set_test); module_init(ip_set_init); module_exit(ip_set_fini); diff --git a/kernel/ip_set_bitmap_ip.c b/kernel/ip_set_bitmap_ip.c index be3c538..ccb5473 100644 --- a/kernel/ip_set_bitmap_ip.c +++ b/kernel/ip_set_bitmap_ip.c @@ -1,13 +1,13 @@ /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> - * Copyright (C) 2003-2008 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the 
Free Software Foundation. */ -/* Kernel module implementing an IP set type: the single bitmap type */ +/* Kernel module implementing an IP set type: the bitmap:ip type */ #include <linux/module.h> #include <linux/ip.h> @@ -16,143 +16,699 @@ #include <asm/uaccess.h> #include <asm/bitops.h> #include <linux/spinlock.h> +#include <linux/netlink.h> +#include <linux/delay.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <net/netlink.h> +#include <net/pfxlen.h> +#include <net/tcp.h> -#include <linux/netfilter_ipv4/ip_set_ipmap.h> +#include <linux/netfilter/ip_set.h> +#include <linux/netfilter/ip_set_bitmap.h> +#define IP_SET_BITMAP_TIMEOUT +#include <linux/netfilter/ip_set_timeout.h> -static inline ip_set_ip_t -ip_to_id(const struct ip_set_ipmap *map, ip_set_ip_t ip) +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +MODULE_DESCRIPTION("bitmap:ip type of IP sets"); +MODULE_ALIAS("ip_set_bitmap:ip"); + +/* Base variant */ + +struct bitmap_ip { + void *members; /* the set members */ + uint32_t first_ip; /* host byte order, included in range */ + uint32_t last_ip; /* host byte order, included in range */ + uint32_t elements; /* number of max elements in the set */ + uint32_t hosts; /* number of hosts in a subnet */ + size_t memsize; /* members size */ + uint8_t netmask; /* subnet netmask */ +}; + +static inline uint32_t +ip_to_id(const struct bitmap_ip *map, uint32_t ip) { - return ((ip & map->netmask) - map->first_ip)/map->hosts; + return ((ip & HOSTMASK(map->netmask)) - map->first_ip)/map->hosts; } static inline int -ipmap_test(const struct ip_set *set, ip_set_ip_t ip) +bitmap_ip_test(const struct bitmap_ip *map, uint32_t id) { - const struct ip_set_ipmap *map = set->data; - - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; - - DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip)); - return !!test_bit(ip_to_id(map, ip), map->members); + return !!test_bit(id, map->members); } -#define KADT_CONDITION 
+static inline int +bitmap_ip_add(struct bitmap_ip *map, uint32_t id) +{ + if (test_and_set_bit(id, map->members)) + return -IPSET_ERR_EXIST; -UADT(ipmap, test) -KADT(ipmap, test, ipaddr) + return 0; +} static inline int -ipmap_add(struct ip_set *set, ip_set_ip_t ip) +bitmap_ip_del(struct bitmap_ip *map, uint32_t id) +{ + if (!test_and_clear_bit(id, map->members)) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, uint8_t pf, const uint8_t *flags) { - struct ip_set_ipmap *map = set->data; + struct bitmap_ip *map = set->data; + uint32_t ip = ntohl(ip4addr(skb, flags)); + + if (pf != AF_INET) + return -EINVAL; if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; + return -IPSET_ERR_BITMAP_RANGE; - DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip)); - if (test_and_set_bit(ip_to_id(map, ip), map->members)) - return -EEXIST; + ip = ip_to_id(map, ip); - return 0; + switch (adt) { + case IPSET_TEST: + return bitmap_ip_test(map, ip); + case IPSET_ADD: + return bitmap_ip_add(map, ip); + case IPSET_DEL: + return bitmap_ip_del(map, ip); + default: + return -EINVAL; + } } -UADT(ipmap, add) -KADT(ipmap, add, ipaddr) +static const struct nla_policy +bitmap_ip_adt_policy[IPSET_ATTR_ADT_MAX+1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_IP_TO] = { .type = NLA_U32 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; -static inline int -ipmap_del(struct ip_set *set, ip_set_ip_t ip) +static int +bitmap_ip_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, uint32_t *lineno, uint32_t flags) { - struct ip_set_ipmap *map = set->data; + struct bitmap_ip *map = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + uint32_t ip, ip_to, id; + int ret = 0; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + bitmap_ip_adt_policy)) + return 
-IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + ip = ip_set_get_h32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; + return -IPSET_ERR_BITMAP_RANGE; + + if (tb[IPSET_ATTR_TIMEOUT]) + return -IPSET_ERR_TIMEOUT; + + if (adt == IPSET_TEST) + return bitmap_ip_test(map, ip_to_id(map, ip)); + + if (tb[IPSET_ATTR_IP_TO]) { + ip_to = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]); + if (ip > ip_to) { + swap(ip, ip_to); + if (ip < map->first_ip) + return -IPSET_ERR_BITMAP_RANGE; + } + } else if (tb[IPSET_ATTR_CIDR]) { + uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip_to = ip | ~HOSTMASK(cidr); + } else + ip_to = ip; + + if (ip_to > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + for (; !before(ip_to, ip); ip += map->hosts) { + id = ip_to_id(map, ip); + ret = adt == IPSET_ADD ? bitmap_ip_add(map, id) + : bitmap_ip_del(map, id); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + return ret; + } + }; + return ret; +} + +static void +bitmap_ip_destroy(struct ip_set *set) +{ + struct bitmap_ip *map = set->data; + + ip_set_free(map->members, set->flags); + kfree(map); + + set->data = NULL; +} - DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip)); - if (!test_and_clear_bit(ip_to_id(map, ip), map->members)) - return -EEXIST; +static void +bitmap_ip_flush(struct ip_set *set) +{ + struct bitmap_ip *map = set->data; + + memset(map->members, 0, map->memsize); +} + +static int +bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct bitmap_ip *map = set->data; + struct nlattr *nested; + uint32_t id, elements; + + for (id = 0, elements = 0; id < map->elements; id++) + if (bitmap_ip_test(map, id)) + elements++; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip)); + 
NLA_PUT_NET32(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); + if (map->netmask != 32) + NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); + NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize)); + ipset_nest_end(skb, nested); return 0; +nla_put_failure: + return -EFAULT; } -UADT(ipmap, del) -KADT(ipmap, del, ipaddr) +static int +bitmap_ip_list(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ip *map = set->data; + struct nlattr *atd, *nested; + uint32_t id, first = cb->args[2]; -static inline int -__ipmap_create(const struct ip_set_req_ipmap_create *req, - struct ip_set_ipmap *map) + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EFAULT; + for (; cb->args[2] < map->elements; cb->args[2]++) { + id = cb->args[2]; + if (!bitmap_ip_test(map, id)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EFAULT; + } else + goto nla_put_failure; + } + NLA_PUT_NET32(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id * map->hosts)); + if (map->netmask != 32) + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, map->netmask); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return 0; +} + +static const struct ip_set_type_variant bitmap_ip __read_mostly = { + .kadt = bitmap_ip_kadt, + .uadt = bitmap_ip_uadt, + .destroy = bitmap_ip_destroy, + .flush = bitmap_ip_flush, + .head = bitmap_ip_head, + .list = bitmap_ip_list, +}; + +/* Timeout variant */ + +struct bitmap_ip_timeout { + void *members; /* the set members */ + uint32_t first_ip; /* host byte order, included in range */ + uint32_t last_ip; /* host byte order, included in range */ + uint32_t 
elements; /* number of max elements in the set */ + uint32_t hosts; /* number of hosts in a subnet */ + size_t memsize; /* members size */ + uint8_t netmask; /* subnet netmask */ + + uint32_t timeout; /* timeout parameter */ + struct timer_list gc; /* garbage collection */ +}; + +static inline bool +bitmap_ip_timeout_test(const struct bitmap_ip_timeout *map, uint32_t id) { - map->netmask = req->netmask; + unsigned long *table = map->members; - if (req->netmask == 0xFFFFFFFF) { - map->hosts = 1; - map->sizeid = map->last_ip - map->first_ip + 1; - } else { - unsigned int mask_bits, netmask_bits; - ip_set_ip_t mask; + return ip_set_timeout_test(table[id]); +} + +static int +bitmap_ip_timeout_add(struct bitmap_ip_timeout *map, + uint32_t id, uint32_t timeout) +{ + unsigned long *table = map->members; + + if (bitmap_ip_timeout_test(map, id)) + return -IPSET_ERR_EXIST; + + table[id] = ip_set_timeout_set(timeout); + + return 0; +} + +static int +bitmap_ip_timeout_del(struct bitmap_ip_timeout *map, uint32_t id) +{ + unsigned long *table = map->members; + int ret = -IPSET_ERR_EXIST; + + if (bitmap_ip_timeout_test(map, id)) + ret = 0; + + table[id] = IPSET_ELEM_UNSET; + return ret; +} + +static int +bitmap_ip_timeout_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, uint8_t pf, const uint8_t *flags) +{ + struct bitmap_ip_timeout *map = set->data; + uint32_t ip = ntohl(ip4addr(skb, flags)); + + if (pf != AF_INET) + return -EINVAL; + + if (ip < map->first_ip || ip > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + ip = ip_to_id((const struct bitmap_ip *)map, ip); + + switch (adt) { + case IPSET_TEST: + return bitmap_ip_timeout_test(map, ip); + case IPSET_ADD: + return bitmap_ip_timeout_add(map, ip, map->timeout); + case IPSET_DEL: + return bitmap_ip_timeout_del(map, ip); + default: + return -EINVAL; + } +} + +static int +bitmap_ip_timeout_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, uint32_t *lineno, uint32_t flags) +{ 
+ struct bitmap_ip_timeout *map = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + uint32_t ip, ip_to, id, timeout = map->timeout; + int ret = 0; - map->first_ip &= map->netmask; /* Should we better bark? */ + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + bitmap_ip_adt_policy)) + return -IPSET_ERR_PROTOCOL; - mask = range_to_mask(map->first_ip, map->last_ip, &mask_bits); - netmask_bits = mask_to_bits(map->netmask); + if (tb[IPSET_ATTR_IP]) + ip = ip_set_get_h32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; - if ((!mask && (map->first_ip || map->last_ip != 0xFFFFFFFF)) - || netmask_bits <= mask_bits) - return -ENOEXEC; + if (ip < map->first_ip || ip > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + if (adt == IPSET_TEST) + return bitmap_ip_timeout_test(map, + ip_to_id((const struct bitmap_ip *)map, ip)); - DP("mask_bits %u, netmask_bits %u", - mask_bits, netmask_bits); - map->hosts = 2 << (32 - netmask_bits - 1); - map->sizeid = 2 << (netmask_bits - mask_bits - 1); + if (tb[IPSET_ATTR_IP_TO]) { + ip_to = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]); + if (ip > ip_to) { + swap(ip, ip_to); + if (ip < map->first_ip) + return -IPSET_ERR_BITMAP_RANGE; + } + } else if (tb[IPSET_ATTR_CIDR]) { + uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip_to = ip | ~HOSTMASK(cidr); + } else + ip_to = ip; + + if (ip_to > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + if (tb[IPSET_ATTR_TIMEOUT]) { + timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); } - if (map->sizeid > MAX_RANGE + 1) { - ip_set_printk("range too big, %d elements (max %d)", - map->sizeid, MAX_RANGE+1); - return -ENOEXEC; + + for (; !before(ip_to, ip); ip += map->hosts) { + id = ip_to_id((const struct bitmap_ip *)map, ip); + ret = adt == IPSET_ADD + ? 
bitmap_ip_timeout_add(map, id, timeout) + : bitmap_ip_timeout_del(map, id); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + return ret; + } } - DP("hosts %u, sizeid %u", map->hosts, map->sizeid); - return bitmap_bytes(0, map->sizeid - 1); + return ret; } -BITMAP_CREATE(ipmap) -BITMAP_DESTROY(ipmap) -BITMAP_FLUSH(ipmap) +static void +bitmap_ip_timeout_destroy(struct ip_set *set) +{ + struct bitmap_ip_timeout *map = set->data; -static inline void -__ipmap_list_header(const struct ip_set_ipmap *map, - struct ip_set_req_ipmap_create *header) + /* gc might be running: del_timer_sync can't be used */ + while (!del_timer(&map->gc)) + msleep(IPSET_DESTROY_TIMER_SLEEP); + + ip_set_free(map->members, set->flags); + kfree(map); + + set->data = NULL; +} + +static void +bitmap_ip_timeout_flush(struct ip_set *set) { - header->netmask = map->netmask; + struct bitmap_ip_timeout *map = set->data; + + memset(map->members, 0, map->memsize); } -BITMAP_LIST_HEADER(ipmap) -BITMAP_LIST_MEMBERS_SIZE(ipmap, ip_set_ip_t, map->sizeid, - test_bit(i, map->members)) +static int +bitmap_ip_timeout_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct bitmap_ip_timeout *map = set->data; + struct nlattr *nested; + uint32_t id, elements; + + for (id = 0, elements = 0; id < map->elements; id++) + if (bitmap_ip_timeout_test(map, id)) + elements++; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip)); + NLA_PUT_NET32(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); + if (map->netmask != 32) + NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT , htonl(map->timeout)); + NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize)); + 
ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EFAULT; +} + +static int +bitmap_ip_timeout_list(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ip_timeout *map = set->data; + struct nlattr *adt, *nested; + uint32_t id, first = cb->args[2]; + unsigned long *table = map->members; + + adt = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!adt) + return -EFAULT; + for (; cb->args[2] < map->elements; cb->args[2]++) { + id = cb->args[2]; + if (!bitmap_ip_timeout_test(map, id)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, adt); + return -EFAULT; + } else + goto nla_put_failure; + } + NLA_PUT_NET32(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id * map->hosts)); + if (map->netmask != 32) + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, map->netmask); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(table[id]))); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, adt); + + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, adt); + return 0; +} + +static const struct ip_set_type_variant bitmap_ip_timeout __read_mostly = { + .kadt = bitmap_ip_timeout_kadt, + .uadt = bitmap_ip_timeout_uadt, + .destroy = bitmap_ip_timeout_destroy, + .flush = bitmap_ip_timeout_flush, + .head = bitmap_ip_timeout_head, + .list = bitmap_ip_timeout_list, +}; static void -ipmap_list_members(const struct ip_set *set, void *data, char dont_align) +bitmap_ip_timeout_gc(unsigned long ul_set) { - const struct ip_set_ipmap *map = set->data; - uint32_t i, n = 0; - ip_set_ip_t *d; + struct ip_set *set = (struct ip_set *) ul_set; + struct bitmap_ip_timeout *map = set->data; + unsigned long *table = map->members; + uint32_t id; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id 
= 0; id < map->elements; id++) + if (ip_set_timeout_expired(table[id])) + table[id] = IPSET_ELEM_UNSET; + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static inline void +bitmap_ip_gc_init(struct ip_set *set) +{ + struct bitmap_ip_timeout *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = bitmap_ip_timeout_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create bitmap:ip type of sets */ + +static const struct nla_policy +bitmap_ip_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_IP_TO] = { .type = NLA_U32 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static bool +init_map_ip(struct ip_set *set, struct bitmap_ip *map, + uint32_t first_ip, uint32_t last_ip, + uint32_t elements, uint32_t hosts, uint8_t netmask) +{ + map->members = ip_set_alloc(map->memsize, GFP_KERNEL, &set->flags); + if (!map->members) + return false; + map->first_ip = first_ip; + map->last_ip = last_ip; + map->elements = elements; + map->hosts = hosts; + map->netmask = netmask; + + set->data = map; + set->family = AF_INET; - if (dont_align) { - memcpy(data, map->members, map->size); - return; + return true; +} + +static int +bitmap_ip_create(struct ip_set *set, struct nlattr *head, int len, + uint32_t flags) +{ + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + uint32_t first_ip, last_ip, hosts, elements; + uint8_t netmask = 32; + + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, + bitmap_ip_create_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + first_ip = ip_set_get_h32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP_TO]) { + last_ip = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]); + if (first_ip > last_ip) { + 
uint32_t tmp = first_ip; + + first_ip = last_ip; + last_ip = tmp; + } + } else if (tb[IPSET_ATTR_CIDR]) { + uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr >= 32) + return -IPSET_ERR_INVALID_CIDR; + last_ip = first_ip | ~HOSTMASK(cidr); + } else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_NETMASK]) { + netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); + + if (netmask > 32) + return -IPSET_ERR_INVALID_NETMASK; + + first_ip &= HOSTMASK(netmask); + last_ip |= ~HOSTMASK(netmask); } - for (i = 0; i < map->sizeid; i++) - if (test_bit(i, map->members)) { - d = data + n * IPSET_ALIGN(sizeof(ip_set_ip_t)); - *d = map->first_ip + i * map->hosts; - n++; + if (netmask == 32) { + hosts = 1; + elements = last_ip - first_ip + 1; + } else { + uint8_t mask_bits; + uint32_t mask; + + mask = range_to_mask(first_ip, last_ip, &mask_bits); + + if ((!mask && (first_ip || last_ip != 0xFFFFFFFF)) + || netmask <= mask_bits) + return -IPSET_ERR_BITMAP_RANGE; + + D("mask_bits %u, netmask %u", mask_bits, netmask); + hosts = 2 << (32 - netmask - 1); + elements = 2 << (netmask - mask_bits - 1); + } + if (elements > IPSET_BITMAP_MAX_RANGE + 1) { + return -IPSET_ERR_BITMAP_RANGE_SIZE; + } + D("hosts %u, elements %u", hosts, elements); + + if (tb[IPSET_ATTR_TIMEOUT]) { + struct bitmap_ip_timeout *map; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + map->memsize = elements * sizeof(unsigned long); + + if (!init_map_ip(set, (struct bitmap_ip *)map, + first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + set->flags |= IP_SET_FLAG_TIMEOUT; + set->variant = &bitmap_ip_timeout; + + bitmap_ip_gc_init(set); + } else { + struct bitmap_ip *map; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + map->memsize = bitmap_bytes(0, elements - 1); + + if (!init_map_ip(set, map, + first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + 
return -ENOMEM; } + + set->variant = &bitmap_ip; + } + return 0; } -IP_SET_TYPE(ipmap, IPSET_TYPE_IP | IPSET_DATA_SINGLE) +static struct ip_set_type bitmap_ip_type = { + .name = "bitmap:ip", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .family = AF_INET, + .revision = 0, + .create = bitmap_ip_create, + .me = THIS_MODULE, +}; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("ipmap type of IP sets"); +static int __init +bitmap_ip_init(void) +{ + return ip_set_type_register(&bitmap_ip_type); +} + +static void __exit +bitmap_ip_fini(void) +{ + ip_set_type_unregister(&bitmap_ip_type); +} -REGISTER_MODULE(ipmap) +module_init(bitmap_ip_init); +module_exit(bitmap_ip_fini); diff --git a/kernel/ip_set_bitmap_ipmac.c b/kernel/ip_set_bitmap_ipmac.c index 89e907b..45335dd 100644 --- a/kernel/ip_set_bitmap_ipmac.c +++ b/kernel/ip_set_bitmap_ipmac.c @@ -1,14 +1,14 @@ /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> * Patrick Schaaf <bof@bof.de> - * Martin Josefsson <gandalf@wlug.westbo.se> - * Copyright (C) 2003-2008 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Martin Josefsson <gandalf@wlug.westbo.se> + * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ -/* Kernel module implementing an IP set type: the macipmap type */ +/* Kernel module implementing an IP set type: the bitmap:ip,mac type */ #include <linux/module.h> #include <linux/ip.h> @@ -18,162 +18,528 @@ #include <asm/bitops.h> #include <linux/spinlock.h> #include <linux/if_ether.h> +#include <linux/netlink.h> +#include <linux/delay.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <net/netlink.h> +#include <net/pfxlen.h> -#include <linux/netfilter_ipv4/ip_set_macipmap.h> +#include <linux/netfilter/ip_set.h> +#include <linux/netfilter/ip_set_timeout.h> +#include <linux/netfilter/ip_set_bitmap.h> -static int -macipmap_utest(struct ip_set *set, const void *data, u_int32_t size) -{ - const struct ip_set_macipmap *map = set->data; - const struct ip_set_macip *table = map->members; - const struct ip_set_req_macipmap *req = data; - - if (req->ip < map->first_ip || req->ip > map->last_ip) - return -ERANGE; - - DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(req->ip)); - if (table[req->ip - map->first_ip].match) { - return (memcmp(req->ethernet, - &table[req->ip - map->first_ip].ethernet, - ETH_ALEN) == 0); - } else { - return (map->flags & IPSET_MACIP_MATCHUNSET ? 
1 : 0); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets"); +MODULE_ALIAS("ip_set_bitmap:ip,mac"); + +enum { + MAC_EMPTY, /* element is not set */ + MAC_FILLED, /* element is set with MAC */ + MAC_UNSET, /* element is set, without MAC */ +}; + +/* Member element without and with timeout */ + +struct ipmac { + unsigned char ether[ETH_ALEN]; + unsigned char match; +}; + +struct ipmac_timeout { + unsigned char ether[ETH_ALEN]; + unsigned char match; + unsigned long timeout; +}; + +struct bitmap_ipmac { + void *members; /* the set members */ + uint32_t first_ip; /* host byte order, included in range */ + uint32_t last_ip; /* host byte order, included in range */ + uint32_t timeout; /* timeout value */ + struct timer_list gc; /* garbage collector */ + size_t elem_size; /* size of element */ +}; + +static inline void * +bitmap_ipmac_elem(const struct bitmap_ipmac *map, uint32_t id) +{ + return (void *)((char *)map->members + id * map->elem_size); +} + +static inline bool +bitmap_timeout(const struct bitmap_ipmac *map, uint32_t id) +{ + const struct ipmac_timeout *elem = bitmap_ipmac_elem(map, id); + + return ip_set_timeout_test(elem->timeout); +} + +static inline bool +bitmap_expired(const struct bitmap_ipmac *map, uint32_t id) +{ + const struct ipmac_timeout *elem = bitmap_ipmac_elem(map, id); + + return ip_set_timeout_expired(elem->timeout); +} + +static inline int +bitmap_ipmac_exist(const struct ipmac *elem, bool with_timeout) +{ + const struct ipmac_timeout *e = (const struct ipmac_timeout *) elem; + + return elem->match == MAC_UNSET + || (elem->match == MAC_FILLED + && !(with_timeout && ip_set_timeout_expired(e->timeout))); +} + +static inline int +bitmap_ipmac_test(const struct bitmap_ipmac *map, bool with_timeout, + uint32_t id, const unsigned char *ether) +{ + const struct ipmac *elem = bitmap_ipmac_elem(map, id); + + switch (elem->match) { + case MAC_UNSET: + /* Trigger 
kernel to fill out the ethernet address */ + return -EAGAIN; + case MAC_FILLED: + return (ether == NULL + || memcmp(ether, elem->ether, ETH_ALEN) == 0) + && (!with_timeout || bitmap_timeout(map, id)); } + return 0; } static int -macipmap_ktest(struct ip_set *set, - const struct sk_buff *skb, - const u_int32_t *flags) +bitmap_ipmac_add(struct bitmap_ipmac *map, bool with_timeout, + uint32_t id, const unsigned char *ether, + uint32_t timeout) { - const struct ip_set_macipmap *map = set->data; - const struct ip_set_macip *table = map->members; - ip_set_ip_t ip; - - ip = ipaddr(skb, flags); + struct ipmac *elem = bitmap_ipmac_elem(map, id); + struct ipmac_timeout *e = (struct ipmac_timeout *) elem; - if (ip < map->first_ip || ip > map->last_ip) - return 0; - - DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip)); - if (table[ip - map->first_ip].match) { - /* Is mac pointer valid? - * If so, compare... */ - return (skb_mac_header(skb) >= skb->head - && (skb_mac_header(skb) + ETH_HLEN) <= skb->data - && (memcmp(eth_hdr(skb)->h_source, - &table[ip - map->first_ip].ethernet, - ETH_ALEN) == 0)); - } else { - return (map->flags & IPSET_MACIP_MATCHUNSET ? 
1 : 0); + switch (elem->match) { + case MAC_UNSET: + if (!ether) + /* Already added without ethernet address */ + return -IPSET_ERR_EXIST; + /* Fill the MAC address and activate the timer */ + memcpy(elem->ether, ether, ETH_ALEN); + elem->match = MAC_FILLED; + if (with_timeout) { + if (timeout == map->timeout) + /* Timeout was not specified, get stored one */ + timeout = e->timeout; + e->timeout = ip_set_timeout_set(timeout); + } + break; + case MAC_FILLED: + if (!(with_timeout && bitmap_expired(map, id))) + return -IPSET_ERR_EXIST; + /* Fall through */ + case MAC_EMPTY: + if (ether) { + memcpy(elem->ether, ether, ETH_ALEN); + elem->match = MAC_FILLED; + } else + elem->match = MAC_UNSET; + if (with_timeout) { + /* If MAC is unset yet, we store plain timeout + * because the timer is not activated yet + * and we can reuse it later when MAC is filled out, + * possibly by the kernel */ + e->timeout = ether ? ip_set_timeout_set(timeout) + : timeout; + } + break; } + + return 0; } -/* returns 0 on success */ -static inline int -macipmap_add(struct ip_set *set, - ip_set_ip_t ip, const unsigned char *ethernet) +static int +bitmap_ipmac_del(struct bitmap_ipmac *map, bool with_timeout, + uint32_t id) { - struct ip_set_macipmap *map = set->data; - struct ip_set_macip *table = map->members; + struct ipmac *elem = bitmap_ipmac_elem(map, id); - if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; - if (table[ip - map->first_ip].match) - return -EEXIST; + if (elem->match == MAC_EMPTY + || (with_timeout && bitmap_expired(map, id))) + return -IPSET_ERR_EXIST; + + elem->match = MAC_EMPTY; - DP("set: %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip)); - memcpy(&table[ip - map->first_ip].ethernet, ethernet, ETH_ALEN); - table[ip - map->first_ip].match = IPSET_MACIP_ISSET; return 0; } -#define KADT_CONDITION \ - if (!(skb_mac_header(skb) >= skb->head \ - && (skb_mac_header(skb) + ETH_HLEN) <= skb->data))\ +static int +bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff 
*skb, + enum ipset_adt adt, uint8_t pf, const uint8_t *flags) +{ + struct bitmap_ipmac *map = set->data; + uint32_t ip = ntohl(ip4addr(skb, flags)); + bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + + if (pf != AF_INET) return -EINVAL; -UADT(macipmap, add, req->ethernet) -KADT(macipmap, add, ipaddr, eth_hdr(skb)->h_source) + if (ip < map->first_ip || ip > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; -static inline int -macipmap_del(struct ip_set *set, ip_set_ip_t ip) + if (skb_mac_header(skb) < skb->head + || (skb_mac_header(skb) + ETH_HLEN) > skb->data) + return -EINVAL; + + ip -= map->first_ip; + + switch (adt) { + case IPSET_TEST: + return bitmap_ipmac_test(map, with_timeout, + ip, eth_hdr(skb)->h_source); + case IPSET_ADD: + return bitmap_ipmac_add(map, with_timeout, + ip, eth_hdr(skb)->h_source, + map->timeout); + case IPSET_DEL: + return bitmap_ipmac_del(map, with_timeout, ip); + default: + return -EINVAL; + } +} + +static const struct nla_policy +bitmap_ipmac_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static int +bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, uint32_t *lineno, uint32_t flags) { - struct ip_set_macipmap *map = set->data; - struct ip_set_macip *table = map->members; + struct bitmap_ipmac *map = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + uint32_t ip, timeout = map->timeout; + unsigned char *ether = NULL; + int ret = 0; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + bitmap_ipmac_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + ip = ip_set_get_h32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; if (ip < map->first_ip || ip > map->last_ip) - return -ERANGE; - if (!table[ip - 
map->first_ip].match) - return -EEXIST; + return -IPSET_ERR_BITMAP_RANGE; - table[ip - map->first_ip].match = 0; - DP("set: %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip)); - return 0; + if (tb[IPSET_ATTR_ETHER]) + ether = nla_data(tb[IPSET_ATTR_ETHER]); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + } + + ip -= map->first_ip; + + if (adt == IPSET_TEST) + return bitmap_ipmac_test(map, with_timeout, ip, ether); + + ret = adt == IPSET_ADD ? bitmap_ipmac_add(map, with_timeout, + ip, ether, timeout) + : bitmap_ipmac_del(map, with_timeout, ip); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + return ret; + } + return ret; } -#undef KADT_CONDITION -#define KADT_CONDITION +static void +bitmap_ipmac_destroy(struct ip_set *set) +{ + struct bitmap_ipmac *map = set->data; -UADT(macipmap, del) -KADT(macipmap, del, ipaddr) + /* gc might be running: del_timer_sync can't be used */ + if (set->flags & IP_SET_FLAG_TIMEOUT) + while (!del_timer(&map->gc)) + msleep(IPSET_DESTROY_TIMER_SLEEP); + + ip_set_free(map->members, set->flags); + kfree(map); + + set->data = NULL; +} -static inline int -__macipmap_create(const struct ip_set_req_macipmap_create *req, - struct ip_set_macipmap *map) +static void +bitmap_ipmac_flush(struct ip_set *set) +{ + struct bitmap_ipmac *map = set->data; + + memset(map->members, 0, + (map->last_ip - map->first_ip + 1) * map->elem_size); +} + +static int +bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) { - if (req->to - req->from > MAX_RANGE) { - ip_set_printk("range too big, %d elements (max %d)", - req->to - req->from + 1, MAX_RANGE+1); - return -ENOEXEC; + const struct bitmap_ipmac *map = set->data; + struct nlattr *nested; + const struct ipmac *elem; + uint32_t id, elements = 0, last = map->last_ip - map->first_ip; + bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + + 
for (id = 0; id <= last; id++) { + elem = bitmap_ipmac_elem(map, id); + if (bitmap_ipmac_exist(elem, with_timeout)) + elements++; } - map->flags = req->flags; - return (req->to - req->from + 1) * sizeof(struct ip_set_macip); + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_IP, htonl(map->first_ip)); + NLA_PUT_NET32(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); + NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl((map->last_ip - map->first_ip + 1) + * map->elem_size)); + if (with_timeout) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EFAULT; } -BITMAP_CREATE(macipmap) -BITMAP_DESTROY(macipmap) -BITMAP_FLUSH(macipmap) +static int +bitmap_ipmac_list(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ipmac *map = set->data; + const struct ipmac *elem; + struct nlattr *atd, *nested; + uint32_t id, first = cb->args[2]; + uint32_t last = map->last_ip - map->first_ip; + bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EFAULT; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + elem = bitmap_ipmac_elem(map, id); + if (!bitmap_ipmac_exist(elem, with_timeout)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EFAULT; + } else + goto nla_put_failure; + } + NLA_PUT_NET32(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id)); + if (elem->match == MAC_FILLED) + NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN, + elem->ether); + if (with_timeout) { + const struct ipmac_timeout *e = + (const struct ipmac_timeout *)elem; + uint32_t timeout = e->match == MAC_UNSET ? 
e->timeout + : ip_set_timeout_get(e->timeout); + + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(timeout)); + } + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return 0; +} + +const struct ip_set_type_variant bitmap_ipmac __read_mostly = { + .kadt = bitmap_ipmac_kadt, + .uadt = bitmap_ipmac_uadt, + .destroy = bitmap_ipmac_destroy, + .flush = bitmap_ipmac_flush, + .head = bitmap_ipmac_head, + .list = bitmap_ipmac_list, +}; + +static void +bitmap_ipmac_timeout_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct bitmap_ipmac *map = set->data; + struct ipmac_timeout *elem; + uint32_t id, last = map->last_ip - map->first_ip; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id <= last; id++) { + elem = bitmap_ipmac_elem(map, id); + if (elem->match == MAC_FILLED + && ip_set_timeout_expired(elem->timeout)) + elem->match = MAC_EMPTY; + } + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} static inline void -__macipmap_list_header(const struct ip_set_macipmap *map, - struct ip_set_req_macipmap_create *header) +bitmap_ipmac_timeout_gc_init(struct ip_set *set) { - header->flags = map->flags; + struct bitmap_ipmac *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = bitmap_ipmac_timeout_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); } -BITMAP_LIST_HEADER(macipmap) -BITMAP_LIST_MEMBERS_SIZE(macipmap, struct ip_set_req_macipmap, - (map->last_ip - map->first_ip + 1), - ((const struct ip_set_macip *)map->members)[i].match) +/* Create bitmap:ip,mac type of sets */ +static const struct nla_policy 
+bitmap_ipmac_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_IP_TO] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; -static void -macipmap_list_members(const struct ip_set *set, void *data, char dont_align) +static bool +init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, + uint32_t first_ip, uint32_t last_ip) { - const struct ip_set_macipmap *map = set->data; - const struct ip_set_macip *table = map->members; - uint32_t i, n = 0; - struct ip_set_req_macipmap *d; + map->members = ip_set_alloc((last_ip - first_ip + 1) * map->elem_size, + GFP_KERNEL, &set->flags); + if (!map->members) + return false; + map->first_ip = first_ip; + map->last_ip = last_ip; + + set->data = map; + set->family = AF_INET; - if (dont_align) { - memcpy(data, map->members, map->size); - return; - } + return true; +} + +static int +bitmap_ipmac_create(struct ip_set *set, struct nlattr *head, int len, + uint32_t flags) +{ + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + uint32_t first_ip, last_ip, elements; + struct bitmap_ipmac *map; + + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, + bitmap_ipmac_create_policy)) + return -IPSET_ERR_PROTOCOL; - for (i = 0; i < map->last_ip - map->first_ip + 1; i++) - if (table[i].match) { - d = data + n * IPSET_ALIGN(sizeof(struct ip_set_req_macipmap)); - d->ip = map->first_ip + i; - memcpy(d->ethernet, &table[i].ethernet, ETH_ALEN); - n++; + if (tb[IPSET_ATTR_IP]) + first_ip = ip_set_get_h32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP_TO]) { + last_ip = ip_set_get_h32(tb[IPSET_ATTR_IP_TO]); + if (first_ip > last_ip) { + uint32_t tmp = first_ip; + + first_ip = last_ip; + last_ip = tmp; } + } else if (tb[IPSET_ATTR_CIDR]) { + uint8_t cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr >= 32) + return -IPSET_ERR_INVALID_CIDR; + last_ip = first_ip | ~HOSTMASK(cidr); + } else + return -IPSET_ERR_PROTOCOL; + + elements = 
last_ip - first_ip + 1; + + if (elements > IPSET_BITMAP_MAX_RANGE + 1) + return -IPSET_ERR_BITMAP_RANGE_SIZE; + + set->variant = &bitmap_ipmac; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + map->elem_size = sizeof(struct ipmac_timeout); + + if (!init_map_ipmac(set, map, first_ip, last_ip)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + set->flags |= IP_SET_FLAG_TIMEOUT; + + bitmap_ipmac_timeout_gc_init(set); + } else { + map->elem_size = sizeof(struct ipmac); + + if (!init_map_ipmac(set, map, first_ip, last_ip)) { + kfree(map); + return -ENOMEM; + } + } + return 0; } -IP_SET_TYPE(macipmap, IPSET_TYPE_IP | IPSET_DATA_SINGLE) +struct ip_set_type bitmap_ipmac_type = { + .name = "bitmap:ip,mac", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .family = AF_INET, + .revision = 0, + .create = bitmap_ipmac_create, + .me = THIS_MODULE, +}; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("macipmap type of IP sets"); +static int __init +bitmap_ipmac_init(void) +{ + return ip_set_type_register(&bitmap_ipmac_type); +} + +static void __exit +bitmap_ipmac_fini(void) +{ + ip_set_type_unregister(&bitmap_ipmac_type); +} -REGISTER_MODULE(macipmap) +module_init(bitmap_ipmac_init); +module_exit(bitmap_ipmac_fini); diff --git a/kernel/ip_set_bitmap_port.c b/kernel/ip_set_bitmap_port.c index 8bb6e76..3afd031 100644 --- a/kernel/ip_set_bitmap_port.c +++ b/kernel/ip_set_bitmap_port.c @@ -1,11 +1,11 @@ -/* Copyright (C) 2003-2008 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ -/* Kernel module implementing a port set type as a bitmap */ +/* Kernel module implementing an IP set type: the bitmap:port type */ #include <linux/module.h> #include <linux/ip.h> @@ -16,115 +16,629 @@ #include <asm/uaccess.h> #include <asm/bitops.h> #include <linux/spinlock.h> +#include <linux/netlink.h> +#include <linux/delay.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <net/netlink.h> +#include <net/pfxlen.h> -#include <net/ip.h> +#include <linux/netfilter/ip_set.h> +#include <linux/netfilter/ip_set_bitmap.h> +#include <linux/netfilter/ip_set_getport.h> +#define IP_SET_BITMAP_TIMEOUT +#include <linux/netfilter/ip_set_timeout.h> -#include <linux/netfilter_ipv4/ip_set_portmap.h> -#include <linux/netfilter_ipv4/ip_set_getport.h> +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +MODULE_DESCRIPTION("bitmap:port type of IP sets"); +MODULE_ALIAS("ip_set_bitmap:port"); + +/* Base variant */ + +struct bitmap_port { + void *members; /* the set members */ + uint16_t first_port; /* host byte order, included in range */ + uint16_t last_port; /* host byte order, included in range */ + size_t memsize; /* members size */ +}; static inline int -portmap_test(const struct ip_set *set, ip_set_ip_t port) +bitmap_port_test(const struct bitmap_port *map, uint16_t id) { - const struct ip_set_portmap *map = set->data; + return !!test_bit(id, map->members); +} - if (port < map->first_ip || port > map->last_ip) - return -ERANGE; - - DP("set: %s, port: %u", set->name, port); - return !!test_bit(port - map->first_ip, map->members); +static inline int +bitmap_port_add(struct bitmap_port *map, uint16_t id) +{ + if (test_and_set_bit(id, map->members)) + return -IPSET_ERR_EXIST; + + return 0; } -#define KADT_CONDITION \ - if (ip == INVALID_PORT) \ - return 0; +static int +bitmap_port_del(struct bitmap_port *map, uint16_t id) +{ + if (!test_and_clear_bit(id, map->members)) + return -IPSET_ERR_EXIST; -UADT(portmap, test) 
-KADT(portmap, test, get_port) + return 0; +} -static inline int -portmap_add(struct ip_set *set, ip_set_ip_t port) +static int +bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, uint8_t pf, const uint8_t *flags) { - struct ip_set_portmap *map = set->data; + struct bitmap_port *map = set->data; + uint32_t port = get_port(pf, skb, flags); + + if (port == IPSET_INVALID_PORT) + return 0; + + port = ntohs(port); - if (port < map->first_ip || port > map->last_ip) - return -ERANGE; - if (test_and_set_bit(port - map->first_ip, map->members)) - return -EEXIST; + if (port < map->first_port || port > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; - DP("set: %s, port %u", set->name, port); - return 0; + port -= map->first_port; + + switch (adt) { + case IPSET_TEST: + return bitmap_port_test(map, port); + case IPSET_ADD: + return bitmap_port_add(map, port); + case IPSET_DEL: + return bitmap_port_del(map, port); + default: + return -EINVAL; + } } -UADT(portmap, add) -KADT(portmap, add, get_port) +static const struct nla_policy +bitmap_port_adt_policy[IPSET_ATTR_ADT_MAX+1] __read_mostly = { + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; -static inline int -portmap_del(struct ip_set *set, ip_set_ip_t port) +static int +bitmap_port_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, uint32_t *lineno, uint32_t flags) { - struct ip_set_portmap *map = set->data; + struct bitmap_port *map = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + uint32_t port; + uint16_t id, port_to; + int ret = 0; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + bitmap_port_adt_policy)) + return -IPSET_ERR_PROTOCOL; - if (port < map->first_ip || port > map->last_ip) - return -ERANGE; - if (!test_and_clear_bit(port - map->first_ip, map->members)) - return -EEXIST; + if (tb[IPSET_ATTR_PORT]) + port = 
ip_set_get_h16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; - DP("set: %s, port %u", set->name, port); - return 0; + if (port < map->first_port || port > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + if (tb[IPSET_ATTR_TIMEOUT]) + return -IPSET_ERR_TIMEOUT; + + if (adt == IPSET_TEST) + return bitmap_port_test(map, port - map->first_port); + + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) { + swap(port, port_to); + if (port < map->first_port) + return -IPSET_ERR_BITMAP_RANGE; + } + } else + port_to = port; + + if (port_to > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + for (; port <= port_to; port++) { + id = port - map->first_port; + ret = adt == IPSET_ADD ? bitmap_port_add(map, id) + : bitmap_port_del(map, id); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + return ret; + } + } + return ret; } -UADT(portmap, del) -KADT(portmap, del, get_port) +static void +bitmap_port_destroy(struct ip_set *set) +{ + struct bitmap_port *map = set->data; + + ip_set_free(map->members, set->flags); + kfree(map); + + set->data = NULL; +} -static inline int -__portmap_create(const struct ip_set_req_portmap_create *req, - struct ip_set_portmap *map) +static void +bitmap_port_flush(struct ip_set *set) { - if (req->to - req->from > MAX_RANGE) { - ip_set_printk("range too big, %d elements (max %d)", - req->to - req->from + 1, MAX_RANGE+1); - return -ENOEXEC; + struct bitmap_port *map = set->data; + + memset(map->members, 0, map->memsize); +} + +static int +bitmap_port_head(struct ip_set *set, struct sk_buff *skb) +{ + struct bitmap_port *map = set->data; + struct nlattr *nested; + uint32_t id; + uint16_t elements, last = map->last_port - map->first_port; + + for (id = 0, elements = 0; id <= last; id++) + if (test_bit(id, map->members)) + elements++; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + 
goto nla_put_failure; + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); + NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EFAULT; +} + +static int +bitmap_port_list(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + struct bitmap_port *map = set->data; + struct nlattr *atd, *nested; + uint16_t id, first = cb->args[2]; + uint16_t last = map->last_port - map->first_port; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EFAULT; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + if (!test_bit(id, map->members)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EFAULT; + } else + goto nla_put_failure; + } + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id)); + ipset_nest_end(skb, nested); } - return bitmap_bytes(req->from, req->to); + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return 0; } -BITMAP_CREATE(portmap) -BITMAP_DESTROY(portmap) -BITMAP_FLUSH(portmap) +const struct ip_set_type_variant bitmap_port __read_mostly = { + .kadt = bitmap_port_kadt, + .uadt = bitmap_port_uadt, + .destroy = bitmap_port_destroy, + .flush = bitmap_port_flush, + .head = bitmap_port_head, + .list = bitmap_port_list, +}; -static inline void -__portmap_list_header(const struct ip_set_portmap *map, - struct ip_set_req_portmap_create *header) +/* Timeout variant */ + +struct bitmap_port_timeout { + void *members; /* the set members */ + uint16_t first_port; /* host byte order, included in range */ + 
uint16_t last_port; /* host byte order, included in range */ + size_t memsize; /* members size */ + + uint32_t timeout; /* timeout parameter */ + struct timer_list gc; /* garbage collection */ +}; + +static inline bool +bitmap_port_timeout_test(const struct bitmap_port_timeout *map, uint16_t id) +{ + unsigned long *timeout = map->members; + + return ip_set_timeout_test(timeout[id]); +} + +static int +bitmap_port_timeout_add(const struct bitmap_port_timeout *map, + uint16_t id, uint32_t timeout) +{ + unsigned long *table = map->members; + + if (bitmap_port_timeout_test(map, id)) + return -IPSET_ERR_EXIST; + + table[id] = ip_set_timeout_set(timeout); + + return 0; +} + +static int +bitmap_port_timeout_del(const struct bitmap_port_timeout *map, + uint16_t id) +{ + unsigned long *table = map->members; + int ret = -IPSET_ERR_EXIST; + + if (bitmap_port_timeout_test(map, id)) + ret = 0; + + table[id] = IPSET_ELEM_UNSET; + return ret; +} + +static int +bitmap_port_timeout_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, uint8_t pf, const uint8_t *flags) +{ + struct bitmap_port_timeout *map = set->data; + uint32_t port = get_port(pf, skb, flags); + + if (port == IPSET_INVALID_PORT) + return 0; + + port = ntohs(port); + + if (port < map->first_port || port > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + port -= map->first_port; + + switch (adt) { + case IPSET_TEST: + return bitmap_port_timeout_test(map, port); + case IPSET_ADD: + return bitmap_port_timeout_add(map, port, map->timeout); + case IPSET_DEL: + return bitmap_port_timeout_del(map, port); + default: + return -EINVAL; + } +} + +static int +bitmap_port_timeout_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, uint32_t *lineno, uint32_t flags) { + const struct bitmap_port_timeout *map = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool eexist = flags & IPSET_FLAG_EXIST; + uint16_t port_to, id; + uint32_t port, timeout = map->timeout; + int ret = 0; + 
+ if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + bitmap_port_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PORT]) + port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (port < map->first_port || port > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + if (adt == IPSET_TEST) + return bitmap_port_timeout_test(map, port - map->first_port); + + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) { + swap(port, port_to); + if (port < map->first_port) + return -IPSET_ERR_BITMAP_RANGE; + } + } else + port_to = port; + + if (port_to > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + if (tb[IPSET_ATTR_TIMEOUT]) + timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + + for (; port <= port_to; port++) { + id = port - map->first_port; + ret = adt == IPSET_ADD + ? bitmap_port_timeout_add(map, id, timeout) + : bitmap_port_timeout_del(map, id); + + if (ret && !(ret == -IPSET_ERR_EXIST && eexist)) { + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + return ret; + } + } + return ret; } -BITMAP_LIST_HEADER(portmap) -BITMAP_LIST_MEMBERS_SIZE(portmap, ip_set_ip_t, (map->last_ip - map->first_ip + 1), - test_bit(i, map->members)) +static void +bitmap_port_timeout_destroy(struct ip_set *set) +{ + struct bitmap_port_timeout *map = set->data; + + /* gc might be running: del_timer_sync can't be used */ + while (!del_timer(&map->gc)) + msleep(IPSET_DESTROY_TIMER_SLEEP); + + ip_set_free(map->members, set->flags); + kfree(map); + + set->data = NULL; +} static void -portmap_list_members(const struct ip_set *set, void *data, char dont_align) +bitmap_port_timeout_flush(struct ip_set *set) { - const struct ip_set_portmap *map = set->data; - uint32_t i, n = 0; - ip_set_ip_t *d; + struct bitmap_port_timeout *map = set->data; - if (dont_align) { - memcpy(data, map->members, map->size); - return; + memset(map->members, 0, map->memsize); +} + +static int 
+bitmap_port_timeout_head(struct ip_set *set, struct sk_buff *skb) +{ + struct bitmap_port_timeout *map = set->data; + struct nlattr *nested; + uint32_t id; + uint16_t elements, last = map->last_port - map->first_port; + + for (id = 0, elements = 0; id <= last; id++) + if (bitmap_port_timeout_test(map, id)) + elements++; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT , htonl(map->timeout)); + NLA_PUT_NET32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(map->memsize)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EFAULT; +} + +static int +bitmap_port_timeout_list(struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + struct bitmap_port_timeout *map = set->data; + struct nlattr *adt, *nested; + uint16_t id, first = cb->args[2]; + uint16_t last = map->last_port - map->first_port; + unsigned long *table = map->members; + + adt = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!adt) + return -EFAULT; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + if (!bitmap_port_timeout_test(map, id)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, adt); + return -EFAULT; + } else + goto nla_put_failure; + } + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id)); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(table[id]))); + ipset_nest_end(skb, nested); } + ipset_nest_end(skb, adt); + + /* Set listing finished */ + cb->args[2] = 0; - for (i = 0; i < map->last_ip - map->first_ip + 1; i++) - if (test_bit(i, map->members)) { - d = data + n * IPSET_ALIGN(sizeof(ip_set_ip_t)); - *d 
= map->first_ip + i; - n++; + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, adt); + return 0; +} + +const struct ip_set_type_variant bitmap_port_timeout __read_mostly = { + .kadt = bitmap_port_timeout_kadt, + .uadt = bitmap_port_timeout_uadt, + .destroy = bitmap_port_timeout_destroy, + .flush = bitmap_port_timeout_flush, + .head = bitmap_port_timeout_head, + .list = bitmap_port_timeout_list, +}; + +static void +bitmap_port_timeout_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct bitmap_port_timeout *map = set->data; + unsigned long *table = map->members; + uint16_t id, last = map->last_port - map->first_port; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id <= last; id++) + if (ip_set_timeout_expired(table[id])) + table[id] = IPSET_ELEM_UNSET; + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static inline void +bitmap_port_timeout_gc_init(struct ip_set *set) +{ + struct bitmap_port_timeout *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = bitmap_port_timeout_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create bitmap:ip type of sets */ + +static const struct nla_policy +bitmap_port_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; + +static bool +init_map_port(struct ip_set *set, struct bitmap_port *map, + uint16_t first_port, uint16_t last_port) +{ + map->members = ip_set_alloc(map->memsize, GFP_KERNEL, &set->flags); + if (!map->members) + return false; + map->first_port = first_port; + map->last_port = last_port; + + set->data = map; + set->family = 
AF_UNSPEC; + + return true; +} + +static int +bitmap_port_create(struct ip_set *set, struct nlattr *head, int len, + uint32_t flags) +{ + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + uint16_t first_port, last_port; + + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, + bitmap_port_create_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PORT]) + first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PORT_TO]) { + last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (first_port > last_port) { + uint16_t tmp = first_port; + + first_port = last_port; + last_port = tmp; + } + } else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_TIMEOUT]) { + struct bitmap_port_timeout *map; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + map->memsize = (last_port - first_port + 1) + * sizeof(unsigned long); + + if (!init_map_port(set, (struct bitmap_port *) map, + first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + set->flags |= IP_SET_FLAG_TIMEOUT; + set->variant = &bitmap_port_timeout; + + bitmap_port_timeout_gc_init(set); + } else { + struct bitmap_port *map; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + map->memsize = bitmap_bytes(0, last_port - first_port); + D("memsize: %zu", map->memsize); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; } + + set->variant = &bitmap_port; + } + return 0; } -IP_SET_TYPE(portmap, IPSET_TYPE_PORT | IPSET_DATA_SINGLE) +struct ip_set_type bitmap_port_type = { + .name = "bitmap:port", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_PORT, + .family = AF_UNSPEC, + .revision = 0, + .create = bitmap_port_create, + .me = THIS_MODULE, +}; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("portmap type of IP sets"); +static int __init 
+bitmap_port_init(void) +{ + return ip_set_type_register(&bitmap_port_type); +} + +static void __exit +bitmap_port_fini(void) +{ + ip_set_type_unregister(&bitmap_port_type); +} -REGISTER_MODULE(portmap) +module_init(bitmap_port_init); +module_exit(bitmap_port_fini); diff --git a/kernel/ip_set_hash_ip.c b/kernel/ip_set_hash_ip.c index 1accbe3..d99c99b 100644 --- a/kernel/ip_set_hash_ip.c +++ b/kernel/ip_set_hash_ip.c @@ -1,164 +1,512 @@ -/* Copyright (C) 2003-2008 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -/* Kernel module implementing an ip hash set */ +/* Kernel module implementing an IP set type: the hash:ip type */ #include <linux/module.h> -#include <linux/moduleparam.h> #include <linux/ip.h> #include <linux/skbuff.h> -#include <linux/netfilter_ipv4/ip_set_jhash.h> #include <linux/errno.h> #include <asm/uaccess.h> #include <asm/bitops.h> #include <linux/spinlock.h> #include <linux/random.h> - #include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> +#include <net/pfxlen.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ip_set.h> +#include <linux/netfilter/ip_set_timeout.h> +#include <linux/netfilter/ip_set_hash.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +MODULE_DESCRIPTION("hash:ip type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip"); + +/* Member elements without timeout */ +struct ip4_elem { + uint32_t ip; +}; + +struct ip6_elem { + union nf_inet_addr ip; +}; + +/* Member elements with timeout support */ +struct ip4_elem_timeout { + uint32_t ip; + unsigned long timeout; +}; -#include <linux/netfilter_ipv4/ip_set_iphash.h> +struct ip6_elem_timeout { + union nf_inet_addr ip; + unsigned long timeout; +}; -static int limit = MAX_RANGE; 
+/* The hash:ip type structure */ +struct hash_ip { + void *members; /* the set members */ + uint32_t hashsize; /* hash size */ + uint32_t maxelem; /* max number of elements/hashsize */ + uint8_t probes; /* max number of probes */ + uint8_t resize; /* resize factor in percent */ + uint8_t netmask; /* netmask */ + uint32_t timeout; /* timeout value */ + uint32_t elements; /* number of elements */ + struct timer_list gc; /* garbage collector */ + size_t elem_size; /* size of element */ + initval_t initval[0]; /* initvals for jhash_1word */ +}; -static inline __u32 -iphash_id(struct ip_set *set, ip_set_ip_t ip) +static inline void * +hash_ip_elem(const struct hash_ip *map, uint32_t id) { - struct ip_set_iphash *map = set->data; - __u32 id; - u_int16_t i; - ip_set_ip_t *elem; + return (void *)((char *)map->members + id * map->elem_size); +} +static inline unsigned long +get_ip4_elem_timeout(const struct ip4_elem *elem) +{ + return ((const struct ip4_elem_timeout *)elem)->timeout; +} - ip &= map->netmask; - DP("set: %s, ip:%u.%u.%u.%u", set->name, HIPQUAD(ip)); - for (i = 0; i < map->probes; i++) { - id = jhash_ip(map, i, ip) % map->hashsize; - DP("hash key: %u", id); - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id); - if (*elem == ip) - return id; - /* No shortcut - there can be deleted entries. 
*/ - } - return UINT_MAX; +static inline unsigned long +get_ip6_elem_timeout(const struct ip6_elem *elem) +{ + return ((const struct ip6_elem_timeout *)elem)->timeout; +} + +static inline uint32_t +ip4_hash(struct ip4_elem *elem, initval_t initval, uint32_t hashsize) +{ + return jhash_1word(elem->ip, initval) % hashsize; +} + +static inline uint32_t +ip6_hash(struct ip6_elem *elem, initval_t initval, uint32_t hashsize) +{ + return jhash2((u32 *)&elem->ip, 4, initval) % hashsize; +} + +static inline bool +ip4_cmp(struct ip4_elem *ip1, struct ip4_elem *ip2) +{ + return ip1->ip == ip2->ip; +} + +static inline bool +ip6_cmp(struct ip6_elem *ip1, struct ip6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6); +} + +static inline bool +ip4_null(struct ip4_elem *elem) +{ + return elem->ip == 0; +} + +static inline bool +ip6_null(struct ip6_elem *elem) +{ + return ipv6_addr_any(&elem->ip.in6); } -static inline int -iphash_test(struct ip_set *set, ip_set_ip_t ip) +static inline void +ip4_cpy(struct ip4_elem *dst, const struct ip4_elem *src) { - return (ip && iphash_id(set, ip) != UINT_MAX); + dst->ip = src->ip; } -#define KADT_CONDITION +static inline void +ip6_cpy(struct ip6_elem *dst, const struct ip6_elem *src) +{ + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); +} -UADT(iphash, test) -KADT(iphash, test, ipaddr) +/* Zero valued IP addresses (network order) cannot be stored */ +static inline void +ip4_zero_out(struct ip4_elem *elem) +{ + elem->ip = 0; +} -static inline int -__iphash_add(struct ip_set_iphash *map, ip_set_ip_t *ip) +static inline void +ip6_zero_out(struct ip6_elem *elem) { - __u32 probe; - u_int16_t i; - ip_set_ip_t *elem, *slot = NULL; + ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0); +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, uint8_t prefix) +{ + ip->ip6[0] &= NETMASK6(prefix)[0]; + ip->ip6[1] &= NETMASK6(prefix)[1]; + ip->ip6[2] &= NETMASK6(prefix)[2]; + ip->ip6[3] &= NETMASK6(prefix)[3]; +} + +/* The type variant functions: 
generic ones */ + +static void +hash_ip_destroy(struct ip_set *set) +{ + struct hash_ip *map = set->data; + + /* gc might be running: del_timer_sync can't be used */ + if (set->flags & IP_SET_FLAG_TIMEOUT) + while (!del_timer(&map->gc)) + msleep(IPSET_DESTROY_TIMER_SLEEP); + + ip_set_free(map->members, set->flags); + kfree(map); - for (i = 0; i < map->probes; i++) { - probe = jhash_ip(map, i, *ip) % map->hashsize; - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, probe); - if (*elem == *ip) - return -EEXIST; - if (!(slot || *elem)) - slot = elem; - /* There can be deleted entries, must check all slots */ - } - if (slot) { - *slot = *ip; - map->elements++; - return 0; - } - /* Trigger rehashing */ - return -EAGAIN; + set->data = NULL; } -static inline int -iphash_add(struct ip_set *set, ip_set_ip_t ip) +#define hash_ip4_destroy hash_ip_destroy +#define hash_ip6_destroy hash_ip_destroy + +static void +hash_ip_flush(struct ip_set *set) { - struct ip_set_iphash *map = set->data; + struct hash_ip *map = set->data; - if (!ip || map->elements >= limit) - return -ERANGE; + memset(map->members, 0, map->hashsize * map->elem_size); + map->elements = 0; +} + +#define hash_ip4_flush hash_ip_flush +#define hash_ip6_flush hash_ip_flush + +/* IPv4 variant */ - ip &= map->netmask; - return __iphash_add(map, &ip); +#define PF 4 +#include "ip_set_hash_ip_src.c" +#undef PF + +static int +hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, uint8_t pf, const uint8_t *flags) +{ + struct hash_ip *map = set->data; + bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + uint32_t ip; + + if (pf != AF_INET) + return -EINVAL; + + ip4addrptr(skb, flags, &ip); + ip &= NETMASK(map->netmask); + if (ip == 0) + return -EINVAL; + + switch (adt) { + case IPSET_TEST: + return hash_ip4_test(map, with_timeout, + (struct ip4_elem *)&ip); + case IPSET_ADD: + return hash_ip4_add(map, with_timeout, + (struct ip4_elem *)&ip, map->timeout); + case IPSET_DEL: + return 
hash_ip4_del(map, with_timeout, (struct ip4_elem *)&ip); + default: + BUG(); + } + return 0; } -UADT(iphash, add) -KADT(iphash, add, ipaddr) +static const struct nla_policy +hash_ip4_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; -static inline void -__iphash_retry(struct ip_set_iphash *tmp, struct ip_set_iphash *map) +static int +hash_ip4_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, uint32_t *lineno, uint32_t flags) { - tmp->netmask = map->netmask; + struct hash_ip *map = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + uint32_t ip, timeout = map->timeout; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_ip4_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + ip = ip_set_get_n32(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; + + ip &= NETMASK(map->netmask); + if (ip == 0) + return -IPSET_ERR_HASH_ELEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + } + + switch (adt) { + case IPSET_TEST: + return hash_ip4_test(map, with_timeout, + (struct ip4_elem *)&ip); + case IPSET_ADD: + return hash_ip4_add(map, with_timeout, + (struct ip4_elem *)&ip, timeout); + case IPSET_DEL: + return hash_ip4_del(map, with_timeout, + (struct ip4_elem *)&ip); + default: + BUG(); + } + + return 0; } -HASH_RETRY(iphash, ip_set_ip_t) +/* IPv6 variants */ + +#define PF 6 +#include "ip_set_hash_ip_src.c" +#undef PF -static inline int -iphash_del(struct ip_set *set, ip_set_ip_t ip) +static int +hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, uint8_t pf, const uint8_t *flags) { - struct ip_set_iphash *map = set->data; - ip_set_ip_t id, *elem; + struct hash_ip *map = set->data; + bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + union 
nf_inet_addr ip; - if (!ip) - return -ERANGE; + if (pf != AF_INET6) + return -EINVAL; - id = iphash_id(set, ip); - if (id == UINT_MAX) - return -EEXIST; - - elem = HARRAY_ELEM(map->members, ip_set_ip_t *, id); - *elem = 0; - map->elements--; + ip6addrptr(skb, flags, &ip.in6); + ip6_netmask(&ip, map->netmask); + if (ipv6_addr_any(&ip.in6)) + return -EINVAL; + switch (adt) { + case IPSET_TEST: + return hash_ip6_test(map, with_timeout, + (struct ip6_elem *)&ip); + case IPSET_ADD: + return hash_ip6_add(map, with_timeout, + (struct ip6_elem *)&ip, map->timeout); + case IPSET_DEL: + return hash_ip6_del(map, with_timeout, + (struct ip6_elem *)&ip); + default: + BUG(); + } return 0; } -UADT(iphash, del) -KADT(iphash, del, ipaddr) +static const struct nla_policy +hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] __read_mostly = { + [IPSET_ATTR_IP] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; -static inline int -__iphash_create(const struct ip_set_req_iphash_create *req, - struct ip_set_iphash *map) +static int +hash_ip6_uadt(struct ip_set *set, struct nlattr *head, int len, + enum ipset_adt adt, uint32_t *lineno, uint32_t flags) { - map->netmask = req->netmask; + struct hash_ip *map = set->data; + struct nlattr *tb[IPSET_ATTR_ADT_MAX]; + union nf_inet_addr *ip; + bool with_timeout = set->flags & IP_SET_FLAG_TIMEOUT; + uint32_t timeout = map->timeout; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, head, len, + hash_ip6_adt_policy)) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_IP]) + ip = nla_data(tb[IPSET_ATTR_IP]); + else + return -IPSET_ERR_PROTOCOL; + + ip6_netmask(ip, map->netmask); + if (ipv6_addr_any(&ip->in6)) + return -IPSET_ERR_HASH_ELEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + } + + switch (adt) { + case IPSET_TEST: + return hash_ip6_test(map, with_timeout, + (struct ip6_elem *)ip); + case IPSET_ADD: + return 
hash_ip6_add(map, with_timeout, + (struct ip6_elem *)ip, timeout); + case IPSET_DEL: + return hash_ip6_del(map, with_timeout, + (struct ip6_elem *)ip); + default: + BUG(); + } return 0; } -HASH_CREATE(iphash, ip_set_ip_t) -HASH_DESTROY(iphash) +/* Create hash:ip type of sets */ -HASH_FLUSH(iphash, ip_set_ip_t) +static const struct nla_policy +hash_ip_create_policy[IPSET_ATTR_CREATE_MAX+1] __read_mostly = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, +}; -static inline void -__iphash_list_header(const struct ip_set_iphash *map, - struct ip_set_req_iphash_create *header) -{ - header->netmask = map->netmask; +static bool +init_map_ip(struct ip_set *set, struct hash_ip *map, uint32_t maxelem, + uint32_t probes, uint32_t resize, uint8_t netmask, uint8_t family) +{ + map->members = ip_set_alloc(map->hashsize * map->elem_size, + GFP_KERNEL, &set->flags); + if (!map->members) + return false; + + map->maxelem = maxelem; + map->probes = probes; + map->resize = resize; + map->netmask = netmask; + + set->data = map; + set->family = family; + + return true; } -HASH_LIST_HEADER(iphash) -HASH_LIST_MEMBERS_SIZE(iphash, ip_set_ip_t) -HASH_LIST_MEMBERS(iphash, ip_set_ip_t) +static int +hash_ip_create(struct ip_set *set, struct nlattr *head, int len, + uint32_t flags) +{ + struct nlattr *tb[IPSET_ATTR_CREATE_MAX]; + uint32_t hashsize, maxelem; + uint8_t probes, resize, netmask, family, i; + struct hash_ip *map; -IP_SET_RTYPE(iphash, IPSET_TYPE_IP | IPSET_DATA_SINGLE) + if (nla_parse(tb, IPSET_ATTR_CREATE_MAX, head, len, + hash_ip_create_policy)) + return -IPSET_ERR_PROTOCOL; -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("iphash type of IP sets"); -module_param(limit, int, 0600); -MODULE_PARM_DESC(limit, "maximal number of elements stored in the 
sets"); + hashsize = IPSET_DEFAULT_HASHSIZE; + maxelem = IPSET_DEFAULT_MAXELEM; + probes = IPSET_DEFAULT_PROBES; + resize = IPSET_DEFAULT_RESIZE; + family = AF_INET; + + if (tb[IPSET_ATTR_HASHSIZE]) + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + if (tb[IPSET_ATTR_PROBES]) + probes = nla_get_u8(tb[IPSET_ATTR_PROBES]); + + if (tb[IPSET_ATTR_RESIZE]) + resize = nla_get_u8(tb[IPSET_ATTR_RESIZE]); + + if (tb[IPSET_ATTR_FAMILY]) + family = nla_get_u8(tb[IPSET_ATTR_FAMILY]); + if (!(family == AF_INET || family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + netmask = family == AF_INET ? 32 : 128; + + if (tb[IPSET_ATTR_NETMASK]) { + netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); + + if ((family == AF_INET && netmask > 32) + || (family == AF_INET6 && netmask > 128)) + return -IPSET_ERR_INVALID_NETMASK; + } + + map = kzalloc(sizeof(*map) + probes * sizeof(initval_t), GFP_KERNEL); + if (!map) + return -ENOMEM; + + map->hashsize = hashsize; + if (tb[IPSET_ATTR_TIMEOUT]) { + map->elem_size = family == AF_INET + ? sizeof(struct ip4_elem_timeout) + : sizeof(struct ip6_elem_timeout); + + if (!init_map_ip(set, map, maxelem, probes, resize, netmask, + family)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_get_h32(tb[IPSET_ATTR_TIMEOUT]); + set->flags |= IP_SET_FLAG_TIMEOUT; + + if (family == AF_INET) + hash_ip4_gc_init(set); + else + hash_ip6_gc_init(set); + } else { + map->elem_size = family == AF_INET + ? sizeof(struct ip4_elem) + : sizeof(struct ip6_elem); + + if (!init_map_ip(set, map, maxelem, probes, resize, netmask, + family)) { + kfree(map); + return -ENOMEM; + } + } + for (i = 0; i < map->probes; i++) + get_random_bytes(((initval_t *) map->initval)+i, + sizeof(initval_t)); + + set->variant = family == AF_INET ? 
&hash_ip4 : &hash_ip6; + D("create %s hashsize %u maxelem %u probes %u resize %u", + set->name, map->hashsize, map->maxelem, map->probes, map->resize); + + return 0; +} + +static struct ip_set_type hash_ip_type = { + .name = "hash:ip", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ip_create, + .me = THIS_MODULE, +}; + +static int __init +hash_ip_init(void) +{ + return ip_set_type_register(&hash_ip_type); +} + +static void __exit +hash_ip_fini(void) +{ + ip_set_type_unregister(&hash_ip_type); +} -REGISTER_MODULE(iphash) +module_init(hash_ip_init); +module_exit(hash_ip_fini); |