summaryrefslogtreecommitdiffstats
path: root/kernel/ip_set_iptree.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/ip_set_iptree.c')
-rw-r--r--kernel/ip_set_iptree.c607
1 files changed, 607 insertions, 0 deletions
diff --git a/kernel/ip_set_iptree.c b/kernel/ip_set_iptree.c
new file mode 100644
index 0000000..1a5c32a
--- /dev/null
+++ b/kernel/ip_set_iptree.c
@@ -0,0 +1,607 @@
+/* Copyright (C) 2005 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the iptree type */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_set.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include <asm/bitops.h>
+#include <linux/spinlock.h>
+
+/* Backward compatibility */
+#ifndef __nocast
+#define __nocast
+#endif
+
+#include <linux/netfilter_ipv4/ip_set_iptree.h>
+
+/* Maximal number of elements a set may hold; overridable at module
+ * load time via the "limit" parameter (see module_param below). */
+static int limit = MAX_RANGE;
+
+/* Garbage collection interval in seconds: */
+#define IPTREE_GC_TIME 5*60
+/* Sleep so many milliseconds before trying again
+ * to delete the gc timer at destroying/flushing a set */
+#define IPTREE_DESTROY_SLEEP 100
+
+/* Slab caches: branch_cachep serves the two interior levels (b/c),
+ * leaf_cachep the d-level leaves. The kmem_cache_t typedef was
+ * replaced by struct kmem_cache in kernel 2.6.21. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
+static struct kmem_cache *branch_cachep;
+static struct kmem_cache *leaf_cachep;
+#else
+static kmem_cache_t *branch_cachep;
+static kmem_cache_t *leaf_cachep;
+#endif
+
+/* Split the 32-bit address pointed to by addrp into its four octets
+ * a.b.c.d with a being the most significant octet, independent of
+ * host byte order (the byte indices are swapped per endianness). */
+#if defined(__LITTLE_ENDIAN)
+#define ABCD(a,b,c,d,addrp) do { \
+	a = ((unsigned char *)addrp)[3]; \
+	b = ((unsigned char *)addrp)[2]; \
+	c = ((unsigned char *)addrp)[1]; \
+	d = ((unsigned char *)addrp)[0]; \
+} while (0)
+#elif defined(__BIG_ENDIAN)
+#define ABCD(a,b,c,d,addrp) do { \
+	a = ((unsigned char *)addrp)[0]; \
+	b = ((unsigned char *)addrp)[1]; \
+	c = ((unsigned char *)addrp)[2]; \
+	d = ((unsigned char *)addrp)[3]; \
+} while (0)
+#else
+#error "Please fix asm/byteorder.h"
+#endif /* __LITTLE_ENDIAN */
+
+/* Descend one tree level: load (map)->tree[elem] into branch.
+ * CAUTION: if the slot is empty the macro executes "return 0"
+ * ("not in set") from the ENCLOSING function, not just the macro. */
+#define TESTIP_WALK(map, elem, branch) do { \
+	if ((map)->tree[elem]) { \
+		branch = (map)->tree[elem]; \
+	} else \
+		return 0; \
+} while (0)
+
+/* Core membership test. Walks the a->b->c interior levels (returning
+ * 0 early via TESTIP_WALK if any level is missing) and inspects the
+ * leaf slot. Returns -ERANGE for the invalid address 0, nonzero if
+ * the entry exists and — when the set has a timeout — has not yet
+ * expired, 0 otherwise. */
+static inline int
+__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
+{
+	struct ip_set_iptree *map = set->data;
+	struct ip_set_iptreeb *btree;
+	struct ip_set_iptreec *ctree;
+	struct ip_set_iptreed *dtree;
+	unsigned char a,b,c,d;
+
+	if (!ip)
+		return -ERANGE;
+
+	*hash_ip = ip;
+	/* Decompose the stored address into per-level indices. */
+	ABCD(a, b, c, d, hash_ip);
+	DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout);
+	TESTIP_WALK(map, a, btree);
+	TESTIP_WALK(btree, b, ctree);
+	TESTIP_WALK(ctree, c, dtree);
+	DP("%lu %lu", dtree->expires[d], jiffies);
+	/* expires[d] == 0 means "no entry"; with timeouts disabled any
+	 * nonzero value (the sentinel 1) counts as present. */
+	return dtree->expires[d]
+	       && (!map->timeout
+		   || time_after(dtree->expires[d], jiffies));
+}
+
+/* Userspace entry point for a membership test: validate the request
+ * size, then hand the address to __testip(). */
+static int
+testip(struct ip_set *set, const void *data, size_t size,
+       ip_set_ip_t *hash_ip)
+{
+	const struct ip_set_req_iptree *req = data;
+
+	if (size == sizeof(*req))
+		return __testip(set, req->ip, hash_ip);
+
+	ip_set_printk("data length wrong (want %zu, have %zu)",
+		      sizeof(*req), size);
+	return -EINVAL;
+}
+
+/* Kernel (packet-path) membership test. Picks the source or the
+ * destination address of the IPv4 header according to the IPSET_SRC
+ * flag for this binding index. The #if blocks cope with the 2.6.22
+ * sk_buff change that replaced skb->nh.iph with ip_hdr(skb).
+ * Negative errors from __testip are reported as "no match" (0). */
+static int
+testip_kernel(struct ip_set *set,
+	      const struct sk_buff *skb,
+	      ip_set_ip_t *hash_ip,
+	      const u_int32_t *flags,
+	      unsigned char index)
+{
+	int res;
+
+	DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
+	   flags[index] & IPSET_SRC ? "SRC" : "DST",
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+	   NIPQUAD(ip_hdr(skb)->saddr),
+	   NIPQUAD(ip_hdr(skb)->daddr));
+#else
+	   NIPQUAD(skb->nh.iph->saddr),
+	   NIPQUAD(skb->nh.iph->daddr));
+#endif
+
+	res = __testip(set,
+		       ntohl(flags[index] & IPSET_SRC
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+				? ip_hdr(skb)->saddr
+				: ip_hdr(skb)->daddr),
+#else
+				? skb->nh.iph->saddr
+				: skb->nh.iph->daddr),
+#endif
+		       hash_ip);
+	return (res < 0 ? 0 : res);
+}
+
+/* Descend one tree level while inserting, allocating the node from
+ * the given slab cache (GFP_ATOMIC: may run in packet context) and
+ * zeroing it when the slot is empty. CAUTION: on allocation failure
+ * the macro executes "return -ENOMEM" from the ENCLOSING function. */
+#define ADDIP_WALK(map, elem, branch, type, cachep) do {	\
+	if ((map)->tree[elem]) {				\
+		DP("found %u", elem);				\
+		branch = (map)->tree[elem];			\
+	} else {						\
+		branch = (type *)				\
+			kmem_cache_alloc(cachep, GFP_ATOMIC);	\
+		if (branch == NULL)				\
+			return -ENOMEM;				\
+		memset(branch, 0, sizeof(*branch));		\
+		(map)->tree[elem] = branch;			\
+		DP("alloc %u", elem);				\
+	}							\
+} while (0)
+
+/* Core insert. Builds the a->b->c path on demand (ADDIP_WALK may
+ * return -ENOMEM from here) and stamps the leaf slot. Returns
+ * -ERANGE for address 0 or when the set is full, -EEXIST if a live
+ * entry was already present (its timeout is still refreshed), else 0.
+ * expires[d] semantics: 0 = empty slot; 1 = permanent entry (set has
+ * no timeout); otherwise the absolute jiffies expiry time. */
+static inline int
+__addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout,
+	ip_set_ip_t *hash_ip)
+{
+	struct ip_set_iptree *map = set->data;
+	struct ip_set_iptreeb *btree;
+	struct ip_set_iptreec *ctree;
+	struct ip_set_iptreed *dtree;
+	unsigned char a,b,c,d;
+	int ret = 0;
+
+	if (!ip || map->elements >= limit)
+		/* We could call the garbage collector
+		 * but it's probably overkill */
+		return -ERANGE;
+
+	*hash_ip = ip;
+	ABCD(a, b, c, d, hash_ip);
+	DP("%u %u %u %u timeout %u", a, b, c, d, timeout);
+	ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep);
+	ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep);
+	ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep);
+	if (dtree->expires[d]
+	    && (!map->timeout || time_after(dtree->expires[d], jiffies)))
+		ret = -EEXIST;
+	dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1;
+	/* Lottery: I won! */
+	/* (jiffies landed exactly on 0, which would read as "empty" —
+	 * nudge it to 1 so the entry is not lost.) */
+	if (dtree->expires[d] == 0)
+		dtree->expires[d] = 1;
+	DP("%u %lu", d, dtree->expires[d]);
+	if (ret == 0)
+		map->elements++;
+	return ret;
+}
+
+/* Userspace entry point for insert: validate the request size, then
+ * insert with the per-request timeout, falling back to the set's
+ * default timeout when the request carries 0. */
+static int
+addip(struct ip_set *set, const void *data, size_t size,
+      ip_set_ip_t *hash_ip)
+{
+	struct ip_set_iptree *map = set->data;
+	const struct ip_set_req_iptree *req = data;
+
+	if (size != sizeof(struct ip_set_req_iptree)) {
+		ip_set_printk("data length wrong (want %zu, have %zu)",
+			      sizeof(struct ip_set_req_iptree),
+			      size);
+		return -EINVAL;
+	}
+	DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout);
+	return __addip(set, req->ip,
+		       req->timeout ? req->timeout : map->timeout,
+		       hash_ip);
+}
+
+/* Kernel (packet-path) insert: take src or dst of the IPv4 header per
+ * the IPSET_SRC flag and insert it with the set's default timeout.
+ * The #if blocks cope with the 2.6.22 skb->nh.iph -> ip_hdr() change. */
+static int
+addip_kernel(struct ip_set *set,
+	     const struct sk_buff *skb,
+	     ip_set_ip_t *hash_ip,
+	     const u_int32_t *flags,
+	     unsigned char index)
+{
+	struct ip_set_iptree *map = set->data;
+
+	return __addip(set,
+		       ntohl(flags[index] & IPSET_SRC
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+				? ip_hdr(skb)->saddr
+				: ip_hdr(skb)->daddr),
+#else
+				? skb->nh.iph->saddr
+				: skb->nh.iph->daddr),
+#endif
+		       map->timeout,
+		       hash_ip);
+}
+
+/* Descend one tree level while deleting. CAUTION: if the slot is
+ * empty the macro executes "return -EEXIST" ("entry not in set")
+ * from the ENCLOSING function. */
+#define DELIP_WALK(map, elem, branch) do { \
+	if ((map)->tree[elem]) { \
+		branch = (map)->tree[elem]; \
+	} else \
+		return -EEXIST; \
+} while (0)
+
+/* Core delete. Only clears the leaf slot (expires[d] = 0); emptied
+ * branch/leaf nodes are reclaimed later by the gc timer, which is why
+ * gc runs even for sets without timeout. Returns -ERANGE for address
+ * 0, -EEXIST if the entry was not present, 0 on success. */
+static inline int
+__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
+{
+	struct ip_set_iptree *map = set->data;
+	struct ip_set_iptreeb *btree;
+	struct ip_set_iptreec *ctree;
+	struct ip_set_iptreed *dtree;
+	unsigned char a,b,c,d;
+
+	if (!ip)
+		return -ERANGE;
+
+	*hash_ip = ip;
+	ABCD(a, b, c, d, hash_ip);
+	DELIP_WALK(map, a, btree);
+	DELIP_WALK(btree, b, ctree);
+	DELIP_WALK(ctree, c, dtree);
+
+	if (dtree->expires[d]) {
+		dtree->expires[d] = 0;
+		map->elements--;
+		return 0;
+	}
+	return -EEXIST;
+}
+
+/* Userspace entry point for delete: validate the request size, then
+ * hand the address to __delip(). */
+static int
+delip(struct ip_set *set, const void *data, size_t size,
+      ip_set_ip_t *hash_ip)
+{
+	const struct ip_set_req_iptree *req = data;
+
+	if (size == sizeof(*req))
+		return __delip(set, req->ip, hash_ip);
+
+	ip_set_printk("data length wrong (want %zu, have %zu)",
+		      sizeof(*req), size);
+	return -EINVAL;
+}
+
+/* Kernel (packet-path) delete: take src or dst of the IPv4 header per
+ * the IPSET_SRC flag and remove it from the set. The #if blocks cope
+ * with the 2.6.22 skb->nh.iph -> ip_hdr() change. */
+static int
+delip_kernel(struct ip_set *set,
+	     const struct sk_buff *skb,
+	     ip_set_ip_t *hash_ip,
+	     const u_int32_t *flags,
+	     unsigned char index)
+{
+	return __delip(set,
+		       ntohl(flags[index] & IPSET_SRC
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+				? ip_hdr(skb)->saddr
+				: ip_hdr(skb)->daddr),
+#else
+				? skb->nh.iph->saddr
+				: skb->nh.iph->daddr),
+#endif
+		       hash_ip);
+}
+
+/* Iterate over the 256 slots of one tree level, skipping empty ones;
+ * LOOP_WALK_BEGIN opens a for-loop body that the matching
+ * LOOP_WALK_END closes, so the pair must always be balanced. */
+#define LOOP_WALK_BEGIN(map, i, branch) \
+	for (i = 0; i < 256; i++) { \
+		if (!(map)->tree[i]) \
+			continue; \
+		branch = (map)->tree[i]
+
+#define LOOP_WALK_END }
+
+/* Timer callback: garbage-collect the whole tree under the set lock.
+ * Expired leaf entries are zeroed, and nodes that end up empty are
+ * freed bottom-up. Flag variables:
+ *   k - current leaf still holds a live entry,
+ *   j - current c-level node still holds a leaf,
+ *   i - current b-level node still holds a c-level node.
+ * Finally the timer re-arms itself for the next gc_interval. */
+static void ip_tree_gc(unsigned long ul_set)
+{
+	struct ip_set *set = (struct ip_set *) ul_set;
+	struct ip_set_iptree *map = set->data;
+	struct ip_set_iptreeb *btree;
+	struct ip_set_iptreec *ctree;
+	struct ip_set_iptreed *dtree;
+	unsigned int a,b,c,d;
+	unsigned char i,j,k;
+
+	i = j = k = 0;
+	DP("gc: %s", set->name);
+	write_lock_bh(&set->lock);
+	LOOP_WALK_BEGIN(map, a, btree);
+	LOOP_WALK_BEGIN(btree, b, ctree);
+	LOOP_WALK_BEGIN(ctree, c, dtree);
+	for (d = 0; d < 256; d++) {
+		if (dtree->expires[d]) {
+			DP("gc: %u %u %u %u: expires %lu jiffies %lu",
+			   a, b, c, d,
+			   dtree->expires[d], jiffies);
+			/* Only timed sets expire entries; permanent
+			 * entries (sentinel 1) always keep the leaf. */
+			if (map->timeout
+			    && time_before(dtree->expires[d], jiffies)) {
+				dtree->expires[d] = 0;
+				map->elements--;
+			} else
+				k = 1;
+		}
+	}
+	if (k == 0) {
+		DP("gc: %s: leaf %u %u %u empty",
+		   set->name, a, b, c);
+		kmem_cache_free(leaf_cachep, dtree);
+		ctree->tree[c] = NULL;
+	} else {
+		DP("gc: %s: leaf %u %u %u not empty",
+		   set->name, a, b, c);
+		/* Leaf survives: mark parent non-empty, reset k for
+		 * the next leaf. */
+		j = 1;
+		k = 0;
+	}
+	LOOP_WALK_END;
+	if (j == 0) {
+		DP("gc: %s: branch %u %u empty",
+		   set->name, a, b);
+		kmem_cache_free(branch_cachep, ctree);
+		btree->tree[b] = NULL;
+	} else {
+		DP("gc: %s: branch %u %u not empty",
+		   set->name, a, b);
+		i = 1;
+		j = k = 0;
+	}
+	LOOP_WALK_END;
+	if (i == 0) {
+		DP("gc: %s: branch %u empty",
+		   set->name, a);
+		kmem_cache_free(branch_cachep, btree);
+		map->tree[a] = NULL;
+	} else {
+		DP("gc: %s: branch %u not empty",
+		   set->name, a);
+		i = j = k = 0;
+	}
+	LOOP_WALK_END;
+	write_unlock_bh(&set->lock);
+
+	map->gc.expires = jiffies + map->gc_interval * HZ;
+	add_timer(&map->gc);
+}
+
+/* Arm the per-set garbage-collection timer (first run after
+ * IPTREE_GC_TIME seconds, then self-rearming in ip_tree_gc). */
+static inline void init_gc_timer(struct ip_set *set)
+{
+	struct ip_set_iptree *map = set->data;
+
+	/* Even if there is no timeout for the entries,
+	 * we still have to call gc because delete
+	 * do not clean up empty branches */
+	map->gc_interval = IPTREE_GC_TIME;
+	init_timer(&map->gc);
+	map->gc.data = (unsigned long) set;
+	map->gc.function = ip_tree_gc;
+	map->gc.expires = jiffies + map->gc_interval * HZ;
+	add_timer(&map->gc);
+}
+
+/* Create a new iptree set: validate the create request, allocate and
+ * zero the top-level map, record the set's default timeout and start
+ * the garbage-collector timer. Returns 0 or a negative errno. */
+static int create(struct ip_set *set, const void *data, size_t size)
+{
+	const struct ip_set_req_iptree_create *req = data;
+	struct ip_set_iptree *map;
+
+	if (size != sizeof(struct ip_set_req_iptree_create)) {
+		ip_set_printk("data length wrong (want %zu, have %zu)",
+			      sizeof(struct ip_set_req_iptree_create),
+			      size);
+		return -EINVAL;
+	}
+
+	map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL);
+	if (!map) {
+		/* sizeof yields size_t: %zu, not %d (which is
+		 * undefined behavior on a size_t argument). */
+		DP("out of memory for %zu bytes",
+		   sizeof(struct ip_set_iptree));
+		return -ENOMEM;
+	}
+	memset(map, 0, sizeof(*map));
+	map->timeout = req->timeout;
+	map->elements = 0;
+	set->data = map;
+
+	init_gc_timer(set);
+
+	return 0;
+}
+
+/* Free every node of the tree back to its slab cache and reset the
+ * element count. Callers (destroy/flush) stop the gc timer first, so
+ * no concurrent timer can touch the nodes being freed. Note: the
+ * map->tree[] slots are not NULLed; destroy frees the map and flush
+ * memsets it afterwards. */
+static void __flush(struct ip_set_iptree *map)
+{
+	struct ip_set_iptreeb *btree;
+	struct ip_set_iptreec *ctree;
+	struct ip_set_iptreed *dtree;
+	unsigned int a,b,c;
+
+	LOOP_WALK_BEGIN(map, a, btree);
+	LOOP_WALK_BEGIN(btree, b, ctree);
+	LOOP_WALK_BEGIN(ctree, c, dtree);
+	kmem_cache_free(leaf_cachep, dtree);
+	LOOP_WALK_END;
+	kmem_cache_free(branch_cachep, ctree);
+	LOOP_WALK_END;
+	kmem_cache_free(branch_cachep, btree);
+	LOOP_WALK_END;
+	map->elements = 0;
+}
+
+/* Tear the set down: stop the gc timer (retrying while its handler
+ * is running, since the handler re-arms it), free all tree nodes and
+ * the map itself. */
+static void destroy(struct ip_set *set)
+{
+	struct ip_set_iptree *map = set->data;
+
+	/* gc might be running */
+	while (!del_timer(&map->gc))
+		msleep(IPTREE_DESTROY_SLEEP);
+	__flush(map);
+	kfree(map);
+	set->data = NULL;
+}
+
+/* Empty the set but keep it alive: stop gc, free all nodes, zero the
+ * map while preserving the configured timeout, then restart gc. */
+static void flush(struct ip_set *set)
+{
+	struct ip_set_iptree *map = set->data;
+	unsigned int timeout = map->timeout;
+
+	/* gc might be running */
+	while (!del_timer(&map->gc))
+		msleep(IPTREE_DESTROY_SLEEP);
+	__flush(map);
+	memset(map, 0, sizeof(*map));
+	map->timeout = timeout;
+
+	init_gc_timer(set);
+}
+
+/* Report the set's creation parameters to userspace: the only
+ * per-set setting is the default timeout. */
+static void list_header(const struct ip_set *set, void *data)
+{
+	struct ip_set_req_iptree_create *header = data;
+	const struct ip_set_iptree *map = set->data;
+
+	header->timeout = map->timeout;
+}
+
+/* Return the buffer size (bytes) needed by list_members: one request
+ * record per live, unexpired entry. Uses the same liveness test as
+ * __testip so the two stay consistent. */
+static int list_members_size(const struct ip_set *set)
+{
+	const struct ip_set_iptree *map = set->data;
+	struct ip_set_iptreeb *btree;
+	struct ip_set_iptreec *ctree;
+	struct ip_set_iptreed *dtree;
+	unsigned int a,b,c,d;
+	unsigned int count = 0;
+
+	LOOP_WALK_BEGIN(map, a, btree);
+	LOOP_WALK_BEGIN(btree, b, ctree);
+	LOOP_WALK_BEGIN(ctree, c, dtree);
+	for (d = 0; d < 256; d++) {
+		if (dtree->expires[d]
+		    && (!map->timeout || time_after(dtree->expires[d], jiffies)))
+			count++;
+	}
+	LOOP_WALK_END;
+	LOOP_WALK_END;
+	LOOP_WALK_END;
+
+	DP("members %u", count);
+	return (count * sizeof(struct ip_set_req_iptree));
+}
+
+/* Serialize all live entries into the userspace buffer (sized by
+ * list_members_size). Each record carries the host-order IP rebuilt
+ * from the tree indices and the remaining lifetime in seconds
+ * (0 when the set has no timeout). */
+static void list_members(const struct ip_set *set, void *data)
+{
+	const struct ip_set_iptree *map = set->data;
+	struct ip_set_iptreeb *btree;
+	struct ip_set_iptreec *ctree;
+	struct ip_set_iptreed *dtree;
+	unsigned int a,b,c,d;
+	size_t offset = 0;
+	struct ip_set_req_iptree *entry;
+
+	LOOP_WALK_BEGIN(map, a, btree);
+	LOOP_WALK_BEGIN(btree, b, ctree);
+	LOOP_WALK_BEGIN(ctree, c, dtree);
+	for (d = 0; d < 256; d++) {
+		if (dtree->expires[d]
+		    && (!map->timeout || time_after(dtree->expires[d], jiffies))) {
+			entry = data + offset;
+			/* Recompose a.b.c.d from the level indices. */
+			entry->ip = ((a << 24) | (b << 16) | (c << 8) | d);
+			entry->timeout = !map->timeout ? 0
+				: (dtree->expires[d] - jiffies)/HZ;
+			offset += sizeof(struct ip_set_req_iptree);
+		}
+	}
+	LOOP_WALK_END;
+	LOOP_WALK_END;
+	LOOP_WALK_END;
+}
+
+/* Set-type descriptor: wires the iptree operations into the generic
+ * ip_set core (registered in the module init below). */
+static struct ip_set_type ip_set_iptree = {
+	.typename		= SETTYPE_NAME,
+	.features		= IPSET_TYPE_IP | IPSET_DATA_SINGLE,
+	.protocol_version	= IP_SET_PROTOCOL_VERSION,
+	.create			= &create,
+	.destroy		= &destroy,
+	.flush			= &flush,
+	.reqsize		= sizeof(struct ip_set_req_iptree),
+	.addip			= &addip,
+	.addip_kernel		= &addip_kernel,
+	.delip			= &delip,
+	.delip_kernel		= &delip_kernel,
+	.testip			= &testip,
+	.testip_kernel		= &testip_kernel,
+	.header_size		= sizeof(struct ip_set_req_iptree_create),
+	.list_header		= &list_header,
+	.list_members_size	= &list_members_size,
+	.list_members		= &list_members,
+	.me			= THIS_MODULE,
+};
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("iptree type of IP sets");
+/* 0600: the limit parameter is readable/writable by root via sysfs. */
+module_param(limit, int, 0600);
+MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
+
+/* Module init: create the branch and leaf slab caches, then register
+ * the set type. Errors unwind via goto in reverse order of
+ * acquisition. The #if blocks cope with kmem_cache_create losing its
+ * destructor argument in kernel 2.6.23. */
+static int __init ip_set_iptree_init(void)
+{
+	int ret;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
+	branch_cachep = kmem_cache_create("ip_set_iptreeb",
+					  sizeof(struct ip_set_iptreeb),
+					  0, 0, NULL);
+#else
+	branch_cachep = kmem_cache_create("ip_set_iptreeb",
+					  sizeof(struct ip_set_iptreeb),
+					  0, 0, NULL, NULL);
+#endif
+	if (!branch_cachep) {
+		printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
+	leaf_cachep = kmem_cache_create("ip_set_iptreed",
+					sizeof(struct ip_set_iptreed),
+					0, 0, NULL);
+#else
+	leaf_cachep = kmem_cache_create("ip_set_iptreed",
+					sizeof(struct ip_set_iptreed),
+					0, 0, NULL, NULL);
+#endif
+	if (!leaf_cachep) {
+		printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n");
+		ret = -ENOMEM;
+		goto free_branch;
+	}
+	ret = ip_set_register_set_type(&ip_set_iptree);
+	if (ret == 0)
+		goto out;
+
+	/* Registration failed: tear down both caches. */
+	kmem_cache_destroy(leaf_cachep);
+ free_branch:
+	kmem_cache_destroy(branch_cachep);
+ out:
+	return ret;
+}
+
+/* Module exit: unregister the set type, then destroy the slab caches
+ * (safe only once no sets of this type remain). */
+static void __exit ip_set_iptree_fini(void)
+{
+	/* FIXME: possible race with ip_set_create() */
+	ip_set_unregister_set_type(&ip_set_iptree);
+	kmem_cache_destroy(leaf_cachep);
+	kmem_cache_destroy(branch_cachep);
+}
+
+module_init(ip_set_iptree_init);
+module_exit(ip_set_iptree_fini);