From 633b62df0104203591d5c427f6769857571b2540 Mon Sep 17 00:00:00 2001 From: "/C=EU/ST=EU/CN=Jozsef Kadlecsik/emailAddress=kadlec@blackhole.kfki.hu" Date: Wed, 2 Jul 2008 12:20:18 +0000 Subject: Initial ipset release with kernel modules included. --- kernel/ip_set_iptree.c | 607 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 607 insertions(+) create mode 100644 kernel/ip_set_iptree.c (limited to 'kernel/ip_set_iptree.c') diff --git a/kernel/ip_set_iptree.c b/kernel/ip_set_iptree.c new file mode 100644 index 0000000..1a5c32a --- /dev/null +++ b/kernel/ip_set_iptree.c @@ -0,0 +1,607 @@ +/* Copyright (C) 2005 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the iptree type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Backward compatibility */ +#ifndef __nocast +#define __nocast +#endif + +#include + +static int limit = MAX_RANGE; + +/* Garbage collection interval in seconds: */ +#define IPTREE_GC_TIME 5*60 +/* Sleep so many milliseconds before trying again + * to delete the gc timer at destroying/flushing a set */ +#define IPTREE_DESTROY_SLEEP 100 + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21) +static struct kmem_cache *branch_cachep; +static struct kmem_cache *leaf_cachep; +#else +static kmem_cache_t *branch_cachep; +static kmem_cache_t *leaf_cachep; +#endif + +#if defined(__LITTLE_ENDIAN) +#define ABCD(a,b,c,d,addrp) do { \ + a = ((unsigned char *)addrp)[3]; \ + b = ((unsigned char *)addrp)[2]; \ + c = ((unsigned char *)addrp)[1]; \ + d = ((unsigned char *)addrp)[0]; \ +} while (0) +#elif defined(__BIG_ENDIAN) +#define ABCD(a,b,c,d,addrp) do { \ + a = ((unsigned char *)addrp)[0]; \ + b = ((unsigned char *)addrp)[1]; \ + c = ((unsigned char *)addrp)[2]; \ + d = ((unsigned char *)addrp)[3]; \ +} while (0) +#else +#error "Please fix asm/byteorder.h" +#endif /* __LITTLE_ENDIAN */ + +#define TESTIP_WALK(map, elem, branch) do { \ + if ((map)->tree[elem]) { \ + branch = (map)->tree[elem]; \ + } else \ + return 0; \ +} while (0) + +static inline int +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_iptree *map = set->data; + struct ip_set_iptreeb *btree; + struct ip_set_iptreec *ctree; + struct ip_set_iptreed *dtree; + unsigned char a,b,c,d; + + if (!ip) + return -ERANGE; + + *hash_ip = ip; + ABCD(a, b, c, d, hash_ip); + DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout); + TESTIP_WALK(map, a, btree); + TESTIP_WALK(btree, b, ctree); + TESTIP_WALK(ctree, c, dtree); + DP("%lu %lu", dtree->expires[d], jiffies); + return dtree->expires[d] + && (!map->timeout + || time_after(dtree->expires[d], jiffies)); +} + +static int +testip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + const struct ip_set_req_iptree *req = data; + + if (size != sizeof(struct ip_set_req_iptree)) { + ip_set_printk("data length wrong (want %zu, have %zu)", + sizeof(struct ip_set_req_iptree), + size); + return -EINVAL; + } + return __testip(set, req->ip, hash_ip); +} + +static int +testip_kernel(struct ip_set *set, + const struct sk_buff *skb, + ip_set_ip_t *hash_ip, + const u_int32_t *flags, + unsigned char index) +{ + int res; + + DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u", + flags[index] & IPSET_SRC ? "SRC" : "DST", +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) + NIPQUAD(ip_hdr(skb)->saddr), + NIPQUAD(ip_hdr(skb)->daddr)); +#else + NIPQUAD(skb->nh.iph->saddr), + NIPQUAD(skb->nh.iph->daddr)); +#endif + + res = __testip(set, + ntohl(flags[index] & IPSET_SRC +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) + ? ip_hdr(skb)->saddr + : ip_hdr(skb)->daddr), +#else + ? skb->nh.iph->saddr + : skb->nh.iph->daddr), +#endif + hash_ip); + return (res < 0 ? 0 : res); +} + +#define ADDIP_WALK(map, elem, branch, type, cachep) do { \ + if ((map)->tree[elem]) { \ + DP("found %u", elem); \ + branch = (map)->tree[elem]; \ + } else { \ + branch = (type *) \ + kmem_cache_alloc(cachep, GFP_ATOMIC); \ + if (branch == NULL) \ + return -ENOMEM; \ + memset(branch, 0, sizeof(*branch)); \ + (map)->tree[elem] = branch; \ + DP("alloc %u", elem); \ + } \ +} while (0) + +static inline int +__addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout, + ip_set_ip_t *hash_ip) +{ + struct ip_set_iptree *map = set->data; + struct ip_set_iptreeb *btree; + struct ip_set_iptreec *ctree; + struct ip_set_iptreed *dtree; + unsigned char a,b,c,d; + int ret = 0; + + if (!ip || map->elements >= limit) + /* We could call the garbage collector + * but it's probably overkill */ + return -ERANGE; + + *hash_ip = ip; + ABCD(a, b, c, d, hash_ip); + DP("%u %u %u %u timeout %u", a, b, c, d, timeout); + ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep); + ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep); + ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep); + if (dtree->expires[d] + && (!map->timeout || time_after(dtree->expires[d], jiffies))) + ret = -EEXIST; + dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1; + /* Lottery: I won! */ + if (dtree->expires[d] == 0) + dtree->expires[d] = 1; + DP("%u %lu", d, dtree->expires[d]); + if (ret == 0) + map->elements++; + return ret; +} + +static int +addip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_iptree *map = set->data; + const struct ip_set_req_iptree *req = data; + + if (size != sizeof(struct ip_set_req_iptree)) { + ip_set_printk("data length wrong (want %zu, have %zu)", + sizeof(struct ip_set_req_iptree), + size); + return -EINVAL; + } + DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout); + return __addip(set, req->ip, + req->timeout ? req->timeout : map->timeout, + hash_ip); +} + +static int +addip_kernel(struct ip_set *set, + const struct sk_buff *skb, + ip_set_ip_t *hash_ip, + const u_int32_t *flags, + unsigned char index) +{ + struct ip_set_iptree *map = set->data; + + return __addip(set, + ntohl(flags[index] & IPSET_SRC +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) + ? ip_hdr(skb)->saddr + : ip_hdr(skb)->daddr), +#else + ? skb->nh.iph->saddr + : skb->nh.iph->daddr), +#endif + map->timeout, + hash_ip); +} + +#define DELIP_WALK(map, elem, branch) do { \ + if ((map)->tree[elem]) { \ + branch = (map)->tree[elem]; \ + } else \ + return -EEXIST; \ +} while (0) + +static inline int +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_iptree *map = set->data; + struct ip_set_iptreeb *btree; + struct ip_set_iptreec *ctree; + struct ip_set_iptreed *dtree; + unsigned char a,b,c,d; + + if (!ip) + return -ERANGE; + + *hash_ip = ip; + ABCD(a, b, c, d, hash_ip); + DELIP_WALK(map, a, btree); + DELIP_WALK(btree, b, ctree); + DELIP_WALK(ctree, c, dtree); + + if (dtree->expires[d]) { + dtree->expires[d] = 0; + map->elements--; + return 0; + } + return -EEXIST; +} + +static int +delip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + const struct ip_set_req_iptree *req = data; + + if (size != sizeof(struct ip_set_req_iptree)) { + ip_set_printk("data length wrong (want %zu, have %zu)", + sizeof(struct ip_set_req_iptree), + size); + return -EINVAL; + } + return __delip(set, req->ip, hash_ip); +} + +static int +delip_kernel(struct ip_set *set, + const struct sk_buff *skb, + ip_set_ip_t *hash_ip, + const u_int32_t *flags, + unsigned char index) +{ + return __delip(set, + ntohl(flags[index] & IPSET_SRC +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) + ? ip_hdr(skb)->saddr + : ip_hdr(skb)->daddr), +#else + ? skb->nh.iph->saddr + : skb->nh.iph->daddr), +#endif + hash_ip); +} + +#define LOOP_WALK_BEGIN(map, i, branch) \ + for (i = 0; i < 256; i++) { \ + if (!(map)->tree[i]) \ + continue; \ + branch = (map)->tree[i] + +#define LOOP_WALK_END } + +static void ip_tree_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set_iptree *map = set->data; + struct ip_set_iptreeb *btree; + struct ip_set_iptreec *ctree; + struct ip_set_iptreed *dtree; + unsigned int a,b,c,d; + unsigned char i,j,k; + + i = j = k = 0; + DP("gc: %s", set->name); + write_lock_bh(&set->lock); + LOOP_WALK_BEGIN(map, a, btree); + LOOP_WALK_BEGIN(btree, b, ctree); + LOOP_WALK_BEGIN(ctree, c, dtree); + for (d = 0; d < 256; d++) { + if (dtree->expires[d]) { + DP("gc: %u %u %u %u: expires %lu jiffies %lu", + a, b, c, d, + dtree->expires[d], jiffies); + if (map->timeout + && time_before(dtree->expires[d], jiffies)) { + dtree->expires[d] = 0; + map->elements--; + } else + k = 1; + } + } + if (k == 0) { + DP("gc: %s: leaf %u %u %u empty", + set->name, a, b, c); + kmem_cache_free(leaf_cachep, dtree); + ctree->tree[c] = NULL; + } else { + DP("gc: %s: leaf %u %u %u not empty", + set->name, a, b, c); + j = 1; + k = 0; + } + LOOP_WALK_END; + if (j == 0) { + DP("gc: %s: branch %u %u empty", + set->name, a, b); + kmem_cache_free(branch_cachep, ctree); + btree->tree[b] = NULL; + } else { + DP("gc: %s: branch %u %u not empty", + set->name, a, b); + i = 1; + j = k = 0; + } + LOOP_WALK_END; + if (i == 0) { + DP("gc: %s: branch %u empty", + set->name, a); + kmem_cache_free(branch_cachep, btree); + map->tree[a] = NULL; + } else { + DP("gc: %s: branch %u not empty", + set->name, a); + i = j = k = 0; + } + LOOP_WALK_END; + write_unlock_bh(&set->lock); + + map->gc.expires = jiffies + map->gc_interval * HZ; + add_timer(&map->gc); +} + +static inline void init_gc_timer(struct ip_set *set) +{ + struct ip_set_iptree *map = set->data; + + /* Even if there is no timeout for the entries, + * we still have to call gc because delete + * do not clean up empty branches */ + map->gc_interval = IPTREE_GC_TIME; + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = ip_tree_gc; + map->gc.expires = jiffies + map->gc_interval * HZ; + add_timer(&map->gc); +} + +static int create(struct ip_set *set, const void *data, size_t size) +{ + const struct ip_set_req_iptree_create *req = data; + struct ip_set_iptree *map; + + if (size != sizeof(struct ip_set_req_iptree_create)) { + ip_set_printk("data length wrong (want %zu, have %zu)", + sizeof(struct ip_set_req_iptree_create), + size); + return -EINVAL; + } + + map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL); + if (!map) { + DP("out of memory for %d bytes", + sizeof(struct ip_set_iptree)); + return -ENOMEM; + } + memset(map, 0, sizeof(*map)); + map->timeout = req->timeout; + map->elements = 0; + set->data = map; + + init_gc_timer(set); + + return 0; +} + +static void __flush(struct ip_set_iptree *map) +{ + struct ip_set_iptreeb *btree; + struct ip_set_iptreec *ctree; + struct ip_set_iptreed *dtree; + unsigned int a,b,c; + + LOOP_WALK_BEGIN(map, a, btree); + LOOP_WALK_BEGIN(btree, b, ctree); + LOOP_WALK_BEGIN(ctree, c, dtree); + kmem_cache_free(leaf_cachep, dtree); + LOOP_WALK_END; + kmem_cache_free(branch_cachep, ctree); + LOOP_WALK_END; + kmem_cache_free(branch_cachep, btree); + LOOP_WALK_END; + map->elements = 0; +} + +static void destroy(struct ip_set *set) +{ + struct ip_set_iptree *map = set->data; + + /* gc might be running */ + while (!del_timer(&map->gc)) + msleep(IPTREE_DESTROY_SLEEP); + __flush(map); + kfree(map); + set->data = NULL; +} + +static void flush(struct ip_set *set) +{ + struct ip_set_iptree *map = set->data; + unsigned int timeout = map->timeout; + + /* gc might be running */ + while (!del_timer(&map->gc)) + msleep(IPTREE_DESTROY_SLEEP); + __flush(map); + memset(map, 0, sizeof(*map)); + map->timeout = timeout; + + init_gc_timer(set); +} + +static void list_header(const struct ip_set *set, void *data) +{ + const struct ip_set_iptree *map = set->data; + struct ip_set_req_iptree_create *header = data; + + header->timeout = map->timeout; +} + +static int list_members_size(const struct ip_set *set) +{ + const struct ip_set_iptree *map = set->data; + struct ip_set_iptreeb *btree; + struct ip_set_iptreec *ctree; + struct ip_set_iptreed *dtree; + unsigned int a,b,c,d; + unsigned int count = 0; + + LOOP_WALK_BEGIN(map, a, btree); + LOOP_WALK_BEGIN(btree, b, ctree); + LOOP_WALK_BEGIN(ctree, c, dtree); + for (d = 0; d < 256; d++) { + if (dtree->expires[d] + && (!map->timeout || time_after(dtree->expires[d], jiffies))) + count++; + } + LOOP_WALK_END; + LOOP_WALK_END; + LOOP_WALK_END; + + DP("members %u", count); + return (count * sizeof(struct ip_set_req_iptree)); +} + +static void list_members(const struct ip_set *set, void *data) +{ + const struct ip_set_iptree *map = set->data; + struct ip_set_iptreeb *btree; + struct ip_set_iptreec *ctree; + struct ip_set_iptreed *dtree; + unsigned int a,b,c,d; + size_t offset = 0; + struct ip_set_req_iptree *entry; + + LOOP_WALK_BEGIN(map, a, btree); + LOOP_WALK_BEGIN(btree, b, ctree); + LOOP_WALK_BEGIN(ctree, c, dtree); + for (d = 0; d < 256; d++) { + if (dtree->expires[d] + && (!map->timeout || time_after(dtree->expires[d], jiffies))) { + entry = data + offset; + entry->ip = ((a << 24) | (b << 16) | (c << 8) | d); + entry->timeout = !map->timeout ? 0 + : (dtree->expires[d] - jiffies)/HZ; + offset += sizeof(struct ip_set_req_iptree); + } + } + LOOP_WALK_END; + LOOP_WALK_END; + LOOP_WALK_END; +} + +static struct ip_set_type ip_set_iptree = { + .typename = SETTYPE_NAME, + .features = IPSET_TYPE_IP | IPSET_DATA_SINGLE, + .protocol_version = IP_SET_PROTOCOL_VERSION, + .create = &create, + .destroy = &destroy, + .flush = &flush, + .reqsize = sizeof(struct ip_set_req_iptree), + .addip = &addip, + .addip_kernel = &addip_kernel, + .delip = &delip, + .delip_kernel = &delip_kernel, + .testip = &testip, + .testip_kernel = &testip_kernel, + .header_size = sizeof(struct ip_set_req_iptree_create), + .list_header = &list_header, + .list_members_size = &list_members_size, + .list_members = &list_members, + .me = THIS_MODULE, +}; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("iptree type of IP sets"); +module_param(limit, int, 0600); +MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets"); + +static int __init ip_set_iptree_init(void) +{ + int ret; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) + branch_cachep = kmem_cache_create("ip_set_iptreeb", + sizeof(struct ip_set_iptreeb), + 0, 0, NULL); +#else + branch_cachep = kmem_cache_create("ip_set_iptreeb", + sizeof(struct ip_set_iptreeb), + 0, 0, NULL, NULL); +#endif + if (!branch_cachep) { + printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n"); + ret = -ENOMEM; + goto out; + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) + leaf_cachep = kmem_cache_create("ip_set_iptreed", + sizeof(struct ip_set_iptreed), + 0, 0, NULL); +#else + leaf_cachep = kmem_cache_create("ip_set_iptreed", + sizeof(struct ip_set_iptreed), + 0, 0, NULL, NULL); +#endif + if (!leaf_cachep) { + printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n"); + ret = -ENOMEM; + goto free_branch; + } + ret = ip_set_register_set_type(&ip_set_iptree); + if (ret == 0) + goto out; + + kmem_cache_destroy(leaf_cachep); + free_branch: + kmem_cache_destroy(branch_cachep); + out: + return ret; +} + +static void __exit ip_set_iptree_fini(void) +{ + /* FIXME: possible race with ip_set_create() */ + ip_set_unregister_set_type(&ip_set_iptree); + kmem_cache_destroy(leaf_cachep); + kmem_cache_destroy(branch_cachep); +} + +module_init(ip_set_iptree_init); +module_exit(ip_set_iptree_fini); -- cgit v1.2.3