/* Copyright (C) 2005 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ /* Kernel module implementing an IP set type: the iptree type */ #include #include #include #include #include #include #include #include #include #include #include #include /* Backward compatibility */ #ifndef __nocast #define __nocast #endif #include static int limit = MAX_RANGE; /* Garbage collection interval in seconds: */ #define IPTREE_GC_TIME 5*60 /* Sleep so many milliseconds before trying again * to delete the gc timer at destroying/flushing a set */ #define IPTREE_DESTROY_SLEEP 100 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21) static struct kmem_cache *branch_cachep; static struct kmem_cache *leaf_cachep; #else static kmem_cache_t *branch_cachep; static kmem_cache_t *leaf_cachep; #endif #if defined(__LITTLE_ENDIAN) #define ABCD(a,b,c,d,addrp) do { \ a = ((unsigned char *)addrp)[3]; \ b = ((unsigned char *)addrp)[2]; \ c = ((unsigned char *)addrp)[1]; \ d = ((unsigned char *)addrp)[0]; \ } while (0) #elif defined(__BIG_ENDIAN) #define ABCD(a,b,c,d,addrp) do { \ a = ((unsigned char *)addrp)[0]; \ b = ((unsigned char *)addrp)[1]; \ c = ((unsigned char *)addrp)[2]; \ d = ((unsigned char *)addrp)[3]; \ } while (0) #else #error "Please fix asm/byteorder.h" #endif /* __LITTLE_ENDIAN */ #define TESTIP_WALK(map, elem, branch) do { \ if ((map)->tree[elem]) { \ branch = (map)->tree[elem]; \ } else \ return 0; \ } while (0) static inline int __testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) { struct ip_set_iptree *map = set->data; struct ip_set_iptreeb *btree; struct ip_set_iptreec *ctree; struct ip_set_iptreed *dtree; unsigned char a,b,c,d; if (!ip) return -ERANGE; *hash_ip = ip; ABCD(a, b, c, d, hash_ip); DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout); TESTIP_WALK(map, a, btree); TESTIP_WALK(btree, b, ctree); TESTIP_WALK(ctree, c, dtree); DP("%lu %lu", dtree->expires[d], jiffies); return dtree->expires[d] && (!map->timeout || time_after(dtree->expires[d], jiffies)); } static int testip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip) { const struct ip_set_req_iptree *req = data; if (size != sizeof(struct ip_set_req_iptree)) { ip_set_printk("data length wrong (want %zu, have %zu)", sizeof(struct ip_set_req_iptree), size); return -EINVAL; } return __testip(set, req->ip, hash_ip); } static int testip_kernel(struct ip_set *set, const struct sk_buff *skb, ip_set_ip_t *hash_ip, const u_int32_t *flags, unsigned char index) { int res; DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u", flags[index] & IPSET_SRC ? "SRC" : "DST", #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr)); #else NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr)); #endif res = __testip(set, ntohl(flags[index] & IPSET_SRC #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr), #else ? skb->nh.iph->saddr : skb->nh.iph->daddr), #endif hash_ip); return (res < 0 ? 0 : res); } #define ADDIP_WALK(map, elem, branch, type, cachep) do { \ if ((map)->tree[elem]) { \ DP("found %u", elem); \ branch = (map)->tree[elem]; \ } else { \ branch = (type *) \ kmem_cache_alloc(cachep, GFP_ATOMIC); \ if (branch == NULL) \ return -ENOMEM; \ memset(branch, 0, sizeof(*branch)); \ (map)->tree[elem] = branch; \ DP("alloc %u", elem); \ } \ } while (0) static inline int __addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout, ip_set_ip_t *hash_ip) { struct ip_set_iptree *map = set->data; struct ip_set_iptreeb *btree; struct ip_set_iptreec *ctree; struct ip_set_iptreed *dtree; unsigned char a,b,c,d; int ret = 0; if (!ip || map->elements >= limit) /* We could call the garbage collector * but it's probably overkill */ return -ERANGE; *hash_ip = ip; ABCD(a, b, c, d, hash_ip); DP("%u %u %u %u timeout %u", a, b, c, d, timeout); ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep); ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep); ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep); if (dtree->expires[d] && (!map->timeout || time_after(dtree->expires[d], jiffies))) ret = -EEXIST; dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1; /* Lottery: I won! */ if (dtree->expires[d] == 0) dtree->expires[d] = 1; DP("%u %lu", d, dtree->expires[d]); if (ret == 0) map->elements++; return ret; } static int addip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip) { struct ip_set_iptree *map = set->data; const struct ip_set_req_iptree *req = data; if (size != sizeof(struct ip_set_req_iptree)) { ip_set_printk("data length wrong (want %zu, have %zu)", sizeof(struct ip_set_req_iptree), size); return -EINVAL; } DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout); return __addip(set, req->ip, req->timeout ? req->timeout : map->timeout, hash_ip); } static int addip_kernel(struct ip_set *set, const struct sk_buff *skb, ip_set_ip_t *hash_ip, const u_int32_t *flags, unsigned char index) { struct ip_set_iptree *map = set->data; return __addip(set, ntohl(flags[index] & IPSET_SRC #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr), #else ? skb->nh.iph->saddr : skb->nh.iph->daddr), #endif map->timeout, hash_ip); } #define DELIP_WALK(map, elem, branch) do { \ if ((map)->tree[elem]) { \ branch = (map)->tree[elem]; \ } else \ return -EEXIST; \ } while (0) static inline int __delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) { struct ip_set_iptree *map = set->data; struct ip_set_iptreeb *btree; struct ip_set_iptreec *ctree; struct ip_set_iptreed *dtree; unsigned char a,b,c,d; if (!ip) return -ERANGE; *hash_ip = ip; ABCD(a, b, c, d, hash_ip); DELIP_WALK(map, a, btree); DELIP_WALK(btree, b, ctree); DELIP_WALK(ctree, c, dtree); if (dtree->expires[d]) { dtree->expires[d] = 0; map->elements--; return 0; } return -EEXIST; } static int delip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip) { const struct ip_set_req_iptree *req = data; if (size != sizeof(struct ip_set_req_iptree)) { ip_set_printk("data length wrong (want %zu, have %zu)", sizeof(struct ip_set_req_iptree), size); return -EINVAL; } return __delip(set, req->ip, hash_ip); } static int delip_kernel(struct ip_set *set, const struct sk_buff *skb, ip_set_ip_t *hash_ip, const u_int32_t *flags, unsigned char index) { return __delip(set, ntohl(flags[index] & IPSET_SRC #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr), #else ? skb->nh.iph->saddr : skb->nh.iph->daddr), #endif hash_ip); } #define LOOP_WALK_BEGIN(map, i, branch) \ for (i = 0; i < 256; i++) { \ if (!(map)->tree[i]) \ continue; \ branch = (map)->tree[i] #define LOOP_WALK_END } static void ip_tree_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct ip_set_iptree *map = set->data; struct ip_set_iptreeb *btree; struct ip_set_iptreec *ctree; struct ip_set_iptreed *dtree; unsigned int a,b,c,d; unsigned char i,j,k; i = j = k = 0; DP("gc: %s", set->name); write_lock_bh(&set->lock); LOOP_WALK_BEGIN(map, a, btree); LOOP_WALK_BEGIN(btree, b, ctree); LOOP_WALK_BEGIN(ctree, c, dtree); for (d = 0; d < 256; d++) { if (dtree->expires[d]) { DP("gc: %u %u %u %u: expires %lu jiffies %lu", a, b, c, d, dtree->expires[d], jiffies); if (map->timeout && time_before(dtree->expires[d], jiffies)) { dtree->expires[d] = 0; map->elements--; } else k = 1; } } if (k == 0) { DP("gc: %s: leaf %u %u %u empty", set->name, a, b, c); kmem_cache_free(leaf_cachep, dtree); ctree->tree[c] = NULL; } else { DP("gc: %s: leaf %u %u %u not empty", set->name, a, b, c); j = 1; k = 0; } LOOP_WALK_END; if (j == 0) { DP("gc: %s: branch %u %u empty", set->name, a, b); kmem_cache_free(branch_cachep, ctree); btree->tree[b] = NULL; } else { DP("gc: %s: branch %u %u not empty", set->name, a, b); i = 1; j = k = 0; } LOOP_WALK_END; if (i == 0) { DP("gc: %s: branch %u empty", set->name, a); kmem_cache_free(branch_cachep, btree); map->tree[a] = NULL; } else { DP("gc: %s: branch %u not empty", set->name, a); i = j = k = 0; } LOOP_WALK_END; write_unlock_bh(&set->lock); map->gc.expires = jiffies + map->gc_interval * HZ; add_timer(&map->gc); } static inline void init_gc_timer(struct ip_set *set) { struct ip_set_iptree *map = set->data; /* Even if there is no timeout for the entries, * we still have to call gc because delete * do not clean up empty branches */ map->gc_interval = IPTREE_GC_TIME; init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = ip_tree_gc; map->gc.expires = jiffies + map->gc_interval * HZ; add_timer(&map->gc); } static int create(struct ip_set *set, const void *data, size_t size) { const struct ip_set_req_iptree_create *req = data; struct ip_set_iptree *map; if (size != sizeof(struct ip_set_req_iptree_create)) { ip_set_printk("data length wrong (want %zu, have %zu)", sizeof(struct ip_set_req_iptree_create), size); return -EINVAL; } map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL); if (!map) { DP("out of memory for %d bytes", sizeof(struct ip_set_iptree)); return -ENOMEM; } memset(map, 0, sizeof(*map)); map->timeout = req->timeout; map->elements = 0; set->data = map; init_gc_timer(set); return 0; } static void __flush(struct ip_set_iptree *map) { struct ip_set_iptreeb *btree; struct ip_set_iptreec *ctree; struct ip_set_iptreed *dtree; unsigned int a,b,c; LOOP_WALK_BEGIN(map, a, btree); LOOP_WALK_BEGIN(btree, b, ctree); LOOP_WALK_BEGIN(ctree, c, dtree); kmem_cache_free(leaf_cachep, dtree); LOOP_WALK_END; kmem_cache_free(branch_cachep, ctree); LOOP_WALK_END; kmem_cache_free(branch_cachep, btree); LOOP_WALK_END; map->elements = 0; } static void destroy(struct ip_set *set) { struct ip_set_iptree *map = set->data; /* gc might be running */ while (!del_timer(&map->gc)) msleep(IPTREE_DESTROY_SLEEP); __flush(map); kfree(map); set->data = NULL; } static void flush(struct ip_set *set) { struct ip_set_iptree *map = set->data; unsigned int timeout = map->timeout; /* gc might be running */ while (!del_timer(&map->gc)) msleep(IPTREE_DESTROY_SLEEP); __flush(map); memset(map, 0, sizeof(*map)); map->timeout = timeout; init_gc_timer(set); } static void list_header(const struct ip_set *set, void *data) { const struct ip_set_iptree *map = set->data; struct ip_set_req_iptree_create *header = data; header->timeout = map->timeout; } static int list_members_size(const struct ip_set *set) { const struct ip_set_iptree *map = set->data; struct ip_set_iptreeb *btree; struct ip_set_iptreec *ctree; struct ip_set_iptreed *dtree; unsigned int a,b,c,d; unsigned int count = 0; LOOP_WALK_BEGIN(map, a, btree); LOOP_WALK_BEGIN(btree, b, ctree); LOOP_WALK_BEGIN(ctree, c, dtree); for (d = 0; d < 256; d++) { if (dtree->expires[d] && (!map->timeout || time_after(dtree->expires[d], jiffies))) count++; } LOOP_WALK_END; LOOP_WALK_END; LOOP_WALK_END; DP("members %u", count); return (count * sizeof(struct ip_set_req_iptree)); } static void list_members(const struct ip_set *set, void *data) { const struct ip_set_iptree *map = set->data; struct ip_set_iptreeb *btree; struct ip_set_iptreec *ctree; struct ip_set_iptreed *dtree; unsigned int a,b,c,d; size_t offset = 0; struct ip_set_req_iptree *entry; LOOP_WALK_BEGIN(map, a, btree); LOOP_WALK_BEGIN(btree, b, ctree); LOOP_WALK_BEGIN(ctree, c, dtree); for (d = 0; d < 256; d++) { if (dtree->expires[d] && (!map->timeout || time_after(dtree->expires[d], jiffies))) { entry = data + offset; entry->ip = ((a << 24) | (b << 16) | (c << 8) | d); entry->timeout = !map->timeout ? 0 : (dtree->expires[d] - jiffies)/HZ; offset += sizeof(struct ip_set_req_iptree); } } LOOP_WALK_END; LOOP_WALK_END; LOOP_WALK_END; } static struct ip_set_type ip_set_iptree = { .typename = SETTYPE_NAME, .features = IPSET_TYPE_IP | IPSET_DATA_SINGLE, .protocol_version = IP_SET_PROTOCOL_VERSION, .create = &create, .destroy = &destroy, .flush = &flush, .reqsize = sizeof(struct ip_set_req_iptree), .addip = &addip, .addip_kernel = &addip_kernel, .delip = &delip, .delip_kernel = &delip_kernel, .testip = &testip, .testip_kernel = &testip_kernel, .header_size = sizeof(struct ip_set_req_iptree_create), .list_header = &list_header, .list_members_size = &list_members_size, .list_members = &list_members, .me = THIS_MODULE, }; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); MODULE_DESCRIPTION("iptree type of IP sets"); module_param(limit, int, 0600); MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets"); static int __init ip_set_iptree_init(void) { int ret; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) branch_cachep = kmem_cache_create("ip_set_iptreeb", sizeof(struct ip_set_iptreeb), 0, 0, NULL); #else branch_cachep = kmem_cache_create("ip_set_iptreeb", sizeof(struct ip_set_iptreeb), 0, 0, NULL, NULL); #endif if (!branch_cachep) { printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n"); ret = -ENOMEM; goto out; } #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) leaf_cachep = kmem_cache_create("ip_set_iptreed", sizeof(struct ip_set_iptreed), 0, 0, NULL); #else leaf_cachep = kmem_cache_create("ip_set_iptreed", sizeof(struct ip_set_iptreed), 0, 0, NULL, NULL); #endif if (!leaf_cachep) { printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n"); ret = -ENOMEM; goto free_branch; } ret = ip_set_register_set_type(&ip_set_iptree); if (ret == 0) goto out; kmem_cache_destroy(leaf_cachep); free_branch: kmem_cache_destroy(branch_cachep); out: return ret; } static void __exit ip_set_iptree_fini(void) { /* FIXME: possible race with ip_set_create() */ ip_set_unregister_set_type(&ip_set_iptree); kmem_cache_destroy(leaf_cachep); kmem_cache_destroy(branch_cachep); } module_init(ip_set_iptree_init); module_exit(ip_set_iptree_fini);