From f72bf0ed59d14270d7b820626f9c7a7c67f40c00 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Jun 2008 01:36:48 +0200 Subject: rework NFCT to use a generic hashtable This patch introduces a generic hashtable to store the nf_conntrack objects. The objects are identified by the original and reply tuples instead of the conntrack ID which is not dumped in the event message of linux kernel < 2.6.25. This patch also fixes the NFCT_MSG_* by NFCT_T_* which is the appropriate message type tag. --- input/flow/ulogd_inpflow_NFCT.c | 264 ++++++++++++++++++---------------------- 1 file changed, 120 insertions(+), 144 deletions(-) (limited to 'input/flow') diff --git a/input/flow/ulogd_inpflow_NFCT.c b/input/flow/ulogd_inpflow_NFCT.c index 5e5af87..a7d5d1f 100644 --- a/input/flow/ulogd_inpflow_NFCT.c +++ b/input/flow/ulogd_inpflow_NFCT.c @@ -13,12 +13,13 @@ * Added timestamp accounting support of the conntrack entries, * reworked by Harald Welte. * + * 11 May 2008, Pablo Neira Ayuso + * Use a generic hashtable to store the existing flows + * * TODO: * - add nanosecond-accurate packet receive timestamp of event-changing * packets to {ip,nf}_conntrack_netlink, so we can have accurate IPFIX * flowStart / flowEnd NanoSeconds. - * - if using preallocated data structure, get rid of all list heads and - * use per-bucket arrays instead. * - SIGHUP for reconfiguration without loosing hash table contents, but * re-read of config and reallocation / rehashing of table, if required * - Split hashtable code into separate [filter] plugin, so we can run @@ -34,6 +35,8 @@ #include #include #include +#include +#include #include #include @@ -44,24 +47,15 @@ typedef enum TIMES_ { START, STOP, __TIME_MAX } TIMES; struct ct_timestamp { - struct llist_head list; struct timeval time[__TIME_MAX]; - int id; -}; - -struct ct_htable { - struct llist_head *buckets; - int num_buckets; - int prealloc; - struct llist_head idle; - struct ct_timestamp *ts; + struct nf_conntrack *ct; }; struct nfct_pluginstance { struct nfct_handle *cth; struct ulogd_fd nfct_fd; struct ulogd_timer timer; - struct ct_htable *ct_active; + struct hashtable *ct_active; }; #define HTABLE_SIZE (8192) @@ -69,7 +63,7 @@ struct nfct_pluginstance { #define EVENT_MASK NF_NETLINK_CONNTRACK_NEW | NF_NETLINK_CONNTRACK_DESTROY static struct config_keyset nfct_kset = { - .num_ces = 6, + .num_ces = 5, .ces = { { .key = "pollinterval", @@ -83,12 +77,6 @@ static struct config_keyset nfct_kset = { .options = CONFIG_OPT_NONE, .u.value = 1, }, - { - .key = "hash_prealloc", - .type = CONFIG_TYPE_INT, - .options = CONFIG_OPT_NONE, - .u.value = 1, - }, { .key = "hash_buckets", .type = CONFIG_TYPE_INT, @@ -112,10 +100,9 @@ static struct config_keyset nfct_kset = { }; #define pollint_ce(x) (x->ces[0]) #define usehash_ce(x) (x->ces[1]) -#define prealloc_ce(x) (x->ces[2]) -#define buckets_ce(x) (x->ces[3]) -#define maxentries_ce(x) (x->ces[4]) -#define eventmask_ce(x) (x->ces[5]) +#define buckets_ce(x) (x->ces[2]) +#define maxentries_ce(x) (x->ces[3]) +#define eventmask_ce(x) (x->ces[4]) enum nfct_keys { NFCT_ORIG_IP_SADDR = 0, @@ -366,117 +353,68 @@ static struct ulogd_key nfct_okeys[] = { }, }; -static struct ct_htable *htable_alloc(int htable_size, int prealloc) +static uint32_t __hash4(const struct nf_conntrack *ct, struct hashtable *table) { - struct ct_htable *htable; - struct ct_timestamp *ct; - int i; - - htable = malloc(sizeof(*htable) - + sizeof(struct llist_head)*htable_size); - if (!htable) - return NULL; - - htable->buckets = (void *)htable + sizeof(*htable); - htable->num_buckets = htable_size; - htable->prealloc = prealloc; - INIT_LLIST_HEAD(&htable->idle); - - for (i = 0; i < htable->num_buckets; i++) - INIT_LLIST_HEAD(&htable->buckets[i]); - - if (!htable->prealloc) - return htable; - - ct = malloc(sizeof(struct ct_timestamp) - * htable->num_buckets * htable->prealloc); - if (!ct) { - free(htable); - return NULL; - } - - /* save the pointer for later free()ing */ - htable->ts = ct; - - for (i = 0; i < htable->num_buckets * htable->prealloc; i++) - llist_add(&ct[i].list, &htable->idle); - - return htable; + unsigned int a, b; + + a = jhash(nfct_get_attr(ct, ATTR_ORIG_IPV4_SRC), sizeof(uint32_t), + ((nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16) | + (nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO)))); + + b = jhash(nfct_get_attr(ct, ATTR_ORIG_IPV4_DST), sizeof(uint32_t), + ((nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16) | + (nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST)))); + + /* + * Instead of returning hash % table->hashsize (implying a divide) + * we return the high 32 bits of the (hash * table->hashsize) that will + * give results between [0 and hashsize-1] and same hash distribution, + * but using a multiply, less expensive than a divide. See: + * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html + */ + return ((uint64_t)jhash_2words(a, b, 0) * table->hashsize) >> 32; } -static void htable_free(struct ct_htable *htable) +static uint32_t __hash6(const struct nf_conntrack *ct, struct hashtable *table) { - struct llist_head *ptr, *ptr2; - int i; + unsigned int a, b; - if (htable->prealloc) { - /* the easy case */ - free(htable->ts); - free(htable); + a = jhash(nfct_get_attr(ct, ATTR_ORIG_IPV6_SRC), sizeof(uint32_t)*4, + ((nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16) | + (nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO)))); - return; - } - - /* non-prealloc case */ - - for (i = 0; i < htable->num_buckets; i++) { - llist_for_each_safe(ptr, ptr2, &htable->buckets[i]) - free(container_of(ptr, struct ct_timestamp, list)); - } + b = jhash(nfct_get_attr(ct, ATTR_ORIG_IPV6_DST), sizeof(uint32_t)*4, + ((nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16) | + (nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST)))); - /* don't need to check for 'idle' list, since it is only used in - * the preallocated case */ + return ((uint64_t)jhash_2words(a, b, 0) * table->hashsize) >> 32; } -static int ct_hash_add(struct ct_htable *htable, unsigned int id) +static uint32_t hash(const void *data, struct hashtable *table) { - struct ct_timestamp *ct; - - if (htable->prealloc) { - if (llist_empty(&htable->idle)) { - ulogd_log(ULOGD_ERROR, "Not enough ct_timestamp entries\n"); - return -1; - } - - ct = container_of(htable->idle.next, struct ct_timestamp, list); - - ct->id = id; - gettimeofday(&ct->time[START], NULL); - - llist_move(&ct->list, &htable->buckets[id % htable->num_buckets]); - } else { - ct = malloc(sizeof *ct); - if (!ct) { - ulogd_log(ULOGD_ERROR, "Not enough memory\n"); - return -1; - } - - ct->id = id; - gettimeofday(&ct->time[START], NULL); + int ret = 0; + const struct ct_timestamp *ts = data; - llist_add(&ct->list, &htable->buckets[id % htable->num_buckets]); + switch(nfct_get_attr_u8(ts->ct, ATTR_L3PROTO)) { + case AF_INET: + ret = __hash4(ts->ct, table); + break; + case AF_INET6: + ret = __hash6(ts->ct, table); + break; + default: + break; } - return 0; + return ret; } -static struct ct_timestamp *ct_hash_get(struct ct_htable *htable, uint32_t id) +static int compare(const void *data1, const void *data2) { - struct ct_timestamp *ct = NULL; - struct llist_head *ptr; - - llist_for_each(ptr, &htable->buckets[id % htable->num_buckets]) { - ct = container_of(ptr, struct ct_timestamp, list); - if (ct->id == id) { - gettimeofday(&ct->time[STOP], NULL); - if (htable->prealloc) - llist_move(&ct->list, &htable->idle); - else - free(ct); - break; - } - } - return ct; + const struct ct_timestamp *u1 = data1; + const struct ct_timestamp *u2 = data2; + + return nfct_cmp(u1->ct, u2->ct, NFCT_CMP_ORIG | NFCT_CMP_REPL); } static int propagate_ct(struct ulogd_pluginstance *upi, @@ -600,28 +538,69 @@ static int event_handler(enum nf_conntrack_msg_type type, struct nfct_pluginstance *cpi = (struct nfct_pluginstance *) upi->private; struct ct_timestamp *ts = NULL; + struct ct_timestamp tmp = { + .ct = ct, + }; struct ulogd_pluginstance *npi = NULL; int ret = 0; - if (type == NFCT_MSG_NEW) { - if (usehash_ce(upi->config_kset).u.value != 0) { - ct_hash_add(cpi->ct_active, nfct_get_attr_u32(ct, ATTR_ID)); - return 0; + if (!usehash_ce(upi->config_kset).u.value && type == NFCT_T_DESTROY) { + /* since we support the re-use of one instance in + * several different stacks, we duplicate the message + * to let them know */ + llist_for_each_entry(npi, &upi->plist, plist) { + ret = propagate_ct(npi, ct, type, ts); + if (ret != 0) + break; } - } else if (type == NFCT_MSG_DESTROY) { - if (usehash_ce(upi->config_kset).u.value != 0) - ts = ct_hash_get(cpi->ct_active, nfct_get_attr_u32(ct, ATTR_ID)); + + propagate_ct(upi, ct, type, ts); + + return NFCT_CB_CONTINUE; } - /* since we support the re-use of one instance in - * several different stacks, we duplicate the message - * to let them know */ - llist_for_each_entry(npi, &upi->plist, plist) { - ret = propagate_ct(npi, ct, type, ts); - if (ret != 0) - return ret; + switch(type) { + case NFCT_T_NEW: + ts = hashtable_add(cpi->ct_active, &tmp); + gettimeofday(&ts->time[START], NULL); + return NFCT_CB_STOLEN; + case NFCT_T_UPDATE: + ts = hashtable_get(cpi->ct_active, &tmp); + if (ts) + nfct_copy(ts->ct, ct, NFCT_CP_META); + else { + ts = hashtable_add(cpi->ct_active, &tmp); + gettimeofday(&ts->time[START], NULL); + return NFCT_CB_STOLEN; + } + break; + case NFCT_T_DESTROY: + ts = hashtable_get(cpi->ct_active, &tmp); + if (ts) + gettimeofday(&ts->time[STOP], NULL); + + /* since we support the re-use of one instance in + * several different stacks, we duplicate the message + * to let them know */ + llist_for_each_entry(npi, &upi->plist, plist) { + ret = propagate_ct(npi, ct, type, ts); + if (ret != 0) + break; + } + + propagate_ct(upi, ct, type, ts); + + if (ts) { + hashtable_del(cpi->ct_active, ts); + free(ts->ct); + } + break; + default: + ulogd_log(ULOGD_NOTICE, "unknown netlink message type\n"); + break; } - return propagate_ct(upi, ct, type, ts); + + return NFCT_CB_CONTINUE; } static int read_cb_nfct(int fd, unsigned int what, void *param) @@ -677,7 +656,6 @@ static int constructor_nfct(struct ulogd_pluginstance *upi) { struct nfct_pluginstance *cpi = (struct nfct_pluginstance *)upi->private; - int prealloc; cpi->cth = nfct_open(NFNL_SUBSYS_CTNETLINK, eventmask_ce(upi->config_kset).u.value); @@ -695,15 +673,13 @@ static int constructor_nfct(struct ulogd_pluginstance *upi) ulogd_register_fd(&cpi->nfct_fd); - if (prealloc_ce(upi->config_kset).u.value != 0) - prealloc = maxentries_ce(upi->config_kset).u.value / - buckets_ce(upi->config_kset).u.value; - else - prealloc = 0; - if (usehash_ce(upi->config_kset).u.value != 0) { - cpi->ct_active = htable_alloc(buckets_ce(upi->config_kset).u.value, - prealloc); + cpi->ct_active = + hashtable_create(buckets_ce(upi->config_kset).u.value, + maxentries_ce(upi->config_kset).u.value, + sizeof(struct ct_timestamp), + hash, + compare); if (!cpi->ct_active) { ulogd_log(ULOGD_FATAL, "error allocating hash\n"); nfct_close(cpi->cth); @@ -719,7 +695,7 @@ static int destructor_nfct(struct ulogd_pluginstance *pi) struct nfct_pluginstance *cpi = (void *) pi; int rc; - htable_free(cpi->ct_active); + hashtable_destroy(cpi->ct_active); rc = nfct_close(cpi->cth); if (rc < 0) -- cgit v1.2.3