From 065ad898cc6e9ca0323af440b13acca18bd244ed Mon Sep 17 00:00:00 2001 From: "/C=EU/ST=EU/CN=Pablo Neira Ayuso/emailAddress=pablo@netfilter.org" Date: Tue, 19 Feb 2008 18:53:07 +0000 Subject: - implement a synchronous timer framework - fix crash when enabling pollinterval clause in flow-based accounting --- src/Makefile.am | 2 +- src/rbtree.c | 389 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/select.c | 4 +- src/timer.c | 213 +++++++++++++++---------------- src/ulogd.c | 33 ++--- 5 files changed, 512 insertions(+), 129 deletions(-) create mode 100644 src/rbtree.c (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 8093aef..67f404e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -5,5 +5,5 @@ AM_CPPFLAGS = $(all_includes) -I$(top_srcdir)/include \ sbin_PROGRAMS = ulogd -ulogd_SOURCES = ulogd.c select.c timer.c conffile.c +ulogd_SOURCES = ulogd.c select.c timer.c rbtree.c conffile.c ulogd_LDFLAGS = -export-dynamic diff --git a/src/rbtree.c b/src/rbtree.c new file mode 100644 index 0000000..09adad0 --- /dev/null +++ b/src/rbtree.c @@ -0,0 +1,389 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include + +static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *right = node->rb_right; + struct rb_node *parent = rb_parent(node); + + if ((node->rb_right = right->rb_left)) + rb_set_parent(right->rb_left, node); + right->rb_left = node; + + rb_set_parent(right, parent); + + if (parent) + { + if (node == parent->rb_left) + parent->rb_left = right; + else + parent->rb_right = right; + } + else + root->rb_node = right; + rb_set_parent(node, right); +} + +static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *left = node->rb_left; + struct rb_node *parent = rb_parent(node); + + if ((node->rb_left = left->rb_right)) + rb_set_parent(left->rb_right, node); + left->rb_right = node; + + rb_set_parent(left, parent); + + if (parent) + { + if (node == parent->rb_right) + parent->rb_right = left; + else + parent->rb_left = left; + } + else + root->rb_node = left; + rb_set_parent(node, left); +} + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *parent, *gparent; + + while ((parent = rb_parent(node)) && rb_is_red(parent)) + { + gparent = rb_parent(parent); + + if (parent == gparent->rb_left) + { + { + register struct rb_node *uncle = gparent->rb_right; + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); + rb_set_black(parent); + rb_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->rb_right == node) + { + register struct rb_node *tmp; + __rb_rotate_left(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + rb_set_black(parent); + rb_set_red(gparent); + __rb_rotate_right(gparent, root); + } else { + { + register struct rb_node *uncle = gparent->rb_left; + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); + rb_set_black(parent); + rb_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->rb_left == node) + { + register struct rb_node *tmp; + __rb_rotate_right(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + rb_set_black(parent); + rb_set_red(gparent); + __rb_rotate_left(gparent, root); + } + } + + rb_set_black(root->rb_node); +} + +static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, + struct rb_root *root) +{ + struct rb_node *other; + + while ((!node || rb_is_black(node)) && node != root->rb_node) + { + if (parent->rb_left == node) + { + other = parent->rb_right; + if (rb_is_red(other)) + { + rb_set_black(other); + rb_set_red(parent); + __rb_rotate_left(parent, root); + other = parent->rb_right; + } + if ((!other->rb_left || rb_is_black(other->rb_left)) && + (!other->rb_right || rb_is_black(other->rb_right))) + { + rb_set_red(other); + node = parent; + parent = rb_parent(node); + } + else + { + if (!other->rb_right || rb_is_black(other->rb_right)) + { + struct rb_node *o_left; + if ((o_left = other->rb_left)) + rb_set_black(o_left); + rb_set_red(other); + __rb_rotate_right(other, root); + other = parent->rb_right; + } + rb_set_color(other, rb_color(parent)); + rb_set_black(parent); + if (other->rb_right) + rb_set_black(other->rb_right); + __rb_rotate_left(parent, root); + node = root->rb_node; + break; + } + } + else + { + other = parent->rb_left; + if (rb_is_red(other)) + { + rb_set_black(other); + rb_set_red(parent); + __rb_rotate_right(parent, root); + other = parent->rb_left; + } + if ((!other->rb_left || rb_is_black(other->rb_left)) && + (!other->rb_right || rb_is_black(other->rb_right))) + { + rb_set_red(other); + node = parent; + parent = rb_parent(node); + } + else + { + if (!other->rb_left || rb_is_black(other->rb_left)) + { + register struct rb_node *o_right; + if ((o_right = other->rb_right)) + rb_set_black(o_right); + rb_set_red(other); + __rb_rotate_left(other, root); + other = parent->rb_left; + } + rb_set_color(other, rb_color(parent)); + rb_set_black(parent); + if (other->rb_left) + rb_set_black(other->rb_left); + __rb_rotate_right(parent, root); + node = root->rb_node; + break; + } + } + } + if (node) + rb_set_black(node); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *child, *parent; + int color; + + if (!node->rb_left) + child = node->rb_right; + else if (!node->rb_right) + child = node->rb_left; + else + { + struct rb_node *old = node, *left; + + node = node->rb_right; + while ((left = node->rb_left) != NULL) + node = left; + child = node->rb_right; + parent = rb_parent(node); + color = rb_color(node); + + if (child) + rb_set_parent(child, parent); + if (parent == old) { + parent->rb_right = child; + parent = node; + } else + parent->rb_left = child; + + node->rb_parent_color = old->rb_parent_color; + node->rb_right = old->rb_right; + node->rb_left = old->rb_left; + + if (rb_parent(old)) + { + if (rb_parent(old)->rb_left == old) + rb_parent(old)->rb_left = node; + else + rb_parent(old)->rb_right = node; + } else + root->rb_node = node; + + rb_set_parent(old->rb_left, node); + if (old->rb_right) + rb_set_parent(old->rb_right, node); + goto color; + } + + parent = rb_parent(node); + color = rb_color(node); + + if (child) + rb_set_parent(child, parent); + if (parent) + { + if (parent->rb_left == node) + parent->rb_left = child; + else + parent->rb_right = child; + } + else + root->rb_node = child; + + color: + if (color == RB_BLACK) + __rb_erase_color(child, parent, root); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(struct rb_node *node) +{ + struct rb_node *parent; + + if (rb_parent(node) == node) + return NULL; + + /* If we have a right-hand child, go down and then left as far + as we can. */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return node; + } + + /* No right-hand children. Everything down and left is + smaller than us, so any 'next' node must be in the general + direction of our parent. Go up the tree; any time the + ancestor is a right-hand child of its parent, keep going + up. First time it's a left-hand child of its parent, said + parent is our 'next' node. */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(struct rb_node *node) +{ + struct rb_node *parent; + + if (rb_parent(node) == node) + return NULL; + + /* If we have a left-hand child, go down and then right as far + as we can. */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return node; + } + + /* No left-hand children. Go up till we find an ancestor which + is a right-hand child of its parent */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + if (parent) { + if (victim == parent->rb_left) + parent->rb_left = new; + else + parent->rb_right = new; + } else { + root->rb_node = new; + } + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} diff --git a/src/select.c b/src/select.c index 6344a45..b1cdb17 100644 --- a/src/select.c +++ b/src/select.c @@ -55,7 +55,7 @@ void ulogd_unregister_fd(struct ulogd_fd *fd) llist_del(&fd->list); } -int ulogd_select_main() +int ulogd_select_main(struct timeval *tv) { struct ulogd_fd *ufd; fd_set readset, writeset, exceptset; @@ -77,7 +77,7 @@ int ulogd_select_main() FD_SET(ufd->fd, &exceptset); } - i = select(maxfd+1, &readset, &writeset, &exceptset, NULL); + i = select(maxfd+1, &readset, &writeset, &exceptset, tv); if (i > 0) { /* call registered callback functions */ llist_for_each_entry(ufd, &ulogd_fds, list) { diff --git a/src/timer.c b/src/timer.c index dfc898f..c4da0a5 100644 --- a/src/timer.c +++ b/src/timer.c @@ -4,11 +4,15 @@ * * userspace logging daemon for the netfilter subsystem * + * (C) 2006-2008 by Pablo Neira Ayuso + * + * based on previous works by: + * * (C) 2000-2005 by Harald Welte * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,151 +21,146 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Description: + * This is the timer framework for ulogd, it works together with select() + * so that the daemon only wakes up when there are timers expired to run. + * This approach is more simple than the previous signal-based implementation + * that could wake up the daemon while running at any part of the code. + * + * TODO: + * - This piece of code has been extracted from conntrackd. Probably + * ulogd doesn't require such O(log n) scalable timer framework. Anyhow, + * we can simplify this code using the same API later, that would be + * quite straight forward. */ -#include +#include #include -#include -#include -#include +#include -#include -#include +static struct rb_root alarm_root = RB_ROOT; -static LLIST_HEAD(ulogd_timers); - -static void tv_normalize(struct timeval *out) +void ulogd_init_timer(struct ulogd_timer *t, + void *data, + void (*cb)(struct ulogd_timer *a, void *data)) { - out->tv_sec += (out->tv_usec / 1000000); - out->tv_usec = (out->tv_usec % 1000000); + /* initialize the head to check whether a node is inserted */ + RB_CLEAR_NODE(&t->node); + timerclear(&t->tv); + t->data = data; + t->cb = cb; } -/* subtract two struct timevals */ -static int tv_sub(struct timeval *res, const struct timeval *from, - const struct timeval *sub) +static void __add_timer(struct ulogd_timer *alarm) { - /* FIXME: this stinks. Deal with wraps, carry, ... */ - res->tv_sec = from->tv_sec - sub->tv_sec; - res->tv_usec = from->tv_usec - sub->tv_usec; + struct rb_node **new = &(alarm_root.rb_node); + struct rb_node *parent = NULL; - return 0; -} + while (*new) { + struct ulogd_timer *this; -static int tv_add(struct timeval *res, const struct timeval *a1, - const struct timeval *a2) -{ - unsigned int carry; + this = container_of(*new, struct ulogd_timer, node); - res->tv_sec = a1->tv_sec + a2->tv_sec; - res->tv_usec = a1->tv_usec + a2->tv_usec; + parent = *new; + if (timercmp(&alarm->tv, &this->tv, <)) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } - tv_normalize(res); + rb_link_node(&alarm->node, parent, new); + rb_insert_color(&alarm->node, &alarm_root); } -static int tv_later(const struct timeval *expires, const struct timeval *now) +void ulogd_add_timer(struct ulogd_timer *alarm, unsigned long sc) { - if (expires->tv_sec < now->tv_sec) - return 0; - else if (expires->tv_sec > now->tv_sec) - return 1; - else /* if (expires->tv_sec == now->tv_sec */ { - if (expires->tv_usec >= now->tv_usec) - return 1; - } + struct timeval tv; - return 0; + ulogd_del_timer(alarm); + alarm->tv.tv_sec = sc; + alarm->tv.tv_usec = 0; + gettimeofday(&tv, NULL); + timeradd(&alarm->tv, &tv, &alarm->tv); + __add_timer(alarm); } -static int tv_smaller(const struct timeval *t1, const struct timeval *t2) +void ulogd_del_timer(struct ulogd_timer *alarm) { - return tv_later(t2, t1); + /* don't remove a non-inserted node */ + if (!RB_EMPTY_NODE(&alarm->node)) { + rb_erase(&alarm->node, &alarm_root); + RB_CLEAR_NODE(&alarm->node); + } } -static int calc_next_expiration(void) +int ulogd_timer_pending(struct ulogd_timer *alarm) { - struct ulogd_timer *cur; - struct timeval min, now, diff; - struct itimerval iti; - int ret; - -retry: - if (llist_empty(&ulogd_timers)) + if (RB_EMPTY_NODE(&alarm->node)) return 0; - llist_for_each_entry(cur, &ulogd_timers, list) { - if (ulogd_timers.next == &cur->list) - min = cur->expires; - - if (tv_smaller(&cur->expires, &min)) - min = cur->expires; - } - - if (tv_sub(&diff, &min, &now) < 0) { - /* FIXME: run expired timer callbacks */ - /* we cannot run timers from here since we might be - * called from register_timer() within check_n_run() */ - - /* FIXME: restart with next minimum timer */ - goto retry; - } - - /* re-set kernel timer */ - memset(&iti, 0, sizeof(iti)); - memcpy(&iti.it_value, &diff, sizeof(iti.it_value)); - ret = setitimer(ITIMER_REAL, &iti, NULL); - if (ret < 0) - return ret; - - return 0; + return 1; } -void ulogd_timer_check_n_run(void) +static struct timeval * +calculate_next_run(struct timeval *cand, + struct timeval *tv, + struct timeval *next_run) { - struct ulogd_timer *cur, *cur2; - struct timeval now; - - if (gettimeofday(&now, NULL) < 0) - return; - - llist_for_each_entry_safe(cur, cur2, &ulogd_timers, list) { - if (tv_later(&cur->expires, &now)) { - /* fist delete it from the list of timers */ - llist_del(&cur->list); - /* then call. called function can re-add it */ - (cur->cb)(cur->data); + if (cand->tv_sec != LONG_MAX) { + if (timercmp(cand, tv, >)) + timersub(cand, tv, next_run); + else { + /* loop again inmediately */ + next_run->tv_sec = 0; + next_run->tv_usec = 0; } + return next_run; } - - calc_next_expiration(); + return NULL; } - -int ulogd_register_timer(struct ulogd_timer *timer) +struct timeval *ulogd_get_next_timer_run(struct timeval *next_run) { - int ret; + struct rb_node *node; struct timeval tv; - ret = gettimeofday(&tv, NULL); - if (ret < 0) - return ret; + gettimeofday(&tv, NULL); + + node = rb_first(&alarm_root); + if (node) { + struct ulogd_timer *this; + this = container_of(node, struct ulogd_timer, node); + return calculate_next_run(&this->tv, &tv, next_run); + } + return NULL; +} + +struct timeval *ulogd_do_timer_run(struct timeval *next_run) +{ + struct llist_head alarm_run_queue; + struct rb_node *node; + struct ulogd_timer *this; + struct timeval tv; - /* convert expiration time into absoulte time */ - timer->expires.tv_sec += tv.tv_sec; - timer->expires.tv_usec += tv.tv_usec; + gettimeofday(&tv, NULL); - llist_add_tail(&timer->list, &ulogd_timers); + INIT_LLIST_HEAD(&alarm_run_queue); + for (node = rb_first(&alarm_root); node; node = rb_next(node)) { + this = container_of(node, struct ulogd_timer, node); - /* re-calculate next expiration */ - calc_next_expiration(); + if (timercmp(&this->tv, &tv, >)) + break; - return 0; -} + llist_add(&this->list, &alarm_run_queue); + } -void ulogd_unregister_timer(struct ulogd_timer *timer) -{ - llist_del(&timer->list); + llist_for_each_entry(this, &alarm_run_queue, list) { + rb_erase(&this->node, &alarm_root); + RB_CLEAR_NODE(&this->node); + this->cb(this, this->data); + } - /* re-calculate next expiration */ - calc_next_expiration(); + return ulogd_get_next_timer_run(next_run); } diff --git a/src/ulogd.c b/src/ulogd.c index de2fd96..da821ee 100644 --- a/src/ulogd.c +++ b/src/ulogd.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #ifdef DEBUG @@ -828,27 +829,24 @@ out_buf: } -static int ulogd_main_loop(void) +static void ulogd_main_loop(void) { - int ret = 0; + int ret; + struct timeval next_alarm; + struct timeval *next = NULL; while (1) { - ret = ulogd_select_main(); - if (ret == 0) - continue; + /* XXX: signal blocking? */ + if (next != NULL && !timerisset(next)) + next = ulogd_do_timer_run(&next_alarm); + else + next = ulogd_get_next_timer_run(&next_alarm); - if (ret < 0) { - if (errno == -EINTR) - continue; - else { - ulogd_log(ULOGD_ERROR, "select returned %s\n", - strerror(errno)); - break; - } - } + ret = ulogd_select_main(next); + if (ret < 0 && errno != -EINTR) + ulogd_log(ULOGD_ERROR, "select says %s\n", + strerror(errno)); } - - return ret; } /* open the logfile */ @@ -953,9 +951,6 @@ static void signal_handler(int signal) sigterm_handler(signal); } break; - case SIGALRM: - ulogd_timer_check_n_run(); - break; default: break; } -- cgit v1.2.3