1 files changed, 308 insertions, 0 deletions
diff --git a/src/cache-exp.c b/src/cache-exp.c
new file mode 100644
index 0000000..e88877a
--- /dev/null
+++ b/src/cache-exp.c
@@ -0,0 +1,308 @@
+/*
+ * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "cache.h"
+#include "hash.h"
+#include "log.h"
+#include "conntrackd.h"
+#include "netlink.h"
+#include "event.h"
+#include "jhash.h"
+#include "network.h"
+
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
+
+static uint32_t
+cache_hash4_exp(const struct nf_conntrack *ct, const struct hashtable *table)
+{
+	uint32_t a[4] = {
+		[0]	= nfct_get_attr_u32(ct, ATTR_IPV4_SRC),
+		[1]	= nfct_get_attr_u32(ct, ATTR_IPV4_DST),
+		[2]	= nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 |
+			  nfct_get_attr_u8(ct, ATTR_L4PROTO),
+		[3]	= nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 |
+			  nfct_get_attr_u16(ct, ATTR_PORT_DST),
+	};
+
+	/*
+	 * Instead of returning hash % table->hashsize (implying a divide)
+	 * we return the high 32 bits of the (hash * table->hashsize) that will
+	 * give results between [0 and hashsize-1] and same hash distribution,
+	 * but using a multiply, less expensive than a divide. See:
+	 * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html
+	 */
+	return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32;
+}
+
+static uint32_t
+cache_hash6_exp(const struct nf_conntrack *ct, const struct hashtable *table)
+{
+	uint32_t a[10];
+
+	memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4);
+	memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4);
+	a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 |
+	       nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO);
+	a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 |
+	       nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST);
+
+	return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32;
+}
+
+static uint32_t
+cache_exp_hash(const void *data, const struct hashtable *table)
+{
+	int ret = 0;
+	const struct nf_expect *exp = data;
+	const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER);
+
+	switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) {
+		case AF_INET:
+			ret = cache_hash4_exp(ct, table);
+			break;
+		case AF_INET6:
+			ret = cache_hash6_exp(ct, table);
+			break;
+		default:
+			dlog(LOG_ERR, "unknown layer 3 proto in hash");
+			break;
+	}
+	return ret;
+}
+
+static int cache_exp_cmp(const void *data1, const void *data2)
+{
+	const struct cache_object *obj = data1;
+	const struct nf_expect *exp = data2;
+
+	return nfexp_cmp(obj->ptr, exp, 0);
+}
+
+static void *cache_exp_alloc(void)
+{
+	return nfexp_new();
+}
+
+static void cache_exp_free(void *ptr)
+{
+	nfexp_destroy(ptr);
+}
+
+static void cache_exp_copy(void *dst, void *src, unsigned int flags)
+{
+	/* XXX: add nfexp_copy(...) to libnetfilter_conntrack. */
+	memcpy(dst, src, nfexp_maxsize());
+}
+
+static int cache_exp_dump_step(void *data1, void *n)
+{
+	char buf[1024];
+	int size;
+	struct __dump_container *container = data1;
+	struct cache_object *obj = n;
+	char *data = obj->data;
+	unsigned i;
+
+	/*
+	 * XXX: Do not dump the entries that are scheduled to expire.
+	 *	These entries talk about already destroyed connections
+	 *	that we keep for some time just in case that we have to
+	 *	resent some lost messages. We do not show them to the
+	 *	user as he may think that the firewall replicas are not
+	 *	in sync. The branch below is a hack as it is quite
+	 *	specific and it breaks conntrackd modularity. Probably
+	 *	there's a nicer way to do this but until I come up with it...
+	 */
+	if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD)
+		return 0;
+
+	/* do not show cached timeout, this may confuse users */
+	if (nfexp_attr_is_set(obj->ptr, ATTR_EXP_TIMEOUT))
+		nfexp_attr_unset(obj->ptr, ATTR_EXP_TIMEOUT);
+
+	memset(buf, 0, sizeof(buf));
+	size = nfexp_snprintf(buf, sizeof(buf),obj->ptr,
+			      NFCT_T_UNKNOWN, container->type, 0);
+
+	for (i = 0; i < obj->cache->num_features; i++) {
+		if (obj->cache->features[i]->dump) {
+			size += obj->cache->features[i]->dump(obj, data,
+							      buf+size,
+							      container->type);
+			data += obj->cache->features[i]->size;
+		}
+	}
+	if (container->type != NFCT_O_XML) {
+		long tm = time(NULL);
+		size += sprintf(buf+size, " [active since %lds]",
+				tm - obj->lifetime);
+	}
+	size += sprintf(buf+size, "\n");
+	if (send(container->fd, buf, size, 0) == -1) {
+		if (errno != EPIPE)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int cache_exp_commit_step(void *data, void *n)
+{
+	struct cache_object *obj = n;
+	struct __commit_container *tmp = data;
+	int ret, retry = 1, timeout;
+	struct nf_expect *exp = obj->ptr;
+
+	if (CONFIG(commit_timeout)) {
+		timeout = CONFIG(commit_timeout);
+	} else {
+		timeout = time(NULL) - obj->lastupdate;
+		if (timeout < 0) {
+			/* XXX: Arbitrarily set the timer to one minute, how
+			 * can this happen? For example, an adjustment due to
+			 * daylight-saving. Probably other situations can
+			 * trigger this. */
+			timeout = 60;
+		}
+		/* calculate an estimation of the current timeout */
+		timeout = nfexp_get_attr_u32(exp, ATTR_EXP_TIMEOUT) - timeout;
+		if (timeout < 0) {
+			timeout = 60;
+		}
+	}
+
+retry:
+	if (nl_create_expect(tmp->h, exp, timeout) == -1) {
+		if (errno == EEXIST && retry == 1) {
+			ret = nl_destroy_expect(tmp->h, exp);
+			if (ret == 0 || (ret == -1 && errno == ENOENT)) {
+				if (retry) {
+					retry = 0;
+					goto retry;
+				}
+			}
+			dlog(LOG_ERR, "commit-destroy: %s", strerror(errno));
+			dlog_exp(STATE(log), exp, NFCT_O_PLAIN);
+			tmp->c->stats.commit_fail++;
+		} else {
+			dlog(LOG_ERR, "commit-create: %s", strerror(errno));
+			dlog_exp(STATE(log), exp, NFCT_O_PLAIN);
+			tmp->c->stats.commit_fail++;
+		}
+	} else {
+		tmp->c->stats.commit_ok++;
+	}
+	/* keep iterating even if we have found errors */
+	return 0;
+}
+
+static int
+cache_exp_commit(struct cache *c, struct nfct_handle *h, int clientfd)
+{
+	unsigned int commit_ok, commit_fail;
+	struct timeval commit_stop, res;
+	struct __commit_container tmp = {
+		.h = h,
+		.c = c,
+	};
+
+	/* we already have one commit in progress, skip this. The clientfd
+	 * descriptor has to be closed by the caller. */
+	if (clientfd && STATE_SYNC(commit).clientfd != -1)
+		return -1;
+
+	switch(STATE_SYNC(commit).state) {
+	case COMMIT_STATE_INACTIVE:
+		gettimeofday(&STATE_SYNC(commit).stats.start, NULL);
+		STATE_SYNC(commit).stats.ok = c->stats.commit_ok;
+		STATE_SYNC(commit).stats.fail = c->stats.commit_fail;
+		STATE_SYNC(commit).clientfd = clientfd;
+	case COMMIT_STATE_MASTER:
+		STATE_SYNC(commit).current =
+			hashtable_iterate_limit(c->h, &tmp,
+						STATE_SYNC(commit).current,
+						CONFIG(general).commit_steps,
+						cache_exp_commit_step);
+		if (STATE_SYNC(commit).current < CONFIG(hashsize)) {
+			STATE_SYNC(commit).state = COMMIT_STATE_MASTER;
+			/* give it another step as soon as possible */
+			write_evfd(STATE_SYNC(commit).evfd);
+			return 1;
+		}
+
+		/* calculate the time that commit has taken */
+		gettimeofday(&commit_stop, NULL);
+		timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res);
+
+		/* calculate new entries committed */
+		commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok;
+		commit_fail =
+			c->stats.commit_fail - STATE_SYNC(commit).stats.fail;
+
+		/* log results */
+		dlog(LOG_NOTICE, "Committed %u new expectations", commit_ok);
+
+		if (commit_fail)
+			dlog(LOG_NOTICE, "%u expectations can't be "
+					 "committed", commit_fail);
+
+		dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds",
+			res.tv_sec, res.tv_usec);
+
+		/* prepare the state machine for new commits */
+		STATE_SYNC(commit).current = 0;
+		STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE;
+
+		return 0;
+	}
+	return 1;
+}
+
+static struct nethdr *
+cache_exp_build_msg(const struct cache_object *obj, int type)
+{
+	return BUILD_NETMSG_FROM_EXP(obj->ptr, type);
+}
+
+/* template to cache expectations coming from the kernel. */
+struct cache_ops cache_sync_internal_exp_ops = {
+	.hash		= cache_exp_hash,
+	.cmp		= cache_exp_cmp,
+	.alloc		= cache_exp_alloc,
+	.free		= cache_exp_free,
+	.copy		= cache_exp_copy,
+	.dump_step	= cache_exp_dump_step,
+	.commit		= NULL,
+	.build_msg	= cache_exp_build_msg,
+};
+
+/* template to cache expectations coming from the network. */
+struct cache_ops cache_sync_external_exp_ops = {
+	.hash		= cache_exp_hash,
+	.cmp		= cache_exp_cmp,
+	.alloc		= cache_exp_alloc,
+	.free		= cache_exp_free,
+	.copy		= cache_exp_copy,
+	.dump_step	= cache_exp_dump_step,
+	.commit		= cache_exp_commit,
+	.build_msg	= NULL,
+};