From 79a777c60cfe02197c135adcc4edb2f63ae9a695 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2011 17:13:25 +0100 Subject: conntrackd: support for expectation synchronization This patch adds support to synchronize expectations between firewalls. This addition aims to re-use as much as possible of the existing infrastructure for stability reasons. The expectation support has been tested with the FTP helper. This extension requires libnetfilter_conntrack 1.0.0. If this is the first time you're playing with conntrackd, I *strongly* recommend you to get working setup of conntrackd without expectation support before as described in the documentation. Then, enabling expectation support is rather easy. To know more about expectations, if you're not familiar with them, I suggest you to read: "Netfilter's Connection Tracking System" http://people.netfilter.org/pablo/docs/login.pdf Reprinted from ;login: The Magazine of USENIX, vol. 31, no. 3 (Berkeley, CA: USENIX Association, 2006, pp40-45.) In short, expectations allow one Linux firewall to filter multi-flow traffic like FTP, SIP and H.323. In my testbed, there are two firewalls in a primary-backup configuration running keepalived. The use a couple of floating cluster IP address (192.168.0.100 and 192.168.1.100) that are used by the client. These firewalls protect one FTP server (192.168.1.2) that will be accessed by one client. In ASCII art, it looks like this: 192.168.0.100 192.168.1.100 eth1 eth2 fw-1 / \ FTP -- client ------ ------ server -- 192.168.0.2 \ / 192.168.1.2 fw-2 This is the rule-set for the firewalls: -A POSTROUTING -t nat -s 192.168.0.2/32 -d 192.168.1.2/32 -j SNAT --to-source 192.168.1.100 -A INPUT -p tcp -m tcp --dport 22 -j ACCEPT -A INPUT -m state --state INVALID -j DROP -A FORWARD -m state --state RELATED -j ACCEPT -A FORWARD -i eth2 -m state --state ESTABLISHED -j ACCEPT -A FORWARD -i eth1 -p tcp -m tcp --dport 21 --tcp-flags FIN,SYN,RST,ACK SYN -m state --state NEW -j ACCEPT -A FORWARD -i eth1 -p tcp -m state --state ESTABLISHED -j ACCEPT -A FORWARD -m state --state INVALID -j LOG --log-prefix "invalid: " The following steps detail how to check that the expectation support works fine for conntrackd: 1) You have to enable the expectation support in the configuration file with the following option: Sync { ... Options { ExpectationSync { ftp sip h323 } } } This enables expectation synchronization for the FTP, SIP and H.323 helpers. You can alternatively use: Sync { ... Options { ExpectationSync On } } To enable expectation synchronization for all helpers. 2) Make sure you have loaded the FTP helper in both firewalls. root@fw1# modprobe nf_conntrack_ftp root@fw2# modprobe nf_conntrack_ftp 3) Switch to the client. Start one FTP control connection to one server that is protected by the firewalls, enter passive mode: (term-1) user@client$ nc 192.168.1.2 21 220 dummy FTP server USER anonymous 331 Please specify the password. PASS nothing 230 Login successful. PASV 227 Entering Passive Mode (192,168,1,2,163,11). This means that port 163*256+11=41739 will be used for the data traffic. Read this if you are not familiar with the FTP protocol: http://www.freefire.org/articles/ftpexample.php 3) Switch to fw-1 (primary) to check that the expectation is in the internal cache. root@fw1# conntrackd -i exp proto=6 src=192.168.0.2 dst=192.168.1.2 sport=0 dport=41739 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.0.2 master-dst=192.168.1.2 sport=36390 dport=21 [active since 5s] 4) Switch to fw-2 (backup) to check that the expectation has been successfully replicated. root@fw2# conntrackd -e exp proto=6 src=192.168.0.2 dst=192.168.1.2 sport=0 dport=41739 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.0.2 master-dst=192.168.1.2 sport=36390 dport=21 [active since 8s] 5) Make the primary firewall fw-1 fail. Now fw-2 becomes primary. 6) Switch to fw-2 (primary) to commit the external cache into the kernel. root@fw2# conntrackd -c exp The logs should display that the commit was successful: root@fw2# tail -100f /var/log/conntrackd.log [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] committing external cache: expectations [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] Committed 1 new entries [Wed Dec 7 22:16:31 2011] (pid=19195) [notice] commit has taken 0.000366 seconds 7) Switch to the client. Open a new terminal and connect to the port that has been announced by the server: (term-2) user@client$ nc -vvv 192.168.1.2 41739 (UNKNOWN) [192.168.1.2] 41739 (?) open 8) Switch to term-1 and ask for the file listing: [...] 227 Entering Passive Mode (192,168,1,2,163,11). LIST 9) Switch to term-2, it should display the listing. That means everything has worked fine. You may want to try disabling the expectation support and repeating the steps to check that *it does not work* without the state-synchronization. You can also display expectation statistics by means of: root@fwX# conntrackd -s exp This update requires no changes in the primary-backup.sh script that is used by the HA manager to interact with conntrackd. Thus, we provide a backward compatible command line interface. Regarding the Filter clause and expectations, we use the master conntrack to filter expectation events. The filtering is performed in user-space. No kernel-space filtering support for expectations yet (this support should go in libnetfilter_conntrack at some point). This patch also includes support to disable caching and to allow direct injection of expectations. Signed-off-by: Pablo Neira Ayuso --- src/cache-exp.c | 308 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 308 insertions(+) create mode 100644 src/cache-exp.c (limited to 'src/cache-exp.c') diff --git a/src/cache-exp.c b/src/cache-exp.c new file mode 100644 index 0000000..e88877a --- /dev/null +++ b/src/cache-exp.c @@ -0,0 +1,308 @@ +/* + * (C) 2006-2011 by Pablo Neira Ayuso + * (C) 2011 by Vyatta Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "cache.h" +#include "hash.h" +#include "log.h" +#include "conntrackd.h" +#include "netlink.h" +#include "event.h" +#include "jhash.h" +#include "network.h" + +#include +#include +#include +#include + +static uint32_t +cache_hash4_exp(const struct nf_conntrack *ct, const struct hashtable *table) +{ + uint32_t a[4] = { + [0] = nfct_get_attr_u32(ct, ATTR_IPV4_SRC), + [1] = nfct_get_attr_u32(ct, ATTR_IPV4_DST), + [2] = nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 | + nfct_get_attr_u8(ct, ATTR_L4PROTO), + [3] = nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 | + nfct_get_attr_u16(ct, ATTR_PORT_DST), + }; + + /* + * Instead of returning hash % table->hashsize (implying a divide) + * we return the high 32 bits of the (hash * table->hashsize) that will + * give results between [0 and hashsize-1] and same hash distribution, + * but using a multiply, less expensive than a divide. See: + * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html + */ + return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_hash6_exp(const struct nf_conntrack *ct, const struct hashtable *table) +{ + uint32_t a[10]; + + memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); + memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); + a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 | + nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO); + a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 | + nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST); + + return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32; +} + +static uint32_t +cache_exp_hash(const void *data, const struct hashtable *table) +{ + int ret = 0; + const struct nf_expect *exp = data; + const struct nf_conntrack *ct = nfexp_get_attr(exp, ATTR_EXP_MASTER); + + switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { + case AF_INET: + ret = cache_hash4_exp(ct, table); + break; + case AF_INET6: + ret = cache_hash6_exp(ct, table); + break; + default: + dlog(LOG_ERR, "unknown layer 3 proto in hash"); + break; + } + return ret; +} + +static int cache_exp_cmp(const void *data1, const void *data2) +{ + const struct cache_object *obj = data1; + const struct nf_expect *exp = data2; + + return nfexp_cmp(obj->ptr, exp, 0); +} + +static void *cache_exp_alloc(void) +{ + return nfexp_new(); +} + +static void cache_exp_free(void *ptr) +{ + nfexp_destroy(ptr); +} + +static void cache_exp_copy(void *dst, void *src, unsigned int flags) +{ + /* XXX: add nfexp_copy(...) to libnetfilter_conntrack. */ + memcpy(dst, src, nfexp_maxsize()); +} + +static int cache_exp_dump_step(void *data1, void *n) +{ + char buf[1024]; + int size; + struct __dump_container *container = data1; + struct cache_object *obj = n; + char *data = obj->data; + unsigned i; + + /* + * XXX: Do not dump the entries that are scheduled to expire. + * These entries talk about already destroyed connections + * that we keep for some time just in case that we have to + * resent some lost messages. We do not show them to the + * user as he may think that the firewall replicas are not + * in sync. The branch below is a hack as it is quite + * specific and it breaks conntrackd modularity. Probably + * there's a nicer way to do this but until I come up with it... + */ + if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD) + return 0; + + /* do not show cached timeout, this may confuse users */ + if (nfexp_attr_is_set(obj->ptr, ATTR_EXP_TIMEOUT)) + nfexp_attr_unset(obj->ptr, ATTR_EXP_TIMEOUT); + + memset(buf, 0, sizeof(buf)); + size = nfexp_snprintf(buf, sizeof(buf),obj->ptr, + NFCT_T_UNKNOWN, container->type, 0); + + for (i = 0; i < obj->cache->num_features; i++) { + if (obj->cache->features[i]->dump) { + size += obj->cache->features[i]->dump(obj, data, + buf+size, + container->type); + data += obj->cache->features[i]->size; + } + } + if (container->type != NFCT_O_XML) { + long tm = time(NULL); + size += sprintf(buf+size, " [active since %lds]", + tm - obj->lifetime); + } + size += sprintf(buf+size, "\n"); + if (send(container->fd, buf, size, 0) == -1) { + if (errno != EPIPE) + return -1; + } + + return 0; +} + +static int cache_exp_commit_step(void *data, void *n) +{ + struct cache_object *obj = n; + struct __commit_container *tmp = data; + int ret, retry = 1, timeout; + struct nf_expect *exp = obj->ptr; + + if (CONFIG(commit_timeout)) { + timeout = CONFIG(commit_timeout); + } else { + timeout = time(NULL) - obj->lastupdate; + if (timeout < 0) { + /* XXX: Arbitrarily set the timer to one minute, how + * can this happen? For example, an adjustment due to + * daylight-saving. Probably other situations can + * trigger this. */ + timeout = 60; + } + /* calculate an estimation of the current timeout */ + timeout = nfexp_get_attr_u32(exp, ATTR_EXP_TIMEOUT) - timeout; + if (timeout < 0) { + timeout = 60; + } + } + +retry: + if (nl_create_expect(tmp->h, exp, timeout) == -1) { + if (errno == EEXIST && retry == 1) { + ret = nl_destroy_expect(tmp->h, exp); + if (ret == 0 || (ret == -1 && errno == ENOENT)) { + if (retry) { + retry = 0; + goto retry; + } + } + dlog(LOG_ERR, "commit-destroy: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + tmp->c->stats.commit_fail++; + } else { + dlog(LOG_ERR, "commit-create: %s", strerror(errno)); + dlog_exp(STATE(log), exp, NFCT_O_PLAIN); + tmp->c->stats.commit_fail++; + } + } else { + tmp->c->stats.commit_ok++; + } + /* keep iterating even if we have found errors */ + return 0; +} + +static int +cache_exp_commit(struct cache *c, struct nfct_handle *h, int clientfd) +{ + unsigned int commit_ok, commit_fail; + struct timeval commit_stop, res; + struct __commit_container tmp = { + .h = h, + .c = c, + }; + + /* we already have one commit in progress, skip this. The clientfd + * descriptor has to be closed by the caller. */ + if (clientfd && STATE_SYNC(commit).clientfd != -1) + return -1; + + switch(STATE_SYNC(commit).state) { + case COMMIT_STATE_INACTIVE: + gettimeofday(&STATE_SYNC(commit).stats.start, NULL); + STATE_SYNC(commit).stats.ok = c->stats.commit_ok; + STATE_SYNC(commit).stats.fail = c->stats.commit_fail; + STATE_SYNC(commit).clientfd = clientfd; + case COMMIT_STATE_MASTER: + STATE_SYNC(commit).current = + hashtable_iterate_limit(c->h, &tmp, + STATE_SYNC(commit).current, + CONFIG(general).commit_steps, + cache_exp_commit_step); + if (STATE_SYNC(commit).current < CONFIG(hashsize)) { + STATE_SYNC(commit).state = COMMIT_STATE_MASTER; + /* give it another step as soon as possible */ + write_evfd(STATE_SYNC(commit).evfd); + return 1; + } + + /* calculate the time that commit has taken */ + gettimeofday(&commit_stop, NULL); + timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res); + + /* calculate new entries committed */ + commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok; + commit_fail = + c->stats.commit_fail - STATE_SYNC(commit).stats.fail; + + /* log results */ + dlog(LOG_NOTICE, "Committed %u new expectations", commit_ok); + + if (commit_fail) + dlog(LOG_NOTICE, "%u expectations can't be " + "committed", commit_fail); + + dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds", + res.tv_sec, res.tv_usec); + + /* prepare the state machine for new commits */ + STATE_SYNC(commit).current = 0; + STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; + + return 0; + } + return 1; +} + +static struct nethdr * +cache_exp_build_msg(const struct cache_object *obj, int type) +{ + return BUILD_NETMSG_FROM_EXP(obj->ptr, type); +} + +/* template to cache expectations coming from the kernel. */ +struct cache_ops cache_sync_internal_exp_ops = { + .hash = cache_exp_hash, + .cmp = cache_exp_cmp, + .alloc = cache_exp_alloc, + .free = cache_exp_free, + .copy = cache_exp_copy, + .dump_step = cache_exp_dump_step, + .commit = NULL, + .build_msg = cache_exp_build_msg, +}; + +/* template to cache expectations coming from the network. */ +struct cache_ops cache_sync_external_exp_ops = { + .hash = cache_exp_hash, + .cmp = cache_exp_cmp, + .alloc = cache_exp_alloc, + .free = cache_exp_free, + .copy = cache_exp_copy, + .dump_step = cache_exp_dump_step, + .commit = cache_exp_commit, + .build_msg = NULL, +}; -- cgit v1.2.3