From f152340a26912d090b5fd15be10208605929816b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 25 May 2008 20:36:54 +0200 Subject: add best effort replication protocol (aka NOTRACK) --- ChangeLog | 1 + doc/sync/notrack/README | 2 + doc/sync/notrack/node1/conntrackd.conf | 150 +++++++++++++++++++++++++++ doc/sync/notrack/node1/keepalived.conf | 39 +++++++ doc/sync/notrack/node2/conntrackd.conf | 149 ++++++++++++++++++++++++++ doc/sync/notrack/node2/keepalived.conf | 39 +++++++ doc/sync/notrack/script_backup.sh | 3 + doc/sync/notrack/script_master.sh | 5 + include/conntrackd.h | 1 + src/Makefile.am | 2 +- src/read_config_lex.l | 2 + src/read_config_yy.y | 15 ++- src/sync-mode.c | 2 + src/sync-notrack.c | 184 +++++++++++++++++++++++++++++++++ 14 files changed, 592 insertions(+), 2 deletions(-) create mode 100644 doc/sync/notrack/README create mode 100644 doc/sync/notrack/node1/conntrackd.conf create mode 100644 doc/sync/notrack/node1/keepalived.conf create mode 100644 doc/sync/notrack/node2/conntrackd.conf create mode 100644 doc/sync/notrack/node2/keepalived.conf create mode 100644 doc/sync/notrack/script_backup.sh create mode 100644 doc/sync/notrack/script_master.sh create mode 100644 src/sync-notrack.c diff --git a/ChangeLog b/ChangeLog index dec7537..597206a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -35,6 +35,7 @@ o improve network message sanity checkings o add Mcast[Snd|Rcv]SocketBuffer clauses to tune multicast socket buffers o add missing string.h required by strdup in config parsing o add eventfd emulation to communicate receiver -> sender +o add best effort replication protocol (aka NOTRACK) version 0.9.6 (2008/03/08) ------------------------------ diff --git a/doc/sync/notrack/README b/doc/sync/notrack/README new file mode 100644 index 0000000..99b2f33 --- /dev/null +++ b/doc/sync/notrack/README @@ -0,0 +1,2 @@ +This directory contains the files for the NOTRACK replication protocol. This +protocol provides best effort delivery. Therefore, it is unreliable. diff --git a/doc/sync/notrack/node1/conntrackd.conf b/doc/sync/notrack/node1/conntrackd.conf new file mode 100644 index 0000000..1185351 --- /dev/null +++ b/doc/sync/notrack/node1/conntrackd.conf @@ -0,0 +1,150 @@ +# +# Synchronizer settings +# +Sync { + Mode NOTRACK { + # + # Entries committed to the connection tracking table + # starts with a limited timeout of N seconds until the + # takeover process is completed. + # + CommitTimeout 180 + } + + # + # Multicast IP and interface where messages are + # broadcasted (dedicated link). IMPORTANT: Make sure + # that iptables accepts traffic for destination + # 225.0.0.50, eg: + # + # iptables -I INPUT -d 225.0.0.50 -j ACCEPT + # iptables -I OUTPUT -d 225.0.0.50 -j ACCEPT + # + Multicast { + IPv4_address 225.0.0.50 + IPv4_interface 192.168.100.100 # IP of dedicated link + Interface eth2 + Group 3780 + + # The multicast sender uses a buffer to enqueue the packets + # that are going to be transmitted. The default size of this + # socket buffer is available at /proc/sys/net/core/wmem_default. + # This value determines the chances to have an overrun in the + # sender queue. The overrun results packet loss, thus, losing + # state information that would have to be retransmitted. If you + # notice some packet loss, you may want to increase the size + # of the sender buffer. Note: This protocol is best effort, + # really recommended to increase the buffer size. + + McastSndSocketBuffer 1249280 + + # The multicast receiver uses a buffer to enqueue the packets + # that the socket is pending to handle. The default size of this + # socket buffer is available at /proc/sys/net/core/rmem_default. + # This value determines the chances to have an overrun in the + # receiver queue. The overrun results packet loss, thus, losing + # state information that would have to be retransmitted. If you + # notice some packet loss, you may want to increase the size of + # the receiver buffer. Note: This protocol is best effort, + # really recommended to increase the buffer size. + + McastRcvSocketBuffer 1249280 + } + + # Enable/Disable message checksumming + Checksum on + + # Uncomment this if you want to replicate just certain TCP states. + # This option introduces a tradeoff in the replication: it reduces + # CPU consumption and lost messages rate at the cost of having + # backup replicas that don't contain the current state that the active + # replica holds. TCP states are: SYN_SENT, SYN_RECV, ESTABLISHED, + # FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE, LISTEN. + # + # Replicate ESTABLISHED TIME_WAIT for TCP + + # If you have a multiprimary setup (active-active) without connection + # persistency, ie. you can't know which firewall handles a packet + # that is part of a connection, then you need direct commit of + # conntrack entries to the kernel conntrack table. OSPF setups must + # set on this option. Default is Off. + # + # CacheWriteThrough On +} + +# +# General settings +# +General { + # + # Number of buckets in the caches: hash table + # + HashSize 8192 + + # + # Maximum number of conntracks: + # it must be >= $ cat /proc/sys/net/ipv4/netfilter/ip_conntrack_max + # + HashLimit 65535 + + # + # Logfile: on, off, or a filename + # Default: on (/var/log/conntrackd.log) + # + #LogFile off + + # + # Syslog: on, off or a facility name (daemon (default) or local0..7) + # Default: off + # + #Syslog on + + # + # Lockfile + # + LockFile /var/lock/conntrack.lock + + # + # Unix socket configuration + # + UNIX { + Path /tmp/sync.sock + Backlog 20 + } + + # + # Netlink socket buffer size + # + SocketBufferSize 262142 + + # + # Increase the socket buffer up to maximum if required + # + SocketBufferSizeMaxGrown 655355 +} + +# +# Ignore traffic for a certain set of IP's: Usually +# all the IP assigned to the firewall since local +# traffic must be ignored, just forwarded connections +# are worth to replicate +# +IgnoreTrafficFor { + IPv4_address 127.0.0.1 # loopback + IPv4_address 192.168.0.1 + IPv4_address 192.168.1.1 + IPv4_address 192.168.100.100 # dedicated link ip + IPv4_address 192.168.0.100 # virtual IP 1 + IPv4_address 192.168.1.100 # virtual IP 2 +} + +# +# Do not replicate certain protocol traffic +# +IgnoreProtocol { + UDP + ICMP + IGMP + VRRP + # numeric numbers also valid +} diff --git a/doc/sync/notrack/node1/keepalived.conf b/doc/sync/notrack/node1/keepalived.conf new file mode 100644 index 0000000..f937467 --- /dev/null +++ b/doc/sync/notrack/node1/keepalived.conf @@ -0,0 +1,39 @@ +vrrp_sync_group G1 { # must be before vrrp_instance declaration + group { + VI_1 + VI_2 + } + notify_master /etc/conntrackd/script_master.sh + notify_backup /etc/conntrackd/script_backup.sh +# notify_fault /etc/conntrackd/script_fault.sh +} + +vrrp_instance VI_1 { + interface eth1 + state SLAVE + virtual_router_id 61 + priority 80 + advert_int 3 + authentication { + auth_type PASS + auth_pass papas_con_tomate + } + virtual_ipaddress { + 192.168.0.100 # default CIDR mask is /32 + } +} + +vrrp_instance VI_2 { + interface eth0 + state SLAVE + virtual_router_id 62 + priority 80 + advert_int 3 + authentication { + auth_type PASS + auth_pass papas_con_tomate + } + virtual_ipaddress { + 192.168.1.100 + } +} diff --git a/doc/sync/notrack/node2/conntrackd.conf b/doc/sync/notrack/node2/conntrackd.conf new file mode 100644 index 0000000..7881d46 --- /dev/null +++ b/doc/sync/notrack/node2/conntrackd.conf @@ -0,0 +1,149 @@ +# +# Synchronizer settings +# +Sync { + Mode NOTRACK { + # Entries committed to the connection tracking table + # starts with a limited timeout of N seconds until the + # takeover process is completed. + # + CommitTimeout 180 + } + + # + # Multicast IP and interface where messages are + # broadcasted (dedicated link). IMPORTANT: Make sure + # that iptables accepts traffic for destination + # 225.0.0.50, eg: + # + # iptables -I INPUT -d 225.0.0.50 -j ACCEPT + # iptables -I OUTPUT -d 225.0.0.50 -j ACCEPT + # + Multicast { + IPv4_address 225.0.0.50 + IPv4_interface 192.168.100.200 # IP of dedicated link + Interface eth2 + Group 3780 + + # The multicast sender uses a buffer to enqueue the packets + # that are going to be transmitted. The default size of this + # socket buffer is available at /proc/sys/net/core/wmem_default. + # This value determines the chances to have an overrun in the + # sender queue. The overrun results packet loss, thus, losing + # state information that would have to be retransmitted. If you + # notice some packet loss, you may want to increase the size + # of the sender buffer. Note: This protocol is best effort, + # really recommended to increase the buffer size. + + McastSndSocketBuffer 1249280 + + # The multicast receiver uses a buffer to enqueue the packets + # that the socket is pending to handle. The default size of this + # socket buffer is available at /proc/sys/net/core/rmem_default. + # This value determines the chances to have an overrun in the + # receiver queue. The overrun results packet loss, thus, losing + # state information that would have to be retransmitted. If you + # notice some packet loss, you may want to increase the size of + # the receiver buffer. Note: This protocol is best effort, + # really recommended to increase the buffer size. + + McastRcvSocketBuffer 1249280 + } + + # Enable/Disable message checksumming + Checksum on + + # Uncomment this if you want to replicate just certain TCP states. + # This option introduces a tradeoff in the replication: it reduces + # CPU consumption and lost messages rate at the cost of having + # backup replicas that don't contain the current state that the active + # replica holds. TCP states are: SYN_SENT, SYN_RECV, ESTABLISHED, + # FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE, LISTEN. + # + # Replicate ESTABLISHED TIME_WAIT for TCP + + # If you have a multiprimary setup (active-active) without connection + # persistency, ie. you can't know which firewall handles a packet + # that is part of a connection, then you need direct commit of + # conntrack entries to the kernel conntrack table. OSPF setups must + # set on this option. Default is Off. + # + # CacheWriteThrough On +} + +# +# General settings +# +General { + # + # Number of buckets in the caches: hash table + # + HashSize 8192 + + # + # Maximum number of conntracks: + # it must be >= $ cat /proc/sys/net/ipv4/netfilter/ip_conntrack_max + # + HashLimit 65535 + + # + # Logfile: on, off, or a filename + # Default: on (/var/log/conntrackd.log) + # + #LogFile off + + # + # Syslog: on, off or a facility name (daemon (default) or local0..7) + # Default: off + # + #Syslog on + + # + # Lockfile + # + LockFile /var/lock/conntrack.lock + + # + # Unix socket configuration + # + UNIX { + Path /tmp/sync.sock + Backlog 20 + } + + # + # Netlink socket buffer size + # + SocketBufferSize 262142 + + # + # Increase the socket buffer up to maximum if required + # + SocketBufferSizeMaxGrown 655355 +} + +# +# Ignore traffic for a certain set of IP's: Usually +# all the IP assigned to the firewall since local +# traffic must be ignored, just forwarded connections +# are worth to replicate +# +IgnoreTrafficFor { + IPv4_address 127.0.0.1 # loopback + IPv4_address 192.168.0.2 + IPv4_address 192.168.1.2 + IPv4_address 192.168.100.200 # dedicated link ip + IPv4_address 192.168.0.200 # virtual IP 1 + IPv4_address 192.168.1.200 # virtual IP 2 +} + +# +# Do not replicate certain protocol traffic +# +IgnoreProtocol { + UDP + ICMP + IGMP + VRRP + # numeric numbers also valid +} diff --git a/doc/sync/notrack/node2/keepalived.conf b/doc/sync/notrack/node2/keepalived.conf new file mode 100644 index 0000000..f937467 --- /dev/null +++ b/doc/sync/notrack/node2/keepalived.conf @@ -0,0 +1,39 @@ +vrrp_sync_group G1 { # must be before vrrp_instance declaration + group { + VI_1 + VI_2 + } + notify_master /etc/conntrackd/script_master.sh + notify_backup /etc/conntrackd/script_backup.sh +# notify_fault /etc/conntrackd/script_fault.sh +} + +vrrp_instance VI_1 { + interface eth1 + state SLAVE + virtual_router_id 61 + priority 80 + advert_int 3 + authentication { + auth_type PASS + auth_pass papas_con_tomate + } + virtual_ipaddress { + 192.168.0.100 # default CIDR mask is /32 + } +} + +vrrp_instance VI_2 { + interface eth0 + state SLAVE + virtual_router_id 62 + priority 80 + advert_int 3 + authentication { + auth_type PASS + auth_pass papas_con_tomate + } + virtual_ipaddress { + 192.168.1.100 + } +} diff --git a/doc/sync/notrack/script_backup.sh b/doc/sync/notrack/script_backup.sh new file mode 100644 index 0000000..813e375 --- /dev/null +++ b/doc/sync/notrack/script_backup.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +/usr/sbin/conntrackd -n # request a resync from other nodes via multicast diff --git a/doc/sync/notrack/script_master.sh b/doc/sync/notrack/script_master.sh new file mode 100644 index 0000000..ff1dbc0 --- /dev/null +++ b/doc/sync/notrack/script_master.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +/usr/sbin/conntrackd -c # commit the cache +/usr/sbin/conntrackd -f # flush the caches +/usr/sbin/conntrackd -R # resync with kernel conntrack table diff --git a/include/conntrackd.h b/include/conntrackd.h index c7a65be..8a6e8d2 100644 --- a/include/conntrackd.h +++ b/include/conntrackd.h @@ -51,6 +51,7 @@ enum { #define CTD_STATS_MODE (1UL << 1) #define CTD_SYNC_FTFW (1UL << 2) #define CTD_SYNC_ALARM (1UL << 3) +#define CTD_SYNC_NOTRACK (1UL << 4) /* FILENAME_MAX is 4096 on my system, perhaps too much? */ #ifndef FILENAME_MAXLEN diff --git a/src/Makefile.am b/src/Makefile.am index 554074f..69ddcfd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -15,7 +15,7 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ ignore_pool.c fds.c event.c \ cache.c cache_iterators.c \ cache_lifetime.c cache_timer.c cache_wt.c \ - sync-mode.c sync-alarm.c sync-ftfw.c \ + sync-mode.c sync-alarm.c sync-ftfw.c sync-notrack.c \ traffic_stats.c stats-mode.c \ network.c \ state_helper.c state_helper_tcp.c \ diff --git a/src/read_config_lex.l b/src/read_config_lex.l index 7daaeab..bdde3b6 100644 --- a/src/read_config_lex.l +++ b/src/read_config_lex.l @@ -49,6 +49,7 @@ persistent [P|p][E|e][R|r][S|s][I|i][S|s][T|t][E|e][N|n][T|T] nack [N|n][A|a][C|c][K|k] alarm [A|a][L|l][A|a][R|r][M|m] ftfw [F|f][T|t][F|f][W|w] +notrack [N|n][O|o][T|t][R|r][A|a][C|c][K|k] %% "UNIX" { return T_UNIX; } @@ -125,6 +126,7 @@ ftfw [F|f][T|t][F|f][W|w] "is called `ftfw'. Please, update " "your conntrackd.conf file.\n"); return T_FTFW; } +{notrack} { return T_NOTRACK; } {string} { yylval.string = strdup(yytext); return T_STRING; } {comment} ; diff --git a/src/read_config_yy.y b/src/read_config_yy.y index 7fb3d5b..b9c53be 100644 --- a/src/read_config_yy.y +++ b/src/read_config_yy.y @@ -53,7 +53,7 @@ struct ct_conf conf; %token T_ESTABLISHED T_SYN_SENT T_SYN_RECV T_FIN_WAIT %token T_CLOSE_WAIT T_LAST_ACK T_TIME_WAIT T_CLOSE T_LISTEN %token T_SYSLOG T_WRITE_THROUGH T_STAT_BUFFER_SIZE T_DESTROY_TIMEOUT -%token T_MCAST_RCVBUFF T_MCAST_SNDBUFF +%token T_MCAST_RCVBUFF T_MCAST_SNDBUFF T_NOTRACK %token T_IP T_PATH_VAL %token T_NUMBER @@ -436,6 +436,7 @@ sync_line: refreshtime | delay_destroy_msgs | sync_mode_alarm | sync_mode_ftfw + | sync_mode_notrack | listen_to | state_replication | cache_writethrough @@ -452,6 +453,11 @@ sync_mode_ftfw: T_SYNC_MODE T_FTFW '{' sync_mode_ftfw_list '}' conf.flags |= CTD_SYNC_FTFW; }; +sync_mode_notrack: T_SYNC_MODE T_NOTRACK '{' sync_mode_notrack_list '}' +{ + conf.flags |= CTD_SYNC_NOTRACK; +}; + sync_mode_alarm_list: | sync_mode_alarm_list sync_mode_alarm_line; @@ -470,6 +476,13 @@ sync_mode_ftfw_line: resend_queue_size | window_size ; +sync_mode_notrack_list: + | sync_mode_notrack_list sync_mode_notrack_line; + +sync_mode_notrack_line: timeout + ; + + resend_queue_size: T_RESEND_BUFFER_SIZE T_NUMBER { conf.resend_queue_size = $2; diff --git a/src/sync-mode.c b/src/sync-mode.c index 2fe7406..16cc70d 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -169,6 +169,8 @@ static int init_sync(void) STATE_SYNC(sync) = &sync_ftfw; else if (CONFIG(flags) & CTD_SYNC_ALARM) STATE_SYNC(sync) = &sync_alarm; + else if (CONFIG(flags) & CTD_SYNC_NOTRACK) + STATE_SYNC(sync) = &sync_notrack; else { fprintf(stderr, "WARNING: No synchronization mode specified. " "Defaulting to FT-FW mode.\n"); diff --git a/src/sync-notrack.c b/src/sync-notrack.c new file mode 100644 index 0000000..2b1bc13 --- /dev/null +++ b/src/sync-notrack.c @@ -0,0 +1,184 @@ +/* + * (C) 2008 by Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "conntrackd.h" +#include "sync.h" +#include "us-conntrack.h" +#include "queue.h" +#include "debug.h" +#include "network.h" +#include "log.h" +#include "cache.h" +#include "event.h" + +#include + +static LIST_HEAD(tx_list); +static unsigned int tx_list_len; +static struct queue *tx_queue; + +struct cache_notrack { + struct list_head tx_list; +}; + +static struct cache_extra cache_notrack_extra = { + .size = sizeof(struct cache_notrack), +}; + +static void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to) +{ + struct nethdr_ack ack = { + .flags = flags, + .from = from, + .to = to, + }; + + queue_add(tx_queue, &ack, NETHDR_ACK_SIZ); + write_evfd(STATE_SYNC(evfd)); +} + +static int notrack_init(void) +{ + tx_queue = queue_create(~0U); + if (tx_queue == NULL) { + dlog(LOG_ERR, "cannot create tx queue"); + return -1; + } + + return 0; +} + +static void notrack_kill(void) +{ + queue_destroy(tx_queue); +} + +static int do_cache_to_tx(void *data1, void *data2) +{ + struct us_conntrack *u = data2; + struct cache_notrack *cn = cache_get_extra(STATE_SYNC(internal), u); + + /* add to tx list */ + list_add_tail(&cn->tx_list, &tx_list); + tx_list_len++; + + write_evfd(STATE_SYNC(evfd)); + + return 0; +} + +static int notrack_local(int fd, int type, void *data) +{ + int ret = 1; + + switch(type) { + case REQUEST_DUMP: + dlog(LOG_NOTICE, "request resync"); + tx_queue_add_ctlmsg(NET_F_RESYNC, 0, 0); + break; + case SEND_BULK: + dlog(LOG_NOTICE, "sending bulk update"); + cache_iterate(STATE_SYNC(internal), NULL, do_cache_to_tx); + break; + default: + ret = 0; + break; + } + + return ret; +} + +static int digest_msg(const struct nethdr *net) +{ + if (IS_DATA(net)) + return MSG_DATA; + + if (IS_RESYNC(net)) { + cache_iterate(STATE_SYNC(internal), NULL, do_cache_to_tx); + return MSG_CTL; + } + + return MSG_BAD; +} + +static int notrack_recv(const struct nethdr *net) +{ + int ret; + unsigned int exp_seq; + + mcast_track_seq(net->seq, &exp_seq); + + ret = digest_msg(net); + + if (ret != MSG_BAD) + mcast_track_update_seq(net->seq); + + return ret; +} + +static int tx_queue_xmit(void *data1, const void *data2) +{ + struct nethdr *net = data1; + size_t len = prepare_send_netmsg(STATE_SYNC(mcast_client), net); + + mcast_buffered_send_netmsg(STATE_SYNC(mcast_client), net, len); + queue_del(tx_queue, net); + + return 0; +} + +static int tx_list_xmit(struct list_head *i, struct us_conntrack *u, int type) +{ + int ret; + struct nethdr *net = BUILD_NETMSG(u->ct, type); + size_t len = prepare_send_netmsg(STATE_SYNC(mcast_client), net); + + list_del_init(i); + tx_list_len--; + + ret = mcast_buffered_send_netmsg(STATE_SYNC(mcast_client), net, len); + + return ret; +} + +static void notrack_run(void) +{ + struct cache_notrack *cn, *tmp; + + /* send messages in the tx_queue */ + queue_iterate(tx_queue, NULL, tx_queue_xmit); + + /* send conntracks in the tx_list */ + list_for_each_entry_safe(cn, tmp, &tx_list, tx_list) { + struct us_conntrack *u; + + u = cache_get_conntrack(STATE_SYNC(internal), cn); + tx_list_xmit(&cn->tx_list, u, NFCT_Q_UPDATE); + } +} + +struct sync_mode sync_notrack = { + .internal_cache_flags = LIFETIME, + .external_cache_flags = LIFETIME, + .internal_cache_extra = &cache_notrack_extra, + .init = notrack_init, + .kill = notrack_kill, + .local = notrack_local, + .recv = notrack_recv, + .run = notrack_run, +}; -- cgit v1.2.3