author    Bart De Schuymer <bdschuym@pandora.be>  2002-08-24 09:23:41 +0000
committer Bart De Schuymer <bdschuym@pandora.be>  2002-08-24 09:23:41 +0000
commit    d86fb4e9d55311c68244ae1fb40f6e3f3a454e19 (patch)
tree      ba124e0bd0a4033bb31ac271342a4f123f00d822 /br-nf-bds
parent    5fbc5c35b3aee288f523ad06505c0035fc36f023 (diff)
*** empty log message ***
Diffstat (limited to 'br-nf-bds')
-rw-r--r--  br-nf-bds/linux2.5/include/linux/netfilter.h          168
-rw-r--r--  br-nf-bds/linux2.5/include/linux/netfilter_ipv4.h      80
-rw-r--r--  br-nf-bds/linux2.5/include/linux/skbuff.h             1144
-rw-r--r--  br-nf-bds/linux2.5/net/Config.in                        97
-rw-r--r--  br-nf-bds/linux2.5/net/bridge/Makefile                  24
-rw-r--r--  br-nf-bds/linux2.5/net/bridge/br.c                     100
-rw-r--r--  br-nf-bds/linux2.5/net/bridge/br_forward.c             148
-rw-r--r--  br-nf-bds/linux2.5/net/bridge/br_input.c               181
-rw-r--r--  br-nf-bds/linux2.5/net/bridge/br_netfilter.c           567
-rw-r--r--  br-nf-bds/linux2.5/net/bridge/br_private.h             212
-rw-r--r--  br-nf-bds/linux2.5/net/core/netfilter.c                654
-rw-r--r--  br-nf-bds/linux2.5/net/core/skbuff.c                  1208
-rw-r--r--  br-nf-bds/linux2.5/net/ipv4/ip_output.c               1033
-rw-r--r--  br-nf-bds/linux2.5/net/ipv4/netfilter/ip_tables.c     1811
-rw-r--r--  br-nf-bds/linux2.5/net/ipv4/netfilter/ipt_LOG.c        363
15 files changed, 7790 insertions, 0 deletions
diff --git a/br-nf-bds/linux2.5/include/linux/netfilter.h b/br-nf-bds/linux2.5/include/linux/netfilter.h
new file mode 100644
index 0000000..b2df329
--- /dev/null
+++ b/br-nf-bds/linux2.5/include/linux/netfilter.h
@@ -0,0 +1,168 @@
+#ifndef __LINUX_NETFILTER_H
+#define __LINUX_NETFILTER_H
+
+#ifdef __KERNEL__
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/if.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#endif
+
+/* Responses from hook functions. */
+#define NF_DROP 0
+#define NF_ACCEPT 1
+#define NF_STOLEN 2
+#define NF_QUEUE 3
+#define NF_REPEAT 4
+#define NF_MAX_VERDICT NF_REPEAT
+
+/* Generic cache responses from hook functions. */
+#define NFC_ALTERED 0x8000
+#define NFC_UNKNOWN 0x4000
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+#ifdef CONFIG_NETFILTER
+
+extern void netfilter_init(void);
+
+/* Largest hook number + 1 */
+#define NF_MAX_HOOKS 8
+
+struct sk_buff;
+struct net_device;
+
+typedef unsigned int nf_hookfn(unsigned int hooknum,
+ struct sk_buff **skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *));
+
+struct nf_hook_ops
+{
+ struct list_head list;
+
+ /* User fills in from here down. */
+ nf_hookfn *hook;
+ int pf;
+ int hooknum;
+ /* Hooks are ordered in ascending priority. */
+ int priority;
+};
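
Example (not part of the patch): a minimal sketch of how a module might
use this interface. The names my_hook and my_ops are hypothetical; the
hook number and priority constants come from netfilter_ipv4.h further
down in this patch.

    /* Accept every packet; illustration only. */
    static unsigned int my_hook(unsigned int hooknum, struct sk_buff **skb,
                                const struct net_device *in,
                                const struct net_device *out,
                                int (*okfn)(struct sk_buff *))
    {
            return NF_ACCEPT;
    }

    static struct nf_hook_ops my_ops = {
            hook:     my_hook,
            pf:       PF_INET,
            hooknum:  NF_IP_PRE_ROUTING,
            priority: NF_IP_PRI_FILTER,
    };

    /* In module init/cleanup: */
    nf_register_hook(&my_ops);
    nf_unregister_hook(&my_ops);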
+
+struct nf_sockopt_ops
+{
+ struct list_head list;
+
+ int pf;
+
+ /* Non-inclusive ranges: use 0/0/NULL to never get called. */
+ int set_optmin;
+ int set_optmax;
+ int (*set)(struct sock *sk, int optval, void *user, unsigned int len);
+
+ int get_optmin;
+ int get_optmax;
+ int (*get)(struct sock *sk, int optval, void *user, int *len);
+
+ /* Number of users inside set() or get(). */
+ unsigned int use;
+ struct task_struct *cleanup_task;
+};
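
Example (not part of the patch): a sketch of claiming a sockopt range;
my_sockopts, my_set and the option numbers are hypothetical. The ranges
are non-inclusive at the top, and 0/0/NULL disables a direction.

    static int my_set(struct sock *sk, int optval, void *user,
                      unsigned int len)
    {
            return 0;               /* would parse "user" here */
    }

    static struct nf_sockopt_ops my_sockopts = {
            pf:         PF_INET,
            set_optmin: 96,
            set_optmax: 99,         /* handles optvals 96..98 */
            set:        my_set,
            get_optmin: 0,
            get_optmax: 0,          /* get() is never called */
            get:        NULL,
    };

    /* nf_register_sockopt(&my_sockopts); remember to check
     * permissions yourself inside my_set(). */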
+
+/* Each queued (to userspace) skbuff has one of these. */
+struct nf_info
+{
+ /* The ops struct which sent us to userspace. */
+ struct nf_hook_ops *elem;
+
+ /* If we're sent to userspace, this keeps housekeeping info */
+ int pf;
+ unsigned int hook;
+ struct net_device *indev, *outdev;
+ int (*okfn)(struct sk_buff *);
+};
+
+/* Function to register/unregister hook points. */
+int nf_register_hook(struct nf_hook_ops *reg);
+void nf_unregister_hook(struct nf_hook_ops *reg);
+
+/* Functions to register get/setsockopt ranges (non-inclusive). You
+ need to check permissions yourself! */
+int nf_register_sockopt(struct nf_sockopt_ops *reg);
+void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
+
+extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+
+/* Activate hook; either okfn or kfree_skb called, unless a hook
+ returns NF_STOLEN (in which case, it's up to the hook to deal with
+ the consequences).
+
+ Returns -ERRNO if packet dropped. Zero means queued, stolen or
+ accepted.
+*/
+
+/* RR:
+ > I don't want nf_hook to return anything because people might forget
+ > about async and trust the return value to mean "packet was ok".
+
+ AK:
+ Just document it clearly, then you can expect some sense from kernel
+ coders :)
+*/
+
+/* This is gross, but inline doesn't cut it for avoiding the function
+ call in fast path: gcc doesn't inline (needs value tracking?). --RR */
+#ifdef CONFIG_NETFILTER_DEBUG
+#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
+ nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), INT_MIN)
+#define NF_HOOK_THRESH nf_hook_slow
+#else
+#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
+(list_empty(&nf_hooks[(pf)][(hook)]) \
+ ? (okfn)(skb) \
+ : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), INT_MIN))
+#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
+(list_empty(&nf_hooks[(pf)][(hook)]) \
+ ? (okfn)(skb) \
+ : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), (thresh)))
+#endif
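
Example (not part of the patch): this mirrors how the 2.4-era IPv4
input path hands packets to the PRE_ROUTING hook; okfn (here
ip_rcv_finish) only runs if every registered hook returns NF_ACCEPT.

    return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
                   ip_rcv_finish);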
+
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
+ struct net_device *indev, struct net_device *outdev,
+ int (*okfn)(struct sk_buff *), int thresh);
+
+/* Call setsockopt() */
+int nf_setsockopt(struct sock *sk, int pf, int optval, char *opt,
+ int len);
+int nf_getsockopt(struct sock *sk, int pf, int optval, char *opt,
+ int *len);
+
+/* Packet queuing */
+typedef int (*nf_queue_outfn_t)(struct sk_buff *skb,
+ struct nf_info *info, void *data);
+extern int nf_register_queue_handler(int pf,
+ nf_queue_outfn_t outfn, void *data);
+extern int nf_unregister_queue_handler(int pf);
+extern void nf_reinject(struct sk_buff *skb,
+ struct nf_info *info,
+ unsigned int verdict);
+
+extern void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+extern void nf_dump_skb(int pf, struct sk_buff *skb);
+#endif
+
+/* FIXME: Before cache is ever used, this must be implemented for real. */
+extern void nf_invalidate_cache(int pf);
+
+#else /* !CONFIG_NETFILTER */
+#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
+#endif /*CONFIG_NETFILTER*/
+
+#endif /*__KERNEL__*/
+#endif /*__LINUX_NETFILTER_H*/
diff --git a/br-nf-bds/linux2.5/include/linux/netfilter_ipv4.h b/br-nf-bds/linux2.5/include/linux/netfilter_ipv4.h
new file mode 100644
index 0000000..946190a
--- /dev/null
+++ b/br-nf-bds/linux2.5/include/linux/netfilter_ipv4.h
@@ -0,0 +1,80 @@
+#ifndef __LINUX_IP_NETFILTER_H
+#define __LINUX_IP_NETFILTER_H
+
+/* IPv4-specific defines for netfilter.
+ * (C)1998 Rusty Russell -- This code is GPL.
+ */
+
+#include <linux/config.h>
+#include <linux/netfilter.h>
+
+/* IP Cache bits. */
+/* Src IP address. */
+#define NFC_IP_SRC 0x0001
+/* Dest IP address. */
+#define NFC_IP_DST 0x0002
+/* Input device. */
+#define NFC_IP_IF_IN 0x0004
+/* Output device. */
+#define NFC_IP_IF_OUT 0x0008
+/* TOS. */
+#define NFC_IP_TOS 0x0010
+/* Protocol. */
+#define NFC_IP_PROTO 0x0020
+/* IP options. */
+#define NFC_IP_OPTIONS 0x0040
+/* Frag & flags. */
+#define NFC_IP_FRAG 0x0080
+
+/* Per-protocol information: only matters if proto match. */
+/* TCP flags. */
+#define NFC_IP_TCPFLAGS 0x0100
+/* Source port. */
+#define NFC_IP_SRC_PT 0x0200
+/* Dest port. */
+#define NFC_IP_DST_PT 0x0400
+/* Something else about the proto */
+#define NFC_IP_PROTO_UNKNOWN 0x2000
+
+/* IP Hooks */
+/* After promisc drops, checksum checks. */
+#define NF_IP_PRE_ROUTING 0
+/* If the packet is destined for this box. */
+#define NF_IP_LOCAL_IN 1
+/* If the packet is destined for another interface. */
+#define NF_IP_FORWARD 2
+/* Packets coming from a local process. */
+#define NF_IP_LOCAL_OUT 3
+/* Packets about to hit the wire. */
+#define NF_IP_POST_ROUTING 4
+#define NF_IP_NUMHOOKS 5
+
+enum nf_ip_hook_priorities {
+ NF_IP_PRI_FIRST = INT_MIN,
+ NF_IP_PRI_CONNTRACK = -200,
+ NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
+ NF_IP_PRI_MANGLE = -150,
+ NF_IP_PRI_NAT_DST = -100,
+ NF_IP_PRI_BRIDGE_SABOTAGE = -50,
+ NF_IP_PRI_FILTER = 0,
+ NF_IP_PRI_NAT_SRC = 100,
+ NF_IP_PRI_LAST = INT_MAX,
+};
+
+/* Arguments for setsockopt SOL_IP: */
+/* 2.0 firewalling went from 64 through 71 (and +256, +512, etc). */
+/* 2.2 firewalling (+ masq) went from 64 through 76 */
+/* 2.4 firewalling went 64 through 67. */
+#define SO_ORIGINAL_DST 80
+
+#ifdef __KERNEL__
+#ifdef CONFIG_NETFILTER_DEBUG
+void nf_debug_ip_local_deliver(struct sk_buff *skb);
+void nf_debug_ip_loopback_xmit(struct sk_buff *newskb);
+void nf_debug_ip_finish_output2(struct sk_buff *skb);
+#endif /*CONFIG_NETFILTER_DEBUG*/
+
+extern int ip_route_me_harder(struct sk_buff **pskb);
+#endif /*__KERNEL__*/
+
+#endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/br-nf-bds/linux2.5/include/linux/skbuff.h b/br-nf-bds/linux2.5/include/linux/skbuff.h
new file mode 100644
index 0000000..fb99584
--- /dev/null
+++ b/br-nf-bds/linux2.5/include/linux/skbuff.h
@@ -0,0 +1,1144 @@
+/*
+ * Definitions for the 'struct sk_buff' memory handlers.
+ *
+ * Authors:
+ * Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * Florian La Roche, <rzsfl@rz.uni-sb.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_SKBUFF_H
+#define _LINUX_SKBUFF_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/cache.h>
+
+#include <asm/atomic.h>
+#include <asm/types.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/poll.h>
+#include <linux/net.h>
+
+#define HAVE_ALLOC_SKB /* For the drivers to know */
+#define HAVE_ALIGNABLE_SKB /* Ditto 8) */
+#define SLAB_SKB /* Slabified skbuffs */
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_HW 1
+#define CHECKSUM_UNNECESSARY 2
+
+#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
+ ~(SMP_CACHE_BYTES - 1))
+#define SKB_MAX_ORDER(X, ORDER) (((PAGE_SIZE << (ORDER)) - (X) - \
+ sizeof(struct skb_shared_info)) & \
+ ~(SMP_CACHE_BYTES - 1))
+#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
+#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
+
+/* A. Checksumming of received packets by device.
+ *
+ * NONE: device failed to checksum this packet.
+ * skb->csum is undefined.
+ *
+ * UNNECESSARY: device parsed the packet and verified the checksum.
+ * skb->csum is undefined.
+ * It is a bad option, but, unfortunately, many vendors do this.
+ * Apparently with the secret goal of selling you a new device
+ * when you add a new protocol to your host, e.g. IPv6. 8)
+ *
+ * HW: the most generic way. Device supplied checksum of _all_
+ * the packet as seen by netif_rx in skb->csum.
+ * NOTE: Even if a device supports only some protocols but
+ * is able to produce some skb->csum, it MUST use HW,
+ * not UNNECESSARY.
+ *
+ * B. Checksumming on output.
+ *
+ * NONE: skb is checksummed by protocol or csum is not required.
+ *
+ * HW: device is required to csum packet as seen by hard_start_xmit
+ * from skb->h.raw to the end and to record the checksum
+ * at skb->h.raw+skb->csum.
+ *
+ * Device must show its capabilities in dev->features, set
+ * at device setup time.
+ * NETIF_F_HW_CSUM - a clever device, able to checksum
+ * everything.
+ * NETIF_F_NO_CSUM - loopback or reliable single hop media.
+ * NETIF_F_IP_CSUM - the device is dumb: it can csum only
+ * TCP/UDP over IPv4. Sigh. Vendors like it this
+ * way for some unknown reason. Though, see the comment above
+ * about CHECKSUM_UNNECESSARY. 8)
+ *
+ * Any questions? No questions, good. --ANK
+ */
+
+#ifdef __i386__
+#define NET_CALLER(arg) (*(((void **)&arg) - 1))
+#else
+#define NET_CALLER(arg) __builtin_return_address(0)
+#endif
+
+#ifdef CONFIG_NETFILTER
+struct nf_conntrack {
+ atomic_t use;
+ void (*destroy)(struct nf_conntrack *);
+};
+
+struct nf_ct_info {
+ struct nf_conntrack *master;
+};
+#endif
+
+struct sk_buff_head {
+ /* These two members must be first. */
+ struct sk_buff *next;
+ struct sk_buff *prev;
+
+ __u32 qlen;
+ spinlock_t lock;
+};
+
+struct sk_buff;
+
+#define MAX_SKB_FRAGS 6
+
+typedef struct skb_frag_struct skb_frag_t;
+
+struct skb_frag_struct {
+ struct page *page;
+ __u16 page_offset;
+ __u16 size;
+};
+
+/* This data is invariant across clones and lives at
+ * the end of the header data, ie. at skb->end.
+ */
+struct skb_shared_info {
+ atomic_t dataref;
+ unsigned int nr_frags;
+ struct sk_buff *frag_list;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+/**
+ * struct sk_buff - socket buffer
+ * @next: Next buffer in list
+ * @prev: Previous buffer in list
+ * @list: List we are on
+ * @sk: Socket we are owned by
+ * @stamp: Time we arrived
+ * @dev: Device we arrived on/are leaving by
+ * @physindev: Physical device we arrived on (see bridge-nf)
+ * @physoutdev: Physical device we will leave by (see bridge-nf)
+ * @h: Transport layer header
+ * @nh: Network layer header
+ * @mac: Link layer header
+ * @dst: FIXME: Describe this field
+ * @cb: Control buffer. Free for use by every layer. Put private vars here
+ * @len: Length of actual data
+ * @data_len: Data length
+ * @csum: Checksum
+ * @__unused: Dead field, may be reused
+ * @cloned: Head may be cloned (check refcnt to be sure)
+ * @pkt_type: Packet class
+ * @ip_summed: Driver fed us an IP checksum
+ * @priority: Packet queueing priority
+ * @users: User count - see {datagram,tcp}.c
+ * @protocol: Packet protocol from driver
+ * @security: Security level of packet
+ * @truesize: Buffer size
+ * @head: Head of buffer
+ * @data: Data head pointer
+ * @tail: Tail pointer
+ * @end: End pointer
+ * @destructor: Destruct function
+ * @nfmark: Can be used for communication between hooks
+ * @nfcache: Cache info
+ * @nfct: Associated connection, if any
+ * @nf_debug: Netfilter debugging
+ * @tc_index: Traffic control index
+ */
+
+struct sk_buff {
+ /* These two members must be first. */
+ struct sk_buff *next;
+ struct sk_buff *prev;
+
+ struct sk_buff_head *list;
+ struct sock *sk;
+ struct timeval stamp;
+ struct net_device *dev;
+ struct net_device *physindev;
+ struct net_device *physoutdev;
+
+ union {
+ struct tcphdr *th;
+ struct udphdr *uh;
+ struct icmphdr *icmph;
+ struct igmphdr *igmph;
+ struct iphdr *ipiph;
+ unsigned char *raw;
+ } h;
+
+ union {
+ struct iphdr *iph;
+ struct ipv6hdr *ipv6h;
+ struct arphdr *arph;
+ unsigned char *raw;
+ } nh;
+
+ union {
+ struct ethhdr *ethernet;
+ unsigned char *raw;
+ } mac;
+
+ struct dst_entry *dst;
+
+ /*
+ * This is the control buffer. It is free to use for every
+ * layer. Please put your private variables there. If you
+ * want to keep them across layers you have to do a skb_clone()
+ * first. This is owned by whoever has the skb queued ATM.
+ */
+ char cb[48];
+
+ unsigned int len,
+ data_len,
+ csum;
+ unsigned char __unused,
+ cloned,
+ pkt_type,
+ ip_summed;
+ __u32 priority;
+ atomic_t users;
+ unsigned short protocol,
+ security;
+ unsigned int truesize;
+
+ unsigned char *head,
+ *data,
+ *tail,
+ *end;
+
+ void (*destructor)(struct sk_buff *skb);
+#ifdef CONFIG_NETFILTER
+ unsigned long nfmark;
+ __u32 nfcache;
+ struct nf_ct_info *nfct;
+#ifdef CONFIG_NETFILTER_DEBUG
+ unsigned int nf_debug;
+#endif
+#endif /* CONFIG_NETFILTER */
+#if defined(CONFIG_HIPPI)
+ union {
+ __u32 ifield;
+ } private;
+#endif
+#ifdef CONFIG_NET_SCHED
+ __u32 tc_index; /* traffic control index */
+#endif
+};
+
+#define SK_WMEM_MAX 65535
+#define SK_RMEM_MAX 65535
+
+#ifdef __KERNEL__
+/*
+ * Handling routines are only of interest to the kernel
+ */
+#include <linux/slab.h>
+
+#include <asm/system.h>
+
+extern void __kfree_skb(struct sk_buff *skb);
+extern struct sk_buff *alloc_skb(unsigned int size, int priority);
+extern void kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority);
+extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
+extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask);
+extern int pskb_expand_head(struct sk_buff *skb,
+ int nhead, int ntail, int gfp_mask);
+extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
+ unsigned int headroom);
+extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+ int newheadroom, int newtailroom,
+ int priority);
+#define dev_kfree_skb(a) kfree_skb(a)
+extern void skb_over_panic(struct sk_buff *skb, int len,
+ void *here);
+extern void skb_under_panic(struct sk_buff *skb, int len,
+ void *here);
+
+/* Internal */
+#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end))
+
+/**
+ * skb_queue_empty - check if a queue is empty
+ * @list: queue head
+ *
+ * Returns true if the queue is empty, false otherwise.
+ */
+static inline int skb_queue_empty(struct sk_buff_head *list)
+{
+ return list->next == (struct sk_buff *)list;
+}
+
+/**
+ * skb_get - reference buffer
+ * @skb: buffer to reference
+ *
+ * Makes another reference to a socket buffer and returns a pointer
+ * to the buffer.
+ */
+static inline struct sk_buff *skb_get(struct sk_buff *skb)
+{
+ atomic_inc(&skb->users);
+ return skb;
+}
+
+/*
+ * If users == 1, we are the only owner and can avoid a redundant
+ * atomic change.
+ */
+
+/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+static inline void kfree_skb(struct sk_buff *skb)
+{
+ if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
+ __kfree_skb(skb);
+}
+
+/* Use this if you didn't touch the skb state [for fast switching] */
+static inline void kfree_skb_fast(struct sk_buff *skb)
+{
+ if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
+ kfree_skbmem(skb);
+}
+
+/**
+ * skb_cloned - is the buffer a clone
+ * @skb: buffer to check
+ *
+ * Returns true if the buffer was generated with skb_clone() and is
+ * one of multiple shared copies of the buffer. Cloned buffers are
+ * shared data so must not be written to under normal circumstances.
+ */
+static inline int skb_cloned(struct sk_buff *skb)
+{
+ return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
+}
+
+/**
+ * skb_shared - is the buffer shared
+ * @skb: buffer to check
+ *
+ * Returns true if more than one person has a reference to this
+ * buffer.
+ */
+static inline int skb_shared(struct sk_buff *skb)
+{
+ return atomic_read(&skb->users) != 1;
+}
+
+/**
+ * skb_share_check - check if buffer is shared and if so clone it
+ * @skb: buffer to check
+ * @pri: priority for memory allocation
+ *
+ * If the buffer is shared the buffer is cloned and the old copy
+ * drops a reference. A new clone with a single reference is returned.
+ * If the buffer is not shared the original buffer is returned. When
+ * called from interrupt context or with spinlocks held @pri must
+ * be %GFP_ATOMIC.
+ *
+ * NULL is returned on a memory allocation failure.
+ */
+static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
+{
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb = skb_clone(skb, pri);
+ kfree_skb(skb);
+ skb = nskb;
+ }
+ return skb;
+}
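
Example (not part of the patch): the usual calling pattern before
writing to a received buffer.

    skb = skb_share_check(skb, GFP_ATOMIC);
    if (skb == NULL)
            return;         /* clone failed; our reference was dropped */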
+
+/*
+ * Copy shared buffers into a new sk_buff. We effectively do COW on
+ * packets to handle cases where we have a local reader and forwarding,
+ * and a couple of other messy ones. The normal one is tcpdumping
+ * a packet that's being forwarded.
+ */
+
+/**
+ * skb_unshare - make a copy of a shared buffer
+ * @skb: buffer to check
+ * @pri: priority for memory allocation
+ *
+ * If the socket buffer is a clone then this function creates a new
+ * copy of the data, drops a reference count on the old copy and returns
+ * the new copy with the reference count at 1. If the buffer is not a clone
+ * the original buffer is returned. When called with a spinlock held or
+ * from interrupt state @pri must be %GFP_ATOMIC
+ *
+ * %NULL is returned on a memory allocation failure.
+ */
+static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
+{
+ if (skb_cloned(skb)) {
+ struct sk_buff *nskb = skb_copy(skb, pri);
+ kfree_skb(skb); /* Free our shared copy */
+ skb = nskb;
+ }
+ return skb;
+}
+
+/**
+ * skb_peek
+ * @list_: list to peek at
+ *
+ * Peek an &sk_buff. Unlike most other operations you _MUST_
+ * be careful with this one. A peek leaves the buffer on the
+ * list and someone else may run off with it. You must hold
+ * the appropriate locks or have a private queue to do this.
+ *
+ * Returns %NULL for an empty list or a pointer to the head element.
+ * The reference count is not incremented and the reference is therefore
+ * volatile. Use with caution.
+ */
+static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
+{
+ struct sk_buff *list = ((struct sk_buff *)list_)->next;
+ if (list == (struct sk_buff *)list_)
+ list = NULL;
+ return list;
+}
+
+/**
+ * skb_peek_tail
+ * @list_: list to peek at
+ *
+ * Peek an &sk_buff. Unlike most other operations you _MUST_
+ * be careful with this one. A peek leaves the buffer on the
+ * list and someone else may run off with it. You must hold
+ * the appropriate locks or have a private queue to do this.
+ *
+ * Returns %NULL for an empty list or a pointer to the tail element.
+ * The reference count is not incremented and the reference is therefore
+ * volatile. Use with caution.
+ */
+static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
+{
+ struct sk_buff *list = ((struct sk_buff *)list_)->prev;
+ if (list == (struct sk_buff *)list_)
+ list = NULL;
+ return list;
+}
+
+/**
+ * skb_queue_len - get queue length
+ * @list_: list to measure
+ *
+ * Return the length of an &sk_buff queue.
+ */
+static inline __u32 skb_queue_len(struct sk_buff_head *list_)
+{
+ return list_->qlen;
+}
+
+static inline void skb_queue_head_init(struct sk_buff_head *list)
+{
+ spin_lock_init(&list->lock);
+ list->prev = list->next = (struct sk_buff *)list;
+ list->qlen = 0;
+}
+
+/*
+ * Insert an sk_buff at the start of a list.
+ *
+ * The "__skb_xxxx()" functions are the non-atomic ones that
+ * can only be called with interrupts disabled.
+ */
+
+/**
+ * __skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+static inline void __skb_queue_head(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ prev = (struct sk_buff *)list;
+ next = prev->next;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+}
+
+
+/**
+ * skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of the list. This function takes the
+ * list lock and can be used safely with other locking &sk_buff
+ * functions.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+static inline void skb_queue_head(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_queue_head(list, newsk);
+ spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ * __skb_queue_tail - queue a buffer at the list tail
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the end of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+static inline void __skb_queue_tail(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ next = (struct sk_buff *)list;
+ prev = next->prev;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+}
+
+/**
+ * skb_queue_tail - queue a buffer at the list tail
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the tail of the list. This function takes the
+ * list lock and can be used safely with other locking &sk_buff
+ * functions.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+static inline void skb_queue_tail(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_queue_tail(list, newsk);
+ spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ * __skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The head item is
+ * returned or %NULL if the list is empty.
+ */
+static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev, *result;
+
+ prev = (struct sk_buff *) list;
+ next = prev->next;
+ result = NULL;
+ if (next != prev) {
+ result = next;
+ next = next->next;
+ list->qlen--;
+ next->prev = prev;
+ prev->next = next;
+ result->next = result->prev = NULL;
+ result->list = NULL;
+ }
+ return result;
+}
+
+/**
+ * skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. The list lock is taken so the function
+ * may be used safely with other locking list functions. The head item is
+ * returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+ unsigned long flags;
+ struct sk_buff *result;
+
+ spin_lock_irqsave(&list->lock, flags);
+ result = __skb_dequeue(list);
+ spin_unlock_irqrestore(&list->lock, flags);
+ return result;
+}
+
+/*
+ * Insert a packet on a list.
+ */
+
+static inline void __skb_insert(struct sk_buff *newsk,
+ struct sk_buff *prev, struct sk_buff *next,
+ struct sk_buff_head *list)
+{
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+ newsk->list = list;
+ list->qlen++;
+}
+
+/**
+ * skb_insert - insert a buffer
+ * @old: buffer to insert before
+ * @newsk: buffer to insert
+ *
+ * Place a packet before a given packet in a list. The list locks are taken
+ * and this function is atomic with respect to other list locked calls.
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&old->list->lock, flags);
+ __skb_insert(newsk, old->prev, old, old->list);
+ spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+/*
+ * Place a packet after a given packet in a list.
+ */
+
+static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+ __skb_insert(newsk, old, old->next, old->list);
+}
+
+/**
+ * skb_append - append a buffer
+ * @old: buffer to insert after
+ * @newsk: buffer to insert
+ *
+ * Place a packet after a given packet in a list. The list locks are taken
+ * and this function is atomic with respect to other list locked calls.
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+
+static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&old->list->lock, flags);
+ __skb_append(old, newsk);
+ spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+/*
+ * remove sk_buff from list. _Must_ be called atomically, and with
+ * the list known..
+ */
+static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev;
+
+ list->qlen--;
+ next = skb->next;
+ prev = skb->prev;
+ skb->next = skb->prev = NULL;
+ skb->list = NULL;
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * skb_unlink - remove a buffer from a list
+ * @skb: buffer to remove
+ *
+ * Remove a packet from a list. The list locks are taken and this
+ * function is atomic with respect to other list locked calls.
+ *
+ * Works even without knowing the list it is sitting on, which can be
+ * handy at times. It also means that THE LIST MUST EXIST when you
+ * unlink. Thus a list must have its contents unlinked before it is
+ * destroyed.
+ */
+static inline void skb_unlink(struct sk_buff *skb)
+{
+ struct sk_buff_head *list = skb->list;
+
+ if (list) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+ if (skb->list == list)
+ __skb_unlink(skb, skb->list);
+ spin_unlock_irqrestore(&list->lock, flags);
+ }
+}
+
+/* XXX: more streamlined implementation */
+
+/**
+ * __skb_dequeue_tail - remove from the tail of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the tail of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The tail item is
+ * returned or %NULL if the list is empty.
+ */
+static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
+{
+ struct sk_buff *skb = skb_peek_tail(list);
+ if (skb)
+ __skb_unlink(skb, list);
+ return skb;
+}
+
+/**
+ * skb_dequeue_tail - remove from the tail of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the tail of the list. The list lock is taken so the function
+ * may be used safely with other locking list functions. The tail item is
+ * returned or %NULL if the list is empty.
+ */
+static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+ unsigned long flags;
+ struct sk_buff *result;
+
+ spin_lock_irqsave(&list->lock, flags);
+ result = __skb_dequeue_tail(list);
+ spin_unlock_irqrestore(&list->lock, flags);
+ return result;
+}
+
+static inline int skb_is_nonlinear(const struct sk_buff *skb)
+{
+ return skb->data_len;
+}
+
+static inline int skb_headlen(const struct sk_buff *skb)
+{
+ return skb->len - skb->data_len;
+}
+
+#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) \
+ BUG(); } while (0)
+#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) \
+ BUG(); } while (0)
+#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) \
+ BUG(); } while (0)
+
+/*
+ * Add data to an sk_buff
+ */
+static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp = skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail += len;
+ skb->len += len;
+ return tmp;
+}
+
+/**
+ * skb_put - add data to a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer. If this would
+ * exceed the total buffer size the kernel will panic. A pointer to the
+ * first byte of the extra data is returned.
+ */
+static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp = skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail += len;
+ skb->len += len;
+ if (skb->tail>skb->end)
+ skb_over_panic(skb, len, current_text_addr());
+ return tmp;
+}
+
+static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data -= len;
+ skb->len += len;
+ return skb->data;
+}
+
+/**
+ * skb_push - add data to the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer at the buffer
+ * start. If this would exceed the total buffer headroom the kernel will
+ * panic. A pointer to the first byte of the extra data is returned.
+ */
+static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data -= len;
+ skb->len += len;
+ if (skb->data<skb->head)
+ skb_under_panic(skb, len, current_text_addr());
+ return skb->data;
+}
+
+static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ skb->len -= len;
+ if (skb->len < skb->data_len)
+ BUG();
+ return skb->data += len;
+}
+
+/**
+ * skb_pull - remove data from the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to remove
+ *
+ * This function removes data from the start of a buffer, returning
+ * the memory to the headroom. A pointer to the next data in the buffer
+ * is returned. Once the data has been pulled future pushes will overwrite
+ * the old data.
+ */
+static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ return (len > skb->len) ? NULL : __skb_pull(skb, len);
+}
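
Example (not part of the patch): an illustrative receive-path use,
stripping the 14-byte ethernet header so skb->data points at the
network header.

    skb->mac.raw = skb->data;
    skb_pull(skb, ETH_HLEN);
    skb->nh.raw = skb->data;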
+
+extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
+
+static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len > skb_headlen(skb) &&
+ !__pskb_pull_tail(skb, len-skb_headlen(skb)))
+ return NULL;
+ skb->len -= len;
+ return skb->data += len;
+}
+
+static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ return (len > skb->len) ? NULL : __pskb_pull(skb, len);
+}
+
+static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len <= skb_headlen(skb))
+ return 1;
+ if (len > skb->len)
+ return 0;
+ return __pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL;
+}
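
Example (not part of the patch): the canonical guard before touching a
header that may live in a paged (nonlinear) part of the buffer.

    if (!pskb_may_pull(skb, sizeof(struct iphdr)))
            goto drop;      /* truncated packet */
    iph = skb->nh.iph;      /* header is now in the linear area */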
+
+/**
+ * skb_headroom - bytes at buffer head
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the head of an &sk_buff.
+ */
+static inline int skb_headroom(const struct sk_buff *skb)
+{
+ return skb->data - skb->head;
+}
+
+/**
+ * skb_tailroom - bytes at buffer end
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the tail of an sk_buff
+ */
+static inline int skb_tailroom(const struct sk_buff *skb)
+{
+ return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
+}
+
+/**
+ * skb_reserve - adjust headroom
+ * @skb: buffer to alter
+ * @len: bytes to move
+ *
+ * Increase the headroom of an empty &sk_buff by reducing the tail
+ * room. This is only allowed for an empty buffer.
+ */
+static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
+{
+ skb->data += len;
+ skb->tail += len;
+}
+
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data + len;
+ } else
+ ___pskb_trim(skb, len, 0);
+}
+
+/**
+ * skb_trim - remove end from a buffer
+ * @skb: buffer to alter
+ * @len: new length
+ *
+ * Cut the length of a buffer down by removing data from the tail. If
+ * the buffer is already under the length specified it is not modified.
+ */
+static inline void skb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (skb->len > len)
+ __skb_trim(skb, len);
+}
+
+
+static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data+len;
+ return 0;
+ }
+ return ___pskb_trim(skb, len, 1);
+}
+
+static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+ return (len < skb->len) ? __pskb_trim(skb, len) : 0;
+}
+
+/**
+ * skb_orphan - orphan a buffer
+ * @skb: buffer to orphan
+ *
+ * If a buffer currently has an owner then we call the owner's
+ * destructor function and make the @skb unowned. The buffer continues
+ * to exist but is no longer charged to its former owner.
+ */
+static inline void skb_orphan(struct sk_buff *skb)
+{
+ if (skb->destructor)
+ skb->destructor(skb);
+ skb->destructor = NULL;
+ skb->sk = NULL;
+}
+
+/**
+ * skb_queue_purge - empty a list
+ * @list: list to empty
+ *
+ * Delete all buffers on an &sk_buff list. Each buffer is removed from
+ * the list and one reference dropped. This function takes the list
+ * lock and is atomic with respect to other list locking functions.
+ */
+static inline void skb_queue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+ while ((skb = skb_dequeue(list)) != NULL)
+ kfree_skb(skb);
+}
+
+/**
+ * __skb_queue_purge - empty a list
+ * @list: list to empty
+ *
+ * Delete all buffers on an &sk_buff list. Each buffer is removed from
+ * the list and one reference dropped. This function does not take the
+ * list lock and the caller must hold the relevant locks to use it.
+ */
+static inline void __skb_queue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+ while ((skb = __skb_dequeue(list)) != NULL)
+ kfree_skb(skb);
+}
+
+/**
+ * __dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
+ int gfp_mask)
+{
+ struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);
+ if (skb)
+ skb_reserve(skb, 16);
+ return skb;
+}
+
+/**
+ * dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory. Although this function
+ * allocates memory it can be called from an interrupt.
+ */
+static inline struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+ return __dev_alloc_skb(length, GFP_ATOMIC);
+}
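
Example (not part of the patch): a sketch of the classic transmit-side
pattern; len and payload are hypothetical. Headroom is reserved first
so a later skb_push() can prepend the link-layer header.

    struct sk_buff *skb = alloc_skb(ETH_HLEN + len, GFP_ATOMIC);
    if (skb == NULL)
            return -ENOMEM;
    skb_reserve(skb, ETH_HLEN);
    memcpy(skb_put(skb, len), payload, len);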
+
+/**
+ * skb_cow - copy header of skb when it is required
+ * @skb: buffer to cow
+ * @headroom: needed headroom
+ *
+ * If the skb passed lacks sufficient headroom or its data part
+ * is shared, data is reallocated. If reallocation fails, an error
+ * is returned and original skb is not changed.
+ *
+ * The result is skb with writable area skb->head...skb->tail
+ * and at least @headroom of space at head.
+ */
+static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
+{
+ int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+
+ if (delta < 0)
+ delta = 0;
+
+ if (delta || skb_cloned(skb))
+ return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
+ return 0;
+}
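
Example (not part of the patch): a hedged sketch of using skb_cow()
before mangling packet contents, as a NAT-style hook might.

    if (skb_cow(skb, skb_headroom(skb)) != 0)
            return NF_DROP;         /* reallocation failed */
    /* skb->head ... skb->tail is now private and writable */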
+
+/**
+ * skb_linearize - convert paged skb to linear one
+ * @skb: buffer to linearize
+ * @gfp: allocation mode
+ *
+ * If there is no free memory -ENOMEM is returned, otherwise zero
+ * is returned and the old skb data released.
+ */
+int skb_linearize(struct sk_buff *skb, int gfp);
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+ if (in_irq())
+ BUG();
+
+ local_bh_disable();
+#endif
+ return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+ kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
+#ifdef CONFIG_HIGHMEM
+ local_bh_enable();
+#endif
+}
+
+#define skb_queue_walk(queue, skb) \
+ for (skb = (queue)->next; \
+ (skb != (struct sk_buff *)(queue)); \
+ skb = skb->next)
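
Example (not part of the patch): counting queued bytes; the walk takes
no lock, so the caller must hold the list lock itself.

    struct sk_buff *skb;
    unsigned int bytes = 0;

    skb_queue_walk(queue, skb)
            bytes += skb->len;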
+
+
+extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+ int noblock, int *err);
+extern unsigned int datagram_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait);
+extern int skb_copy_datagram(const struct sk_buff *from,
+ int offset, char *to, int size);
+extern int skb_copy_datagram_iovec(const struct sk_buff *from,
+ int offset, struct iovec *to,
+ int size);
+extern int skb_copy_and_csum_datagram(const struct sk_buff *skb,
+ int offset, u8 *to, int len,
+ unsigned int *csump);
+extern int skb_copy_and_csum_datagram_iovec(const
+ struct sk_buff *skb,
+ int hlen,
+ struct iovec *iov);
+extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+extern unsigned int skb_checksum(const struct sk_buff *skb, int offset,
+ int len, unsigned int csum);
+extern int skb_copy_bits(const struct sk_buff *skb, int offset,
+ void *to, int len);
+extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb,
+ int offset, u8 *to, int len,
+ unsigned int csum);
+extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+
+extern void skb_init(void);
+extern void skb_add_mtu(int mtu);
+
+#ifdef CONFIG_NETFILTER
+static inline void nf_conntrack_put(struct nf_ct_info *nfct)
+{
+ if (nfct && atomic_dec_and_test(&nfct->master->use))
+ nfct->master->destroy(nfct->master);
+}
+static inline void nf_conntrack_get(struct nf_ct_info *nfct)
+{
+ if (nfct)
+ atomic_inc(&nfct->master->use);
+}
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_SKBUFF_H */
diff --git a/br-nf-bds/linux2.5/net/Config.in b/br-nf-bds/linux2.5/net/Config.in
new file mode 100644
index 0000000..4c408f2
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/Config.in
@@ -0,0 +1,97 @@
+#
+# Network configuration
+#
+mainmenu_option next_comment
+comment 'Networking options'
+tristate 'Packet socket' CONFIG_PACKET
+if [ "$CONFIG_PACKET" != "n" ]; then
+ bool ' Packet socket: mmapped IO' CONFIG_PACKET_MMAP
+fi
+
+tristate 'Netlink device emulation' CONFIG_NETLINK_DEV
+
+bool 'Network packet filtering (replaces ipchains)' CONFIG_NETFILTER
+if [ "$CONFIG_NETFILTER" = "y" ]; then
+ bool ' Network packet filtering debugging' CONFIG_NETFILTER_DEBUG
+fi
+bool 'Socket Filtering' CONFIG_FILTER
+tristate 'Unix domain sockets' CONFIG_UNIX
+bool 'TCP/IP networking' CONFIG_INET
+if [ "$CONFIG_INET" = "y" ]; then
+ source net/ipv4/Config.in
+ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+# IPv6 as module will cause a CRASH if you try to unload it
+ tristate ' The IPv6 protocol (EXPERIMENTAL)' CONFIG_IPV6
+ if [ "$CONFIG_IPV6" != "n" ]; then
+ source net/ipv6/Config.in
+ fi
+ fi
+fi
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+ bool 'Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)' CONFIG_ATM
+ if [ "$CONFIG_ATM" = "y" ]; then
+ if [ "$CONFIG_INET" = "y" ]; then
+ bool ' Classical IP over ATM (EXPERIMENTAL)' CONFIG_ATM_CLIP
+ if [ "$CONFIG_ATM_CLIP" = "y" ]; then
+ bool ' Do NOT send ICMP if no neighbour (EXPERIMENTAL)' CONFIG_ATM_CLIP_NO_ICMP
+ fi
+ fi
+ tristate ' LAN Emulation (LANE) support (EXPERIMENTAL)' CONFIG_ATM_LANE
+ if [ "$CONFIG_INET" = "y" -a "$CONFIG_ATM_LANE" != "n" ]; then
+ tristate ' Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)' CONFIG_ATM_MPOA
+ fi
+ fi
+fi
+tristate '802.1Q VLAN Support' CONFIG_VLAN_8021Q
+
+comment ' '
+tristate 'The IPX protocol' CONFIG_IPX
+if [ "$CONFIG_IPX" != "n" ]; then
+ source net/ipx/Config.in
+fi
+
+tristate 'Appletalk protocol support' CONFIG_ATALK
+source drivers/net/appletalk/Config.in
+
+tristate 'DECnet Support' CONFIG_DECNET
+if [ "$CONFIG_DECNET" != "n" ]; then
+ source net/decnet/Config.in
+fi
+dep_tristate '802.1d Ethernet Bridging' CONFIG_BRIDGE $CONFIG_INET
+if [ "$CONFIG_BRIDGE" != "n" -a "$CONFIG_NETFILTER" != "n" ]; then
+ source net/bridge/netfilter/Config.in
+fi
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+ if [ "$CONFIG_BRIDGE" != "n" -a "$CONFIG_NETFILTER" != "n" ]; then
+ bool ' netfilter (firewalling) support' CONFIG_BRIDGE_NF
+ fi
+ tristate 'CCITT X.25 Packet Layer (EXPERIMENTAL)' CONFIG_X25
+ tristate 'LAPB Data Link Driver (EXPERIMENTAL)' CONFIG_LAPB
+ tristate 'ANSI/IEEE 802.2 Data link layer protocol' CONFIG_LLC
+ if [ "$CONFIG_LLC" != "n" ]; then
+ # When NETBEUI is added the following line will be a tristate
+ define_bool CONFIG_LLC_UI y
+ fi
+ bool 'Frame Diverter (EXPERIMENTAL)' CONFIG_NET_DIVERT
+ if [ "$CONFIG_INET" = "y" ]; then
+ tristate 'Acorn Econet/AUN protocols (EXPERIMENTAL)' CONFIG_ECONET
+ if [ "$CONFIG_ECONET" != "n" ]; then
+ bool ' AUN over UDP' CONFIG_ECONET_AUNUDP
+ bool ' Native Econet' CONFIG_ECONET_NATIVE
+ fi
+ fi
+ tristate 'WAN router' CONFIG_WAN_ROUTER
+ bool 'Fast switching (read help!)' CONFIG_NET_FASTROUTE
+ bool 'Forwarding between high speed interfaces' CONFIG_NET_HW_FLOWCONTROL
+fi
+
+mainmenu_option next_comment
+comment 'QoS and/or fair queueing'
+bool 'QoS and/or fair queueing' CONFIG_NET_SCHED
+if [ "$CONFIG_NET_SCHED" = "y" ]; then
+ source net/sched/Config.in
+fi
+#bool 'Network code profiler' CONFIG_NET_PROFILE
+endmenu
+
+endmenu
diff --git a/br-nf-bds/linux2.5/net/bridge/Makefile b/br-nf-bds/linux2.5/net/bridge/Makefile
new file mode 100644
index 0000000..5eed6ca
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/bridge/Makefile
@@ -0,0 +1,24 @@
+#
+# Makefile for the IEEE 802.1d ethernet bridging layer.
+#
+
+ifneq ($(CONFIG_BRIDGE_EBT_BROUTE),n)
+ifneq ($(CONFIG_BRIDGE_EBT_BROUTE),)
+export-objs := br.o
+endif
+endif
+
+obj-$(CONFIG_BRIDGE) += bridge.o
+
+bridge-objs := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
+ br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \
+ br_stp_if.o br_stp_timer.o
+obj-$(CONFIG_BRIDGE_EBT) += netfilter/
+
+ifneq ($(CONFIG_BRIDGE_NF),n)
+ifneq ($(CONFIG_BRIDGE_NF),)
+bridge-objs += br_netfilter.o
+endif
+endif
+
+include $(TOPDIR)/Rules.make
diff --git a/br-nf-bds/linux2.5/net/bridge/br.c b/br-nf-bds/linux2.5/net/bridge/br.c
new file mode 100644
index 0000000..30681c1
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/bridge/br.c
@@ -0,0 +1,100 @@
+/*
+ * Generic parts
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ *
+ * $Id: br.c,v 1.1 2002/08/24 09:28:29 bdschuym Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/init.h>
+#include <linux/if_bridge.h>
+#include <asm/uaccess.h>
+#include "br_private.h"
+
+#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#include "../atm/lec.h"
+#endif
+
+#if defined(CONFIG_BRIDGE_EBT_BROUTE) || \
+ defined(CONFIG_BRIDGE_EBT_BROUTE_MODULE)
+unsigned int (*broute_decision) (unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *)) = NULL;
+#endif
+
+void br_dec_use_count()
+{
+ MOD_DEC_USE_COUNT;
+}
+
+void br_inc_use_count()
+{
+ MOD_INC_USE_COUNT;
+}
+
+static int __init br_init(void)
+{
+ printk(KERN_INFO "NET4: Ethernet Bridge 008 for NET4.0\n");
+
+#ifdef CONFIG_BRIDGE_NF
+ if (br_netfilter_init())
+ return 1;
+#endif
+
+ br_handle_frame_hook = br_handle_frame;
+ br_ioctl_hook = br_ioctl_deviceless_stub;
+#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+ br_fdb_get_hook = br_fdb_get;
+ br_fdb_put_hook = br_fdb_put;
+#endif
+ register_netdevice_notifier(&br_device_notifier);
+
+ return 0;
+}
+
+static void __br_clear_frame_hook(void)
+{
+ br_handle_frame_hook = NULL;
+}
+
+static void __br_clear_ioctl_hook(void)
+{
+ br_ioctl_hook = NULL;
+}
+
+static void __exit br_deinit(void)
+{
+#ifdef CONFIG_BRIDGE_NF
+ br_netfilter_fini();
+#endif
+ unregister_netdevice_notifier(&br_device_notifier);
+ br_call_ioctl_atomic(__br_clear_ioctl_hook);
+ net_call_rx_atomic(__br_clear_frame_hook);
+#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+ br_fdb_get_hook = NULL;
+ br_fdb_put_hook = NULL;
+#endif
+}
+
+#if defined(CONFIG_BRIDGE_EBT_BROUTE) || \
+ defined(CONFIG_BRIDGE_EBT_BROUTE_MODULE)
+EXPORT_SYMBOL(broute_decision);
+#endif
+
+module_init(br_init)
+module_exit(br_deinit)
+MODULE_LICENSE("GPL");
diff --git a/br-nf-bds/linux2.5/net/bridge/br_forward.c b/br-nf-bds/linux2.5/net/bridge/br_forward.c
new file mode 100644
index 0000000..1854fa4
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/bridge/br_forward.c
@@ -0,0 +1,148 @@
+/*
+ * Forwarding decision
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ *
+ * $Id: br_forward.c,v 1.1 2002/08/24 09:28:29 bdschuym Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/if_bridge.h>
+#include <linux/netfilter_bridge.h>
+#include "br_private.h"
+
+static inline int should_deliver(struct net_bridge_port *p, struct sk_buff *skb)
+{
+ if (skb->dev == p->dev ||
+ p->state != BR_STATE_FORWARDING)
+ return 0;
+
+ return 1;
+}
+
+int br_dev_queue_push_xmit(struct sk_buff *skb)
+{
+ skb_push(skb, ETH_HLEN);
+ dev_queue_xmit(skb);
+
+ return 0;
+}
+
+int br_forward_finish(struct sk_buff *skb)
+{
+ NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
+ br_dev_queue_push_xmit);
+
+ return 0;
+}
+
+static void __br_deliver(struct net_bridge_port *to, struct sk_buff *skb)
+{
+ skb->dev = to->dev;
+ NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+ br_forward_finish);
+}
+
+static void __br_forward(struct net_bridge_port *to, struct sk_buff *skb)
+{
+ struct net_device *indev;
+
+ indev = skb->dev;
+ skb->dev = to->dev;
+
+ NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
+ br_forward_finish);
+}
+
+/* called under bridge lock */
+void br_deliver(struct net_bridge_port *to, struct sk_buff *skb)
+{
+ if (should_deliver(to, skb)) {
+ __br_deliver(to, skb);
+ return;
+ }
+
+ kfree_skb(skb);
+}
+
+/* called under bridge lock */
+void br_forward(struct net_bridge_port *to, struct sk_buff *skb)
+{
+ if (should_deliver(to, skb)) {
+ __br_forward(to, skb);
+ return;
+ }
+
+ kfree_skb(skb);
+}
+
+/* called under bridge lock */
+static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
+ void (*__packet_hook)(struct net_bridge_port *p, struct sk_buff *skb))
+{
+ struct net_bridge_port *p;
+ struct net_bridge_port *prev;
+
+ if (clone) {
+ struct sk_buff *skb2;
+
+ if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+ br->statistics.tx_dropped++;
+ return;
+ }
+
+ skb = skb2;
+ }
+
+ prev = NULL;
+
+ p = br->port_list;
+ while (p != NULL) {
+ if (should_deliver(p, skb)) {
+ if (prev != NULL) {
+ struct sk_buff *skb2;
+
+ if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+ br->statistics.tx_dropped++;
+ kfree_skb(skb);
+ return;
+ }
+
+ __packet_hook(prev, skb2);
+ }
+
+ prev = p;
+ }
+
+ p = p->next;
+ }
+
+ if (prev != NULL) {
+ __packet_hook(prev, skb);
+ return;
+ }
+
+ kfree_skb(skb);
+}
+
+/* called under bridge lock */
+void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone)
+{
+ br_flood(br, skb, clone, __br_deliver);
+}
+
+/* called under bridge lock */
+void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone)
+{
+ br_flood(br, skb, clone, __br_forward);
+}
diff --git a/br-nf-bds/linux2.5/net/bridge/br_input.c b/br-nf-bds/linux2.5/net/bridge/br_input.c
new file mode 100644
index 0000000..d467dda
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/bridge/br_input.c
@@ -0,0 +1,181 @@
+/*
+ * Handle incoming frames
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ *
+ * $Id: br_input.c,v 1.1 2002/08/24 09:28:29 bdschuym Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/netfilter_bridge.h>
+#include "br_private.h"
+#if defined(CONFIG_BRIDGE_EBT_BROUTE) || \
+ defined(CONFIG_BRIDGE_EBT_BROUTE_MODULE)
+#include <linux/netfilter.h>
+#endif
+
+unsigned char bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+
+static int br_pass_frame_up_finish(struct sk_buff *skb)
+{
+ netif_rx(skb);
+
+ return 0;
+}
+
+static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
+{
+ struct net_device *indev;
+
+ br->statistics.rx_packets++;
+ br->statistics.rx_bytes += skb->len;
+
+ indev = skb->dev;
+ skb->dev = &br->dev;
+ skb->pkt_type = PACKET_HOST;
+ skb_push(skb, ETH_HLEN);
+ skb->protocol = eth_type_trans(skb, &br->dev);
+
+ NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
+ br_pass_frame_up_finish);
+}
+
+int br_handle_frame_finish(struct sk_buff *skb)
+{
+ struct net_bridge *br;
+ unsigned char *dest;
+ struct net_bridge_fdb_entry *dst;
+ struct net_bridge_port *p;
+ int passedup;
+
+ dest = skb->mac.ethernet->h_dest;
+
+ p = skb->dev->br_port;
+ if (p == NULL)
+ goto err_nolock;
+
+ br = p->br;
+ read_lock(&br->lock);
+ if (skb->dev->br_port == NULL)
+ goto err;
+
+ passedup = 0;
+ if (br->dev.flags & IFF_PROMISC) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2 != NULL) {
+ passedup = 1;
+ br_pass_frame_up(br, skb2);
+ }
+ }
+
+ if (dest[0] & 1) {
+ br_flood_forward(br, skb, !passedup);
+ if (!passedup)
+ br_pass_frame_up(br, skb);
+ goto out;
+ }
+
+ dst = br_fdb_get(br, dest);
+ if (dst != NULL && dst->is_local) {
+ if (!passedup)
+ br_pass_frame_up(br, skb);
+ else
+ kfree_skb(skb);
+ br_fdb_put(dst);
+ goto out;
+ }
+
+ if (dst != NULL) {
+ br_forward(dst->dst, skb);
+ br_fdb_put(dst);
+ goto out;
+ }
+
+ br_flood_forward(br, skb, 0);
+
+out:
+ read_unlock(&br->lock);
+ return 0;
+
+err:
+ read_unlock(&br->lock);
+err_nolock:
+ kfree_skb(skb);
+ return 0;
+}
+
+int br_handle_frame(struct sk_buff *skb)
+{
+ struct net_bridge *br;
+ unsigned char *dest;
+ struct net_bridge_port *p;
+
+ dest = skb->mac.ethernet->h_dest;
+
+ p = skb->dev->br_port;
+ if (p == NULL)
+ goto err_nolock;
+
+ br = p->br;
+ read_lock(&br->lock);
+ if (skb->dev->br_port == NULL)
+ goto err;
+
+ if (!(br->dev.flags & IFF_UP) ||
+ p->state == BR_STATE_DISABLED)
+ goto err;
+
+ if (skb->mac.ethernet->h_source[0] & 1)
+ goto err;
+
+ if (p->state == BR_STATE_LEARNING ||
+ p->state == BR_STATE_FORWARDING)
+ br_fdb_insert(br, p, skb->mac.ethernet->h_source, 0);
+
+ if (br->stp_enabled &&
+ !memcmp(dest, bridge_ula, 5) &&
+ !(dest[5] & 0xF0))
+ goto handle_special_frame;
+
+ if (p->state == BR_STATE_FORWARDING) {
+#if defined(CONFIG_BRIDGE_EBT_BROUTE) || \
+ defined(CONFIG_BRIDGE_EBT_BROUTE_MODULE)
+ if (broute_decision && broute_decision(NF_BR_BROUTING, &skb,
+ skb->dev, NULL, NULL) == NF_DROP)
+ return -1;
+#endif
+ NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+ br_handle_frame_finish);
+ read_unlock(&br->lock);
+ return 0;
+ }
+
+err:
+ read_unlock(&br->lock);
+err_nolock:
+ kfree_skb(skb);
+ return 0;
+
+handle_special_frame:
+ if (!dest[5]) {
+ br_stp_handle_bpdu(skb);
+ read_unlock(&br->lock);
+ return 0;
+ }
+
+ kfree_skb(skb);
+ read_unlock(&br->lock);
+ return 0;
+}
diff --git a/br-nf-bds/linux2.5/net/bridge/br_netfilter.c b/br-nf-bds/linux2.5/net/bridge/br_netfilter.c
new file mode 100644
index 0000000..929588e
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/bridge/br_netfilter.c
@@ -0,0 +1,567 @@
+/*
+ * Handle firewalling
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ * Bart De Schuymer <bart.de.schuymer@pandora.be>
+ *
+ * $Id: br_netfilter.c,v 1.1 2002/08/24 09:28:29 bdschuym Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/in_route.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include "br_private.h"
+
+
+#ifndef WE_REALLY_INSIST_ON_NOT_HAVING_NAT_SUPPORT
+/* As the original source/destination addresses are variables private to this
+ * file, we store them in unused space at the end of the control buffer.
+ * On 64-bit platforms the TCP control buffer size still leaves us 8 bytes
+ * of space at the end, so that fits. Usage of the original source address
+ * and the original destination address never overlaps (daddr is needed
+ * around PRE_ROUTING, and saddr around POST_ROUTING), so that's okay as
+ * well.
+ */
+#define skb_origaddr(skb) (*((u32 *)((skb)->cb + sizeof((skb)->cb) - 4)))
+
+#define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr)
+#define store_orig_srcaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->saddr)
+#define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr)
+#define snat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->saddr)
+#else
+#define store_orig_dstaddr(skb)
+#define store_orig_srcaddr(skb)
+#define dnat_took_place(skb) (0)
+#define snat_took_place(skb) (0)
+#endif
+
+
+#define has_bridge_parent(device) ((device)->br_port != NULL)
+#define bridge_parent(device) (&((device)->br_port->br->dev))
+
+
+/* As opposed to the DNAT case, for the SNAT case it's not quite
+ * clear what we should do with ethernet addresses in NAT'ed
+ * packets. Use this heuristic for now.
+ */
+static inline void __maybe_fixup_src_address(struct sk_buff *skb)
+{
+ if (snat_took_place(skb) &&
+ inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL) {
+ memcpy(skb->mac.ethernet->h_source,
+ bridge_parent(skb->dev)->dev_addr,
+ ETH_ALEN);
+ }
+}
+
+
+/* We need these fake structures to make netfilter happy --
+ * lots of places assume that skb->dst != NULL, which isn't
+ * all that unreasonable.
+ *
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it. Future netfilter modules might
+ * require us to fill additional fields.
+ */
+static struct net_device __fake_net_device = {
+ hard_header_len: ETH_HLEN
+};
+
+static struct rtable __fake_rtable = {
+ u: {
+ dst: {
+ __refcnt: ATOMIC_INIT(1),
+ dev: &__fake_net_device,
+ pmtu: 1500
+ }
+ },
+
+ rt_flags: 0
+};
+
+
+/* PF_BRIDGE/PRE_ROUTING *********************************************/
+static void __br_dnat_complain(void)
+{
+ static unsigned long last_complaint = 0;
+
+ if (jiffies - last_complaint >= 5 * HZ) {
+ printk(KERN_WARNING "Performing cross-bridge DNAT requires IP "
+ "forwarding to be enabled\n");
+ last_complaint = jiffies;
+ }
+}
+
+
+static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
+{
+ skb->dev = bridge_parent(skb->dev);
+ skb->dst->output(skb);
+ return 0;
+}
+
+/* This requires some explaining. If DNAT has taken place,
+ * we will need to fix up the destination ethernet address,
+ * and this is a tricky process.
+ *
+ * There are two cases to consider:
+ * 1. The packet was DNAT'ed to a device in the same bridge
+ * port group as it was received on. We can still bridge
+ * the packet.
+ * 2. The packet was DNAT'ed to a different device, either
+ * a non-bridged device or another bridge port group.
+ * The packet will need to be routed.
+ *
+ * The way to distinguish between the two is by calling ip_route_input()
+ * and looking at skb->dst->dev, which is set to the destination device
+ * if ip_route_input() succeeds.
+ *
+ * Let us first consider the case where ip_route_input() succeeds:
+ *
+ * If skb->dst->dev equals the logical bridge device the packet came in on,
+ * we can consider this bridging. We then call skb->dst->output() which will
+ * make the packet enter br_nf_local_out() not much later. In that function
+ * it is assured that the iptables FORWARD chain is traversed for the packet.
+ *
+ * Else, the packet is considered to be routed and we just change the
+ * destination MAC address so that the packet will later be passed up to the ip
+ * stack to be routed.
+ *
+ * Let us now consider the case where ip_route_input() fails:
+ *
+ * After an "echo '0' > /proc/sys/net/ipv4/ip_forward", ip_route_input() will
+ * fail while ip_route_output() will still succeed. The source address for
+ * ip_route_output() is set to zero, so ip_route_output() thinks we're
+ * handling a locally generated packet and doesn't care whether ip
+ * forwarding is enabled. We send a warning message to the user's log
+ * telling them to turn ip forwarding on.
+ *
+ * ip_route_input() will also fail if there is no route available. Then we just
+ * drop the packet.
+ *
+ * The other special thing happening here is setting skb->physoutdev to
+ * &__fake_net_device (resp. NULL) for bridged (resp. routed) packets. This is
+ * needed so that br_nf_local_out() knows it has to give the packets to
+ * the NF_BR_FORWARD (resp. NF_BR_LOCAL_OUT) bridge hook. See that function.
+ * --Lennert, 20020411
+ * --Bart, 20020416 (updated)
+ */
+
+static int br_nf_pre_routing_finish(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct iphdr *iph = skb->nh.iph;
+
+ if (dnat_took_place(skb)) {
+ if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev)) {
+ struct rtable *rt;
+
+ if (!ip_route_output(&rt, iph->daddr, 0, iph->tos, 0)) {
+				// bridged DNAT'ed traffic is not affected
+				// by disabled ip_forwarding
+ if (((struct dst_entry *)rt)->dev == dev) {
+ skb->dst = (struct dst_entry *)rt;
+ goto bridged_dnat;
+ }
+ __br_dnat_complain();
+ dst_release((struct dst_entry *)rt);
+ }
+ kfree_skb(skb);
+ return 0;
+ } else {
+ if (skb->dst->dev == dev) {
+bridged_dnat:
+ // tell br_nf_local_out this is a bridged frame
+ skb->physoutdev = &__fake_net_device;
+ skb->dev = skb->physindev;
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+ br_nf_pre_routing_finish_bridge, 1);
+ return 0;
+ }
+ // tell br_nf_local_out this is a routed frame
+ skb->physoutdev = NULL;
+ memcpy(skb->mac.ethernet->h_dest, dev->dev_addr, ETH_ALEN);
+ }
+ } else {
+ skb->dst = (struct dst_entry *)&__fake_rtable;
+ dst_hold(skb->dst);
+ }
+
+ skb->dev = skb->physindev;
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+ br_handle_frame_finish, 1);
+
+ return 0;
+}
+
+/* Replicate the checks that IPv4 does on packet reception.
+ * Set skb->dev to the bridge device (i.e. parent of the
+ * receiving device) to make netfilter happy, the REDIRECT
+ * target in particular. Save the original destination IP
+ * address to be able to detect DNAT afterwards.
+ */
+static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+ struct iphdr *iph;
+ __u32 len;
+ struct sk_buff *skb;
+
+ if ((*pskb)->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
+ goto out;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto inhdr_error;
+
+ iph = skb->nh.iph;
+ if (iph->ihl < 5 || iph->version != 4)
+ goto inhdr_error;
+
+ if (!pskb_may_pull(skb, 4*iph->ihl))
+ goto inhdr_error;
+
+ iph = skb->nh.iph;
+ if (ip_fast_csum((__u8 *)iph, iph->ihl) != 0)
+ goto inhdr_error;
+
+ len = ntohs(iph->tot_len);
+ if (skb->len < len || len < 4*iph->ihl)
+ goto inhdr_error;
+
+ if (skb->len > len) {
+ __pskb_trim(skb, len);
+ if (skb->ip_summed == CHECKSUM_HW)
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+ skb->physindev = skb->dev;
+ skb->dev = bridge_parent(skb->dev);
+ if (skb->pkt_type == PACKET_OTHERHOST)
+ skb->pkt_type = PACKET_HOST;
+ store_orig_dstaddr(skb);
+ NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+ br_nf_pre_routing_finish);
+
+ return NF_STOLEN;
+
+inhdr_error:
+// IP_INC_STATS_BH(IpInHdrErrors);
+out:
+ return NF_DROP;
+}
+
+
+/* PF_BRIDGE/LOCAL_IN ************************************************/
+/* The packet is locally destined, which requires a real
+ * dst_entry, so detach the fake one. On the way up, the
+ * packet would pass through PRE_ROUTING again (which already
+ * took place when the packet entered the bridge), but we
+ * register an IPv4 PRE_ROUTING 'sabotage' hook that will
+ * prevent this from happening.
+ */
+static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *skb = *pskb;
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if (skb->dst == (struct dst_entry *)&__fake_rtable) {
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ }
+
+ return NF_ACCEPT;
+}
+
+
+/* PF_BRIDGE/FORWARD *************************************************/
+static int br_nf_forward_finish(struct sk_buff *skb)
+{
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, skb->physindev,
+ skb->dev, br_forward_finish, 1);
+
+ return 0;
+}
+
+/* This is the 'purely bridged' case. We pass the packet to
+ * netfilter with indev and outdev set to the bridge device,
+ * but we are still able to filter on the 'real' indev/outdev
+ * because another bit of the bridge-nf patch overloads the
+ * '-i' and '-o' iptables interface checks to take
+ * skb->phys{in,out}dev into account as well (so both the real
+ * device and the bridge device will match).
+ */
+static unsigned int br_nf_forward(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *skb = *pskb;
+
+	// don't mess with non-IP frames, and don't mess with IP packets
+	// when br_nf_local_out_finish_forward explicitly says so
+	// (it clears skb->physindev to keep us away).
+ if (skb->protocol != __constant_htons(ETH_P_IP) || skb->physindev == NULL)
+ return NF_ACCEPT;
+
+ skb->physoutdev = skb->dev;
+ NF_HOOK(PF_INET, NF_IP_FORWARD, skb, bridge_parent(skb->physindev),
+ bridge_parent(skb->dev), br_nf_forward_finish);
+
+ return NF_STOLEN;
+}
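+
+/* For example (device names illustrative): with the interface checks
+ * overloaded as described above, a frame bridged in via eth0 on
+ * bridge br0 is matched by both
+ *	iptables -A FORWARD -i eth0 -j DROP
+ * and
+ *	iptables -A FORWARD -i br0 -j DROP
+ * because eth0 is skb->physindev and br0 is its bridge parent. */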
+
+
+/* PF_BRIDGE/LOCAL_OUT ***********************************************/
+static int br_nf_local_out_finish_forward(struct sk_buff *skb)
+{
+ struct net_device *dev;
+
+ dev = skb->physindev;
+ // tell br_nf_forward to stay away
+ skb->physindev = NULL;
+ NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, dev, skb->dev,
+ br_forward_finish);
+
+ return 0;
+}
+
+static int br_nf_local_out_finish(struct sk_buff *skb)
+{
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+ br_forward_finish, INT_MIN + 1);
+
+ return 0;
+}
+
+
+/* This hook sees both locally originated IP packets and forwarded
+ * IP packets (in both cases the destination device is a bridge
+ * device). For the sake of interface transparency (i.e. properly
+ * overloading the '-o' option), we steal packets destined for
+ * a bridge device away from the IPv4 FORWARD and OUTPUT hooks,
+ * and reinject them later, when we have determined the real
+ * output device. This reinjecting happens here.
+ *
+ * If skb->physindev is NULL, the bridge-nf code never touched
+ * this packet before, and so the packet was locally originated.
+ * We call the IPv4 LOCAL_OUT hook.
+ *
+ * If skb->physindev isn't NULL, there are two cases:
+ * 1. The packet was IP routed.
+ * 2. The packet was cross-bridge DNAT'ed (see the comment near
+ * PF_BRIDGE/PRE_ROUTING).
+ * In both cases, we call the IPv4 FORWARD hook. If skb->physindev
+ * has a bridge device as parent (always true in case 2, and true in
+ * case 1 when the packet originally came in on a bridge port), we
+ * use that parent device as indev. Otherwise, we just use physindev.
+ *
+ * If skb->physoutdev == NULL, the bridge code never touched the
+ * packet or the packet was routed in br_nf_pre_routing_finish().
+ * We give the packet to the bridge NF_BR_LOCAL_OUT hook.
+ * If not, the packet is actually a bridged one, so we give it to
+ * the NF_BR_FORWARD hook.
+ */
+
+static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*_okfn)(struct sk_buff *))
+{
+ int hookno, prio;
+ int (*okfn)(struct sk_buff *skb);
+ struct net_device *realindev;
+ struct sk_buff *skb = *pskb;
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ /* Sometimes we get packets with NULL ->dst here (for example,
+ * running a dhcp client daemon triggers this).
+ */
+ if (skb->dst == NULL)
+ return NF_ACCEPT;
+
+ // bridged, take forward
+ // (see big note in front of br_nf_pre_routing_finish)
+ if (skb->physoutdev == &__fake_net_device) {
+ okfn = br_nf_local_out_finish_forward;
+ } else if (skb->physoutdev == NULL) {
+ // non-bridged: routed or locally generated traffic, take local_out
+ // (see big note in front of br_nf_pre_routing_finish)
+ okfn = br_nf_local_out_finish;
+ } else {
+ printk("ARGH: bridge_or_routed hack doesn't work\n");
+ okfn = br_nf_local_out_finish;
+ }
+
+ skb->physoutdev = skb->dev;
+
+ hookno = NF_IP_LOCAL_OUT;
+ prio = NF_IP_PRI_BRIDGE_SABOTAGE;
+ if ((realindev = skb->physindev) != NULL) {
+ hookno = NF_IP_FORWARD;
+ // there is an iptables mangle table FORWARD chain with
+ // priority -150. This chain should see the physical out-dev.
+ prio = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD;
+ if (has_bridge_parent(realindev))
+ realindev = bridge_parent(realindev);
+ }
+
+ NF_HOOK_THRESH(PF_INET, hookno, skb, realindev,
+ bridge_parent(skb->dev), okfn, prio + 1);
+
+ return NF_STOLEN;
+}
+
+
+/* PF_BRIDGE/POST_ROUTING ********************************************/
+static int br_nf_post_routing_finish(struct sk_buff *skb)
+{
+ __maybe_fixup_src_address(skb);
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL,
+ bridge_parent(skb->dev), br_dev_queue_push_xmit, 1);
+
+ return 0;
+}
+
+static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *skb = *pskb;
+
+ /* Be very paranoid. */
+ if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
+ printk(KERN_CRIT "Argh!! Fuck me harder with a chainsaw. ");
+ if (skb->dev != NULL) {
+ printk("[%s]", skb->dev->name);
+ if (has_bridge_parent(skb->dev))
+ printk("[%s]", bridge_parent(skb->dev)->name);
+ }
+ printk("\n");
+ return NF_ACCEPT;
+ }
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ /* Sometimes we get packets with NULL ->dst here (for example,
+ * running a dhcp client daemon triggers this).
+ */
+ if (skb->dst == NULL)
+ return NF_ACCEPT;
+
+ store_orig_srcaddr(skb);
+ NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL,
+ bridge_parent(skb->dev), br_nf_post_routing_finish);
+
+ return NF_STOLEN;
+}
+
+
+/* IPv4/SABOTAGE *****************************************************/
+/* Don't hand locally destined packets to PF_INET/PRE_ROUTING
+ * a second time. */
+static unsigned int ipv4_sabotage_in(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+ if (in->hard_start_xmit == br_dev_xmit &&
+ okfn != br_nf_pre_routing_finish) {
+ okfn(*pskb);
+ return NF_STOLEN;
+ }
+
+ return NF_ACCEPT;
+}
+
+/* Postpone execution of PF_INET/FORWARD, PF_INET/LOCAL_OUT
+ * and PF_INET/POST_ROUTING until we have made the forwarding
+ * decision in the bridge code and have determined skb->physoutdev.
+ */
+static unsigned int ipv4_sabotage_out(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+ if (out->hard_start_xmit == br_dev_xmit &&
+ okfn != br_nf_forward_finish &&
+ okfn != br_nf_local_out_finish &&
+ okfn != br_nf_post_routing_finish) {
+ struct sk_buff *skb = *pskb;
+
+ if (hook == NF_IP_FORWARD && skb->physindev == NULL)
+ skb->physindev = (struct net_device *)in;
+ okfn(skb);
+ return NF_STOLEN;
+ }
+
+ return NF_ACCEPT;
+}
+
+
+static struct nf_hook_ops br_nf_ops[] = {
+ { { NULL, NULL }, br_nf_pre_routing, PF_BRIDGE, NF_BR_PRE_ROUTING, 0 },
+ { { NULL, NULL }, br_nf_local_in, PF_BRIDGE, NF_BR_LOCAL_IN, 0 },
+ { { NULL, NULL }, br_nf_forward, PF_BRIDGE, NF_BR_FORWARD, 0 },
+ // we need INT_MIN, so innocent NF_BR_LOCAL_OUT functions don't
+ // get bridged traffic as input
+ { { NULL, NULL }, br_nf_local_out, PF_BRIDGE, NF_BR_LOCAL_OUT, INT_MIN },
+ { { NULL, NULL }, br_nf_post_routing, PF_BRIDGE, NF_BR_POST_ROUTING, 0 },
+
+ { { NULL, NULL }, ipv4_sabotage_in, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_FIRST },
+
+ { { NULL, NULL }, ipv4_sabotage_out, PF_INET, NF_IP_FORWARD, NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD },
+ { { NULL, NULL }, ipv4_sabotage_out, PF_INET, NF_IP_LOCAL_OUT, NF_IP_PRI_BRIDGE_SABOTAGE },
+ { { NULL, NULL }, ipv4_sabotage_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_FIRST },
+};
+
+#define NUMHOOKS (sizeof(br_nf_ops)/sizeof(br_nf_ops[0]))
+
+
+int br_netfilter_init(void)
+{
+ int i;
+
+#ifndef WE_REALLY_INSIST_ON_NOT_HAVING_NAT_SUPPORT
+ if (sizeof(struct tcp_skb_cb) + 4 >= sizeof(((struct sk_buff *)NULL)->cb)) {
+ extern int __too_little_space_in_control_buffer(void);
+ __too_little_space_in_control_buffer();
+ }
+#endif
+
+	for (i = 0; i < NUMHOOKS; i++) {
+ int ret;
+
+ if ((ret = nf_register_hook(&br_nf_ops[i])) >= 0)
+ continue;
+
+ while (i--)
+ nf_unregister_hook(&br_nf_ops[i]);
+
+ return ret;
+ }
+
+ printk(KERN_NOTICE "Bridge firewalling registered\n");
+
+ return 0;
+}
+
+void br_netfilter_fini(void)
+{
+ int i;
+
+	for (i = NUMHOOKS - 1; i >= 0; i--)
+ nf_unregister_hook(&br_nf_ops[i]);
+}
diff --git a/br-nf-bds/linux2.5/net/bridge/br_private.h b/br-nf-bds/linux2.5/net/bridge/br_private.h
new file mode 100644
index 0000000..f5713c3
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/bridge/br_private.h
@@ -0,0 +1,212 @@
+/*
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ *
+ * $Id: br_private.h,v 1.1 2002/08/24 09:28:29 bdschuym Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _BR_PRIVATE_H
+#define _BR_PRIVATE_H
+
+#include <linux/netdevice.h>
+#include <linux/miscdevice.h>
+#include <linux/if_bridge.h>
+#include "br_private_timer.h"
+
+#define BR_HASH_BITS 8
+#define BR_HASH_SIZE (1 << BR_HASH_BITS)
+
+#define BR_HOLD_TIME (1*HZ)
+
+typedef struct bridge_id bridge_id;
+typedef struct mac_addr mac_addr;
+typedef __u16 port_id;
+
+struct bridge_id
+{
+ unsigned char prio[2];
+ unsigned char addr[6];
+};
+
+struct mac_addr
+{
+ unsigned char addr[6];
+ unsigned char pad[2];
+};
+
+struct net_bridge_fdb_entry
+{
+ struct net_bridge_fdb_entry *next_hash;
+ struct net_bridge_fdb_entry **pprev_hash;
+ atomic_t use_count;
+ mac_addr addr;
+ struct net_bridge_port *dst;
+ unsigned long ageing_timer;
+ unsigned is_local:1;
+ unsigned is_static:1;
+};
+
+struct net_bridge_port
+{
+ struct net_bridge_port *next;
+ struct net_bridge *br;
+ struct net_device *dev;
+ int port_no;
+
+ /* STP */
+ port_id port_id;
+ int state;
+ int path_cost;
+ bridge_id designated_root;
+ int designated_cost;
+ bridge_id designated_bridge;
+ port_id designated_port;
+ unsigned topology_change_ack:1;
+ unsigned config_pending:1;
+ int priority;
+
+ struct br_timer forward_delay_timer;
+ struct br_timer hold_timer;
+ struct br_timer message_age_timer;
+};
+
+struct net_bridge
+{
+ struct net_bridge *next;
+ rwlock_t lock;
+ struct net_bridge_port *port_list;
+ struct net_device dev;
+ struct net_device_stats statistics;
+ rwlock_t hash_lock;
+ struct net_bridge_fdb_entry *hash[BR_HASH_SIZE];
+ struct timer_list tick;
+
+ /* STP */
+ bridge_id designated_root;
+ int root_path_cost;
+ int root_port;
+ int max_age;
+ int hello_time;
+ int forward_delay;
+ bridge_id bridge_id;
+ int bridge_max_age;
+ int bridge_hello_time;
+ int bridge_forward_delay;
+ unsigned stp_enabled:1;
+ unsigned topology_change:1;
+ unsigned topology_change_detected:1;
+
+ struct br_timer hello_timer;
+ struct br_timer tcn_timer;
+ struct br_timer topology_change_timer;
+ struct br_timer gc_timer;
+
+ int ageing_time;
+ int gc_interval;
+};
+
+extern struct notifier_block br_device_notifier;
+extern unsigned char bridge_ula[6];
+
+/* br.c */
+extern void br_dec_use_count(void);
+extern void br_inc_use_count(void);
+
+/* br_device.c */
+extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
+extern void br_dev_setup(struct net_device *dev);
+extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
+
+/* br_fdb.c */
+extern void br_fdb_changeaddr(struct net_bridge_port *p,
+ unsigned char *newaddr);
+extern void br_fdb_cleanup(struct net_bridge *br);
+extern void br_fdb_delete_by_port(struct net_bridge *br,
+ struct net_bridge_port *p);
+extern struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br,
+ unsigned char *addr);
+extern void br_fdb_put(struct net_bridge_fdb_entry *ent);
+extern int br_fdb_get_entries(struct net_bridge *br,
+ unsigned char *_buf,
+ int maxnum,
+ int offset);
+extern void br_fdb_insert(struct net_bridge *br,
+ struct net_bridge_port *source,
+ unsigned char *addr,
+ int is_local);
+
+/* br_forward.c */
+extern void br_deliver(struct net_bridge_port *to,
+ struct sk_buff *skb);
+extern int br_dev_queue_push_xmit(struct sk_buff *skb);
+extern void br_forward(struct net_bridge_port *to,
+ struct sk_buff *skb);
+extern int br_forward_finish(struct sk_buff *skb);
+extern void br_flood_deliver(struct net_bridge *br,
+ struct sk_buff *skb,
+ int clone);
+extern void br_flood_forward(struct net_bridge *br,
+ struct sk_buff *skb,
+ int clone);
+
+/* br_if.c */
+extern int br_add_bridge(char *name);
+extern int br_del_bridge(char *name);
+extern int br_add_if(struct net_bridge *br,
+ struct net_device *dev);
+extern int br_del_if(struct net_bridge *br,
+ struct net_device *dev);
+extern int br_get_bridge_ifindices(int *indices,
+ int num);
+extern void br_get_port_ifindices(struct net_bridge *br,
+ int *ifindices);
+
+/* br_input.c */
+extern int br_handle_frame_finish(struct sk_buff *skb);
+extern int br_handle_frame(struct sk_buff *skb);
+
+/* br_ioctl.c */
+extern void br_call_ioctl_atomic(void (*fn)(void));
+extern int br_ioctl(struct net_bridge *br,
+ unsigned int cmd,
+ unsigned long arg0,
+ unsigned long arg1,
+ unsigned long arg2);
+extern int br_ioctl_deviceless_stub(unsigned long arg);
+
+/* br_netfilter.c */
+extern int br_netfilter_init(void);
+extern void br_netfilter_fini(void);
+
+/* br_stp.c */
+extern int br_is_root_bridge(struct net_bridge *br);
+extern struct net_bridge_port *br_get_port(struct net_bridge *br,
+ int port_no);
+extern void br_init_port(struct net_bridge_port *p);
+extern port_id br_make_port_id(struct net_bridge_port *p);
+extern void br_become_designated_port(struct net_bridge_port *p);
+
+/* br_stp_if.c */
+extern void br_stp_enable_bridge(struct net_bridge *br);
+extern void br_stp_disable_bridge(struct net_bridge *br);
+extern void br_stp_enable_port(struct net_bridge_port *p);
+extern void br_stp_disable_port(struct net_bridge_port *p);
+extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
+extern void br_stp_set_bridge_priority(struct net_bridge *br,
+ int newprio);
+extern void br_stp_set_port_priority(struct net_bridge_port *p,
+ int newprio);
+extern void br_stp_set_path_cost(struct net_bridge_port *p,
+ int path_cost);
+
+/* br_stp_bpdu.c */
+extern void br_stp_handle_bpdu(struct sk_buff *skb);
+
+#endif
diff --git a/br-nf-bds/linux2.5/net/core/netfilter.c b/br-nf-bds/linux2.5/net/core/netfilter.c
new file mode 100644
index 0000000..198dad6
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/core/netfilter.c
@@ -0,0 +1,654 @@
+/* netfilter.c: look after the filters for various protocols.
+ * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
+ *
+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
+ * way.
+ *
+ * Rusty Russell (C)2000 -- This code is GPL.
+ *
+ * February 2000: Modified by James Morris to have 1 queue per protocol.
+ * 15-Mar-2000: Added NF_REPEAT --RR.
+ */
+#include <linux/config.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/brlock.h>
+#include <linux/inetdevice.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <linux/ip.h>
+
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+
+/* In this code, we can be waiting indefinitely for userspace to
+ * service a packet if a hook returns NF_QUEUE. We could keep a count
+ * of skbuffs queued for userspace, and not deregister a hook unless
+ * this is zero, but that sucks. Now, we simply check when the
+ * packets come back: if the hook is gone, the packet is discarded. */
+#ifdef CONFIG_NETFILTER_DEBUG
+#define NFDEBUG(format, args...) printk(format , ## args)
+#else
+#define NFDEBUG(format, args...)
+#endif
+
+/* Sockopts are only registered and called from user context, so
+ BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may
+ sleep. */
+static DECLARE_MUTEX(nf_sockopt_mutex);
+
+struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+static LIST_HEAD(nf_sockopts);
+
+/*
+ * A queue handler may be registered for each protocol. Each is protected by
+ * a long term mutex. The handler must provide an outfn() to accept packets
+ * for queueing and must reinject all packets it receives, no matter what.
+ */
+static struct nf_queue_handler_t {
+ nf_queue_outfn_t outfn;
+ void *data;
+} queue_handler[NPROTO];
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+ struct list_head *i;
+
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ for (i = nf_hooks[reg->pf][reg->hooknum].next;
+ i != &nf_hooks[reg->pf][reg->hooknum];
+ i = i->next) {
+ if (reg->priority < ((struct nf_hook_ops *)i)->priority)
+ break;
+ }
+ list_add(&reg->list, i->prev);
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
+ return 0;
+}
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ list_del(&reg->list);
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
+}
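+
+/* A minimal registration sketch (example_hook and example_ops are
+ * illustrative; NF_IP_PRE_ROUTING is assumed pulled in from
+ * <linux/netfilter_ipv4.h>): a hook that accepts every packet,
+ * inserted at PF_INET/NF_IP_PRE_ROUTING with priority 0. Hooks with
+ * lower priority values run first.
+ */
+#if 0
+static unsigned int example_hook(unsigned int hooknum,
+				 struct sk_buff **pskb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *))
+{
+	return NF_ACCEPT;	/* let nf_iterate() continue down the list */
+}
+
+static struct nf_hook_ops example_ops = {
+	{ NULL, NULL }, example_hook, PF_INET, NF_IP_PRE_ROUTING, 0
+};
+
+/* nf_register_hook(&example_ops) inserts it in ascending priority
+ * order; nf_unregister_hook(&example_ops) removes it again. */
+#endif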
+
+/* Do exclusive ranges overlap? */
+static inline int overlap(int min1, int max1, int min2, int max2)
+{
+ return max1 > min2 && min1 < max2;
+}
+
+/* Functions to register sockopt ranges (exclusive). */
+int nf_register_sockopt(struct nf_sockopt_ops *reg)
+{
+ struct list_head *i;
+ int ret = 0;
+
+ if (down_interruptible(&nf_sockopt_mutex) != 0)
+ return -EINTR;
+
+ for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
+ struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
+ if (ops->pf == reg->pf
+ && (overlap(ops->set_optmin, ops->set_optmax,
+ reg->set_optmin, reg->set_optmax)
+ || overlap(ops->get_optmin, ops->get_optmax,
+ reg->get_optmin, reg->get_optmax))) {
+ NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ ops->set_optmin, ops->set_optmax,
+ ops->get_optmin, ops->get_optmax,
+ reg->set_optmin, reg->set_optmax,
+ reg->get_optmin, reg->get_optmax);
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ list_add(&reg->list, &nf_sockopts);
+out:
+ up(&nf_sockopt_mutex);
+ return ret;
+}
+
+void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
+{
+ /* No point being interruptible: we're probably in cleanup_module() */
+ restart:
+ down(&nf_sockopt_mutex);
+ if (reg->use != 0) {
+ /* To be woken by nf_sockopt call... */
+ /* FIXME: Stuart Young's name appears gratuitously. */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ reg->cleanup_task = current;
+ up(&nf_sockopt_mutex);
+ schedule();
+ goto restart;
+ }
+ list_del(&reg->list);
+ up(&nf_sockopt_mutex);
+}
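+
+/* A minimal registration sketch (the name and option numbers are
+ * illustrative): an exclusive setsockopt range covering option 64
+ * only (ranges are half-open: optmin <= val < optmax), with 0/0/NULL
+ * on the get side so get() is never called.
+ */
+#if 0
+static int example_set(struct sock *sk, int optval, void *user,
+		       unsigned int len)
+{
+	return 0;
+}
+
+static struct nf_sockopt_ops example_sockopt_ops = {
+	{ NULL, NULL },		/* list */
+	PF_INET,		/* pf */
+	64, 65, example_set,	/* set_optmin, set_optmax, set */
+	0, 0, NULL,		/* get_optmin, get_optmax, get */
+	0, NULL			/* use, cleanup_task */
+};
+
+/* nf_register_sockopt(&example_sockopt_ops) would return -EBUSY for
+ * any later registration overlapping 64..65 for PF_INET. */
+#endif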
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4.h>
+
+static void debug_print_hooks_ip(unsigned int nf_debug)
+{
+ if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
+ printk("PRE_ROUTING ");
+ nf_debug ^= (1 << NF_IP_PRE_ROUTING);
+ }
+ if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
+ printk("LOCAL_IN ");
+ nf_debug ^= (1 << NF_IP_LOCAL_IN);
+ }
+ if (nf_debug & (1 << NF_IP_FORWARD)) {
+ printk("FORWARD ");
+ nf_debug ^= (1 << NF_IP_FORWARD);
+ }
+ if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
+ printk("LOCAL_OUT ");
+ nf_debug ^= (1 << NF_IP_LOCAL_OUT);
+ }
+ if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
+ printk("POST_ROUTING ");
+ nf_debug ^= (1 << NF_IP_POST_ROUTING);
+ }
+ if (nf_debug)
+ printk("Crap bits: 0x%04X", nf_debug);
+ printk("\n");
+}
+
+void nf_dump_skb(int pf, struct sk_buff *skb)
+{
+ printk("skb: pf=%i %s dev=%s len=%u\n",
+ pf,
+ skb->sk ? "(owned)" : "(unowned)",
+ skb->dev ? skb->dev->name : "(no dev)",
+ skb->len);
+ switch (pf) {
+ case PF_INET: {
+ const struct iphdr *ip = skb->nh.iph;
+ __u32 *opt = (__u32 *) (ip + 1);
+ int opti;
+ __u16 src_port = 0, dst_port = 0;
+
+ if (ip->protocol == IPPROTO_TCP
+ || ip->protocol == IPPROTO_UDP) {
+ struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
+ src_port = ntohs(tcp->source);
+ dst_port = ntohs(tcp->dest);
+ }
+
+ printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
+ " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
+ ip->protocol, NIPQUAD(ip->saddr),
+ src_port, NIPQUAD(ip->daddr),
+ dst_port,
+ ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
+ ntohs(ip->frag_off), ip->ttl);
+
+ for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
+ printk(" O=0x%8.8X", *opt++);
+ printk("\n");
+ }
+ }
+}
+
+void nf_debug_ip_local_deliver(struct sk_buff *skb)
+{
+ /* If it's a loopback packet, it must have come through
+ * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
+ * NF_IP_LOCAL_IN. Otherwise, must have gone through
+ * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
+ if (!skb->dev) {
+ printk("ip_local_deliver: skb->dev is NULL.\n");
+ }
+ else if (strcmp(skb->dev->name, "lo") == 0) {
+ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING)
+ | (1 << NF_IP_PRE_ROUTING)
+ | (1 << NF_IP_LOCAL_IN))) {
+ printk("ip_local_deliver: bad loopback skb: ");
+ debug_print_hooks_ip(skb->nf_debug);
+ nf_dump_skb(PF_INET, skb);
+ }
+ }
+ else {
+ if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
+ | (1<<NF_IP_LOCAL_IN))) {
+ printk("ip_local_deliver: bad non-lo skb: ");
+ debug_print_hooks_ip(skb->nf_debug);
+ nf_dump_skb(PF_INET, skb);
+ }
+ }
+}
+
+void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
+{
+ if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING))) {
+ printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
+ newskb);
+ debug_print_hooks_ip(newskb->nf_debug);
+ nf_dump_skb(PF_INET, newskb);
+ }
+ /* Clear to avoid confusing input check */
+ newskb->nf_debug = 0;
+}
+
+void nf_debug_ip_finish_output2(struct sk_buff *skb)
+{
+ /* If it's owned, it must have gone through the
+ * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
+ * Otherwise, must have gone through
+ * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
+ */
+ if (skb->sk) {
+ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING))) {
+ printk("ip_finish_output: bad owned skb = %p: ", skb);
+ debug_print_hooks_ip(skb->nf_debug);
+ nf_dump_skb(PF_INET, skb);
+ }
+ } else {
+ if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
+ | (1 << NF_IP_FORWARD)
+ | (1 << NF_IP_POST_ROUTING))) {
+			/* Fragments, tunnelled packets, and TCP RSTs
+			   generated by ipt_REJECT will have no
+			   owner, but may still be local */
+ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING))){
+ printk("ip_finish_output:"
+ " bad unowned skb = %p: ",skb);
+ debug_print_hooks_ip(skb->nf_debug);
+ nf_dump_skb(PF_INET, skb);
+ }
+ }
+ }
+}
+#endif /*CONFIG_NETFILTER_DEBUG*/
+
+/* Call get/setsockopt() */
+static int nf_sockopt(struct sock *sk, int pf, int val,
+ char *opt, int *len, int get)
+{
+ struct list_head *i;
+ struct nf_sockopt_ops *ops;
+ int ret;
+
+ if (down_interruptible(&nf_sockopt_mutex) != 0)
+ return -EINTR;
+
+ for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
+ ops = (struct nf_sockopt_ops *)i;
+ if (ops->pf == pf) {
+ if (get) {
+ if (val >= ops->get_optmin
+ && val < ops->get_optmax) {
+ ops->use++;
+ up(&nf_sockopt_mutex);
+ ret = ops->get(sk, val, opt, len);
+ goto out;
+ }
+ } else {
+ if (val >= ops->set_optmin
+ && val < ops->set_optmax) {
+ ops->use++;
+ up(&nf_sockopt_mutex);
+ ret = ops->set(sk, val, opt, *len);
+ goto out;
+ }
+ }
+ }
+ }
+ up(&nf_sockopt_mutex);
+ return -ENOPROTOOPT;
+
+ out:
+ down(&nf_sockopt_mutex);
+ ops->use--;
+ if (ops->cleanup_task)
+ wake_up_process(ops->cleanup_task);
+ up(&nf_sockopt_mutex);
+ return ret;
+}
+
+int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
+ int len)
+{
+ return nf_sockopt(sk, pf, val, opt, &len, 0);
+}
+
+int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
+{
+ return nf_sockopt(sk, pf, val, opt, len, 1);
+}
+
+static unsigned int nf_iterate(struct list_head *head,
+ struct sk_buff **skb,
+ int hook,
+ const struct net_device *indev,
+ const struct net_device *outdev,
+ struct list_head **i,
+ int (*okfn)(struct sk_buff *),
+ int hook_thresh)
+{
+ for (*i = (*i)->next; *i != head; *i = (*i)->next) {
+ struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
+
+ if (hook_thresh > elem->priority)
+ continue;
+
+ switch (elem->hook(hook, skb, indev, outdev, okfn)) {
+ case NF_QUEUE:
+ return NF_QUEUE;
+
+ case NF_STOLEN:
+ return NF_STOLEN;
+
+ case NF_DROP:
+ return NF_DROP;
+
+ case NF_REPEAT:
+ *i = (*i)->prev;
+ break;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ case NF_ACCEPT:
+ break;
+
+ default:
+ NFDEBUG("Evil return from %p(%u).\n",
+ elem->hook, hook);
+#endif
+ }
+ }
+ return NF_ACCEPT;
+}
+
+int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
+{
+ int ret;
+
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ if (queue_handler[pf].outfn)
+ ret = -EBUSY;
+ else {
+ queue_handler[pf].outfn = outfn;
+ queue_handler[pf].data = data;
+ ret = 0;
+ }
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+ return ret;
+}
+
+/* The caller must flush their queue before this */
+int nf_unregister_queue_handler(int pf)
+{
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ queue_handler[pf].outfn = NULL;
+ queue_handler[pf].data = NULL;
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
+ return 0;
+}
+
+/*
+ * Any packet that leaves via this function must come back
+ * through nf_reinject().
+ */
+static void nf_queue(struct sk_buff *skb,
+ struct list_head *elem,
+ int pf, unsigned int hook,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *))
+{
+ int status;
+ struct nf_info *info;
+ struct net_device *physindev;
+ struct net_device *physoutdev;
+
+ if (!queue_handler[pf].outfn) {
+ kfree_skb(skb);
+ return;
+ }
+
+ info = kmalloc(sizeof(*info), GFP_ATOMIC);
+ if (!info) {
+ if (net_ratelimit())
+ printk(KERN_ERR "OOM queueing packet %p\n",
+ skb);
+ kfree_skb(skb);
+ return;
+ }
+
+ *info = (struct nf_info) {
+ (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
+
+ /* Bump dev refs so they don't vanish while packet is out */
+ if (indev) dev_hold(indev);
+ if (outdev) dev_hold(outdev);
+
+ if ((physindev = skb->physindev)) dev_hold(physindev);
+ if ((physoutdev = skb->physoutdev)) dev_hold(physoutdev);
+
+ status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
+ if (status < 0) {
+ /* James M doesn't say fuck enough. */
+ if (indev) dev_put(indev);
+ if (outdev) dev_put(outdev);
+ if (physindev) dev_put(physindev);
+ if (physoutdev) dev_put(physoutdev);
+ kfree(info);
+ kfree_skb(skb);
+ return;
+ }
+}
+
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *),
+ int hook_thresh)
+{
+ struct list_head *elem;
+ unsigned int verdict;
+ int ret = 0;
+
+ /* This stopgap cannot be removed until all the hooks are audited. */
+ if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->ip_summed == CHECKSUM_HW) {
+ if (outdev == NULL) {
+ skb->ip_summed = CHECKSUM_NONE;
+ } else {
+ skb_checksum_help(skb);
+ }
+ }
+
+ /* We may already have this, but read-locks nest anyway */
+ br_read_lock_bh(BR_NETPROTO_LOCK);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (skb->nf_debug & (1 << hook)) {
+ printk("nf_hook: hook %i already set.\n", hook);
+ nf_dump_skb(pf, skb);
+ }
+ skb->nf_debug |= (1 << hook);
+#endif
+
+ elem = &nf_hooks[pf][hook];
+ verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
+ outdev, &elem, okfn, hook_thresh);
+ if (verdict == NF_QUEUE) {
+ NFDEBUG("nf_hook: Verdict = QUEUE.\n");
+ nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
+ }
+
+ switch (verdict) {
+ case NF_ACCEPT:
+ ret = okfn(skb);
+ break;
+
+ case NF_DROP:
+ kfree_skb(skb);
+ ret = -EPERM;
+ break;
+ }
+
+ br_read_unlock_bh(BR_NETPROTO_LOCK);
+ return ret;
+}
+
+void nf_reinject(struct sk_buff *skb, struct nf_info *info,
+ unsigned int verdict)
+{
+ struct list_head *elem = &info->elem->list;
+ struct list_head *i;
+
+ /* We don't have BR_NETPROTO_LOCK here */
+ br_read_lock_bh(BR_NETPROTO_LOCK);
+ for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
+ if (i == &nf_hooks[info->pf][info->hook]) {
+ /* The module which sent it to userspace is gone. */
+ NFDEBUG("%s: module disappeared, dropping packet.\n",
+ __FUNCTION__);
+ verdict = NF_DROP;
+ break;
+ }
+ }
+
+ /* Continue traversal iff userspace said ok... */
+ if (verdict == NF_REPEAT) {
+ elem = elem->prev;
+ verdict = NF_ACCEPT;
+ }
+
+ if (verdict == NF_ACCEPT) {
+ verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+ &skb, info->hook,
+ info->indev, info->outdev, &elem,
+ info->okfn, INT_MIN);
+ }
+
+ switch (verdict) {
+ case NF_ACCEPT:
+ info->okfn(skb);
+ break;
+
+ case NF_QUEUE:
+ nf_queue(skb, elem, info->pf, info->hook,
+ info->indev, info->outdev, info->okfn);
+ break;
+
+ case NF_DROP:
+ kfree_skb(skb);
+ break;
+ }
+ br_read_unlock_bh(BR_NETPROTO_LOCK);
+
+ /* Release those devices we held, or Alexey will kill me. */
+ if (info->indev) dev_put(info->indev);
+ if (info->outdev) dev_put(info->outdev);
+
+ kfree(info);
+ return;
+}
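+
+/* A minimal queue-handler sketch (example_outfn is illustrative; its
+ * signature is inferred from the outfn() call in nf_queue() above):
+ * it honours the contract that every queued packet must come back
+ * through nf_reinject(), here immediately and with an ACCEPT verdict.
+ */
+#if 0
+static int example_outfn(struct sk_buff *skb, struct nf_info *info,
+			 void *data)
+{
+	nf_reinject(skb, info, NF_ACCEPT);
+	return 0;
+}
+
+/* Installed with nf_register_queue_handler(PF_INET, example_outfn,
+ * NULL) and removed with nf_unregister_queue_handler(PF_INET);
+ * registering a second handler for the same pf returns -EBUSY. */
+#endif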
+
+#ifdef CONFIG_INET
+/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
+int ip_route_me_harder(struct sk_buff **pskb)
+{
+ struct iphdr *iph = (*pskb)->nh.iph;
+ struct rtable *rt;
+ struct rt_key key = { dst:iph->daddr,
+ src:iph->saddr,
+ oif:(*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
+ tos:RT_TOS(iph->tos)|RTO_CONN,
+#ifdef CONFIG_IP_ROUTE_FWMARK
+ fwmark:(*pskb)->nfmark
+#endif
+ };
+ struct net_device *dev_src = NULL;
+ int err;
+
+	/* accommodate ip_route_output_slow(), which expects the key src to be
+	   0 or a local address; however some non-standard hacks like
+	   ipt_REJECT.c:send_reset() can cause packets with a foreign
+	   saddr to appear on the NF_IP_LOCAL_OUT hook -MB */
+ if(key.src && !(dev_src = ip_dev_find(key.src)))
+ key.src = 0;
+
+ if ((err=ip_route_output_key(&rt, &key)) != 0) {
+ printk("route_me_harder: ip_route_output_key(dst=%u.%u.%u.%u, src=%u.%u.%u.%u, oif=%d, tos=0x%x, fwmark=0x%lx) error %d\n",
+ NIPQUAD(iph->daddr), NIPQUAD(iph->saddr),
+ (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
+ RT_TOS(iph->tos)|RTO_CONN,
+#ifdef CONFIG_IP_ROUTE_FWMARK
+ (*pskb)->nfmark,
+#else
+ 0UL,
+#endif
+ err);
+ goto out;
+ }
+
+ /* Drop old route. */
+ dst_release((*pskb)->dst);
+
+ (*pskb)->dst = &rt->u.dst;
+
+ /* Change in oif may mean change in hh_len. */
+ if (skb_headroom(*pskb) < (*pskb)->dst->dev->hard_header_len) {
+ struct sk_buff *nskb;
+
+ nskb = skb_realloc_headroom(*pskb,
+ (*pskb)->dst->dev->hard_header_len);
+ if (!nskb) {
+ err = -ENOMEM;
+ goto out;
+ }
+ if ((*pskb)->sk)
+ skb_set_owner_w(nskb, (*pskb)->sk);
+ kfree_skb(*pskb);
+ *pskb = nskb;
+ }
+
+out:
+ if (dev_src)
+ dev_put(dev_src);
+
+ return err;
+}
+#endif /*CONFIG_INET*/
+
+/* This does not belong here, but ipt_REJECT needs it if connection
+   tracking is in use: without this, the connection may not be in the
+   hash table, and hence manufactured ICMP or RST packets will not be
+   associated with it. */
+void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
+
+void __init netfilter_init(void)
+{
+ int i, h;
+
+ for (i = 0; i < NPROTO; i++) {
+ for (h = 0; h < NF_MAX_HOOKS; h++)
+ INIT_LIST_HEAD(&nf_hooks[i][h]);
+ }
+}
diff --git a/br-nf-bds/linux2.5/net/core/skbuff.c b/br-nf-bds/linux2.5/net/core/skbuff.c
new file mode 100644
index 0000000..eecc16e
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/core/skbuff.c
@@ -0,0 +1,1208 @@
+/*
+ * Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
+ * Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ * Version: $Id: skbuff.c,v 1.1 2002/08/24 09:27:04 bdschuym Exp $
+ *
+ * Fixes:
+ * Alan Cox : Fixed the worst of the load
+ * balancer bugs.
+ * Dave Platt : Interrupt stacking fix.
+ * Richard Kooijman : Timestamp fixes.
+ * Alan Cox : Changed buffer format.
+ * Alan Cox : destructor hook for AF_UNIX etc.
+ * Linus Torvalds : Better skb_clone.
+ * Alan Cox : Added skb_copy.
+ * Alan Cox : Added all the changed routines Linus
+ * only put in the headers
+ * Ray VanTassle : Fixed --skb->lock in free
+ * Alan Cox : skb_copy copy arp field
+ * Andi Kleen : slabified it.
+ *
+ * NOTE:
+ * The __skb_ routines should be called with interrupts
+ * disabled, or you better be *real* sure that the operation is atomic
+ * with respect to whatever list is being frobbed (e.g. via lock_sock()
+ * or via disabling bottom half handlers, etc).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+int sysctl_hot_list_len = 128;
+
+static kmem_cache_t *skbuff_head_cache;
+
+static union {
+ struct sk_buff_head list;
+ char pad[SMP_CACHE_BYTES];
+} skb_head_pool[NR_CPUS];
+
+/*
+ * Keep out-of-line to prevent kernel bloat.
+ * __builtin_return_address is not used because it is not always
+ * reliable.
+ */
+
+/**
+ * skb_over_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_put(). Not user callable.
+ */
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+ printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
+ here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ BUG();
+}
+
+/**
+ * skb_under_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_push(). Not user callable.
+ */
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+ printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
+ here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ BUG();
+}
+
+static __inline__ struct sk_buff *skb_head_from_pool(void)
+{
+ struct sk_buff_head *list;
+ struct sk_buff *skb = NULL;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ list = &skb_head_pool[smp_processor_id()].list;
+
+ if (skb_queue_len(list))
+ skb = __skb_dequeue(list);
+
+ local_irq_restore(flags);
+ return skb;
+}
+
+static __inline__ void skb_head_to_pool(struct sk_buff *skb)
+{
+ struct sk_buff_head *list;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ list = &skb_head_pool[smp_processor_id()].list;
+
+ if (skb_queue_len(list) < sysctl_hot_list_len) {
+ __skb_queue_head(list, skb);
+ local_irq_restore(flags);
+
+ return;
+ }
+
+ local_irq_restore(flags);
+ kmem_cache_free(skbuff_head_cache, skb);
+}
+
+
+/* Allocate a new skbuff. We do this ourselves so we can fill in a few
+ * 'private' fields and also do memory statistics to find all the
+ * [BEEP] leaks.
+ *
+ */
+
+/**
+ * alloc_skb - allocate a network buffer
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ *
+ * Allocate a new &sk_buff. The returned buffer has no headroom and a
+ * tail room of @size bytes. The object has a reference count of one.
+ * Returns the buffer, or %NULL on failure.
+ *
+ * Buffers may only be allocated from interrupts using a @gfp_mask of
+ * %GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+{
+ struct sk_buff *skb;
+ u8 *data;
+
+ if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
+ static int count;
+ if (++count < 5) {
+ printk(KERN_ERR "alloc_skb called nonatomically "
+ "from interrupt %p\n", NET_CALLER(size));
+ BUG();
+ }
+ gfp_mask &= ~__GFP_WAIT;
+ }
+
+ /* Get the HEAD */
+ skb = skb_head_from_pool();
+ if (!skb) {
+ skb = kmem_cache_alloc(skbuff_head_cache,
+ gfp_mask & ~__GFP_DMA);
+ if (!skb)
+ goto out;
+ }
+
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = SKB_DATA_ALIGN(size);
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ if (!data)
+ goto nodata;
+
+ /* XXX: does not include slab overhead */
+ skb->truesize = size + sizeof(struct sk_buff);
+
+ /* Load the data pointers. */
+ skb->head = skb->data = skb->tail = data;
+ skb->end = data + size;
+
+ /* Set up other state */
+ skb->len = 0;
+ skb->cloned = 0;
+ skb->data_len = 0;
+
+ atomic_set(&skb->users, 1);
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+out:
+ return skb;
+nodata:
+ skb_head_to_pool(skb);
+ skb = NULL;
+ goto out;
+}
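+
+/* A minimal usage sketch (function name and sizes illustrative;
+ * ETH_HLEN is assumed available via the includes above): reserve
+ * headroom for a link-layer header, then append payload bytes with
+ * skb_put().
+ */
+#if 0
+static struct sk_buff *example_build_skb(unsigned int payload_len)
+{
+	struct sk_buff *skb = alloc_skb(ETH_HLEN + payload_len, GFP_ATOMIC);
+
+	if (!skb)
+		return NULL;
+	skb_reserve(skb, ETH_HLEN);	/* headroom for the header */
+	memset(skb_put(skb, payload_len), 0, payload_len);
+	return skb;
+}
+#endif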
+
+
+/*
+ * Slab constructor for a skb head.
+ */
+static inline void skb_headerinit(void *p, kmem_cache_t *cache,
+ unsigned long flags)
+{
+ struct sk_buff *skb = p;
+
+ skb->next = skb->prev = NULL;
+ skb->list = NULL;
+ skb->sk = NULL;
+ skb->stamp.tv_sec = 0; /* No idea about time */
+ skb->dev = NULL;
+ skb->physindev = NULL;
+ skb->physoutdev = NULL;
+ skb->dst = NULL;
+ memset(skb->cb, 0, sizeof(skb->cb));
+ skb->pkt_type = PACKET_HOST; /* Default type */
+ skb->ip_summed = 0;
+ skb->priority = 0;
+ skb->security = 0; /* By default packets are insecure */
+ skb->destructor = NULL;
+
+#ifdef CONFIG_NETFILTER
+ skb->nfmark = skb->nfcache = 0;
+ skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug = 0;
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+ skb->tc_index = 0;
+#endif
+}
+
+static void skb_drop_fraglist(struct sk_buff *skb)
+{
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ skb_shinfo(skb)->frag_list = NULL;
+
+ do {
+ struct sk_buff *this = list;
+ list = list->next;
+ kfree_skb(this);
+ } while (list);
+}
+
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+ struct sk_buff *list;
+
+ for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
+ skb_get(list);
+}
+
+static void skb_release_data(struct sk_buff *skb)
+{
+ if (!skb->cloned ||
+ atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
+ if (skb_shinfo(skb)->nr_frags) {
+ int i;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ put_page(skb_shinfo(skb)->frags[i].page);
+ }
+
+ if (skb_shinfo(skb)->frag_list)
+ skb_drop_fraglist(skb);
+
+ kfree(skb->head);
+ }
+}
+
+/*
+ * Free the memory of an skbuff without cleaning the state.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+ skb_release_data(skb);
+ skb_head_to_pool(skb);
+}
+
+/**
+ * __kfree_skb - private function
+ * @skb: buffer
+ *
+ * Free an sk_buff. Release anything attached to the buffer.
+ * Clean the state. This is an internal helper function. Users should
+ * always call kfree_skb.
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+ if (skb->list) {
+ printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
+ "on a list (from %p).\n", NET_CALLER(skb));
+ BUG();
+ }
+
+ dst_release(skb->dst);
+ if(skb->destructor) {
+ if (in_irq())
+ printk(KERN_WARNING "Warning: kfree_skb on "
+ "hard IRQ %p\n", NET_CALLER(skb));
+ skb->destructor(skb);
+ }
+#ifdef CONFIG_NETFILTER
+ nf_conntrack_put(skb->nfct);
+#endif
+ skb_headerinit(skb, NULL, 0); /* clean state */
+ kfree_skbmem(skb);
+}
+
+/**
+ * skb_clone - duplicate an sk_buff
+ * @skb: buffer to clone
+ * @gfp_mask: allocation priority
+ *
+ * Duplicate an &sk_buff. The new one is not owned by a socket. Both
+ * copies share the same packet data but not structure. The new
+ * buffer has a reference count of 1. If the allocation fails the
+ * function returns %NULL; otherwise the new buffer is returned.
+ *
+ * If this function is called from an interrupt, @gfp_mask must be
+ * %GFP_ATOMIC.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+{
+ struct sk_buff *n = skb_head_from_pool();
+
+ if (!n) {
+ n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ if (!n)
+ return NULL;
+ }
+
+#define C(x) n->x = skb->x
+
+ n->next = n->prev = NULL;
+ n->list = NULL;
+ n->sk = NULL;
+ C(stamp);
+ C(dev);
+ C(physindev);
+ C(physoutdev);
+ C(h);
+ C(nh);
+ C(mac);
+ C(dst);
+ dst_clone(n->dst);
+ memcpy(n->cb, skb->cb, sizeof(skb->cb));
+ C(len);
+ C(data_len);
+ C(csum);
+ n->cloned = 1;
+ C(pkt_type);
+ C(ip_summed);
+ C(priority);
+ atomic_set(&n->users, 1);
+ C(protocol);
+ C(security);
+ C(truesize);
+ C(head);
+ C(data);
+ C(tail);
+ C(end);
+ n->destructor = NULL;
+#ifdef CONFIG_NETFILTER
+ C(nfmark);
+ C(nfcache);
+ C(nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+ C(nf_debug);
+#endif
+#endif /*CONFIG_NETFILTER*/
+#if defined(CONFIG_HIPPI)
+ C(private);
+#endif
+#ifdef CONFIG_NET_SCHED
+ C(tc_index);
+#endif
+
+ atomic_inc(&(skb_shinfo(skb)->dataref));
+ skb->cloned = 1;
+#ifdef CONFIG_NETFILTER
+ nf_conntrack_get(skb->nfct);
+#endif
+ return n;
+}
+
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+ /*
+ * Shift between the two data areas in bytes
+ */
+ unsigned long offset = new->data - old->data;
+
+ new->list = NULL;
+ new->sk = NULL;
+ new->dev = old->dev;
+ new->physindev = old->physindev;
+ new->physoutdev = old->physoutdev;
+ new->priority = old->priority;
+ new->protocol = old->protocol;
+ new->dst = dst_clone(old->dst);
+ new->h.raw = old->h.raw + offset;
+ new->nh.raw = old->nh.raw + offset;
+ new->mac.raw = old->mac.raw + offset;
+ memcpy(new->cb, old->cb, sizeof(old->cb));
+ atomic_set(&new->users, 1);
+ new->pkt_type = old->pkt_type;
+ new->stamp = old->stamp;
+ new->destructor = NULL;
+ new->security = old->security;
+#ifdef CONFIG_NETFILTER
+ new->nfmark = old->nfmark;
+ new->nfcache = old->nfcache;
+ new->nfct = old->nfct;
+ nf_conntrack_get(new->nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+ new->nf_debug = old->nf_debug;
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+ new->tc_index = old->tc_index;
+#endif
+}
+
+/**
+ * skb_copy - create private copy of an sk_buff
+ * @skb: buffer to copy
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an &sk_buff and its data. This is used when the
+ * caller wishes to modify the data and needs a private copy of the
+ * data to alter. Returns %NULL on failure or the pointer to the buffer
+ * on success. The returned buffer has a reference count of 1.
+ *
+ * As a by-product this function converts a non-linear &sk_buff to a
+ * linear one, so that the &sk_buff becomes completely private and the
+ * caller is allowed to modify all the data of the returned buffer. This
+ * means that this function is not recommended for use in circumstances
+ * when only the header is going to be modified. Use pskb_copy() instead.
+ */
+
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+ int headerlen = skb->data - skb->head;
+ /*
+ * Allocate the copy buffer
+ */
+ struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
+ gfp_mask);
+ if (!n)
+ return NULL;
+
+ /* Set the data pointer */
+ skb_reserve(n, headerlen);
+ /* Set the tail pointer and length */
+ skb_put(n, skb->len);
+ n->csum = skb->csum;
+ n->ip_summed = skb->ip_summed;
+
+ if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
+ BUG();
+
+ copy_skb_header(n, skb);
+ return n;
+}
+
+/* Keep head the same: replace data */
+int skb_linearize(struct sk_buff *skb, int gfp_mask)
+{
+ unsigned int size;
+ u8 *data;
+ long offset;
+ int headerlen = skb->data - skb->head;
+ int expand = (skb->tail + skb->data_len) - skb->end;
+
+ if (skb_shared(skb))
+ BUG();
+
+ if (expand <= 0)
+ expand = 0;
+
+ size = skb->end - skb->head + expand;
+ size = SKB_DATA_ALIGN(size);
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ if (!data)
+ return -ENOMEM;
+
+ /* Copy entire thing */
+ if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
+ BUG();
+
+ /* Offset between the two in bytes */
+ offset = data - skb->head;
+
+ /* Free old data. */
+ skb_release_data(skb);
+
+ skb->head = data;
+ skb->end = data + size;
+
+ /* Set up new pointers */
+ skb->h.raw += offset;
+ skb->nh.raw += offset;
+ skb->mac.raw += offset;
+ skb->tail += offset;
+ skb->data += offset;
+
+ /* Set up shinfo */
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+
+ /* We are no longer a clone, even if we were. */
+ skb->cloned = 0;
+
+ skb->tail += skb->data_len;
+ skb->data_len = 0;
+ return 0;
+}
+
+
+/**
+ * pskb_copy - create copy of an sk_buff with private head.
+ * @skb: buffer to copy
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an &sk_buff and part of its data, located
+ * in the header. Fragmented data remain shared. This is used when
+ * the caller wishes to modify only the header of an &sk_buff and
+ * needs a private copy of the header to alter. Returns %NULL on failure
+ * or the pointer to the buffer on success.
+ * The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+{
+ /*
+ * Allocate the copy buffer
+ */
+ struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
+
+ if (!n)
+ goto out;
+
+ /* Set the data pointer */
+ skb_reserve(n, skb->data - skb->head);
+ /* Set the tail pointer and length */
+ skb_put(n, skb_headlen(skb));
+ /* Copy the bytes */
+ memcpy(n->data, skb->data, n->len);
+ n->csum = skb->csum;
+ n->ip_summed = skb->ip_summed;
+
+ n->data_len = skb->data_len;
+ n->len = skb->len;
+
+ if (skb_shinfo(skb)->nr_frags) {
+ int i;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+ get_page(skb_shinfo(n)->frags[i].page);
+ }
+ skb_shinfo(n)->nr_frags = i;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
+ skb_clone_fraglist(n);
+ }
+
+ copy_skb_header(n, skb);
+out:
+ return n;
+}
+
+/**
+ * pskb_expand_head - reallocate header of &sk_buff
+ * @skb: buffer to reallocate
+ * @nhead: room to add at head
+ * @ntail: room to add at tail
+ * @gfp_mask: allocation priority
+ *
+ * Expands (or creates an identical copy, if &nhead and &ntail are zero)
+ * the header of the skb. The &sk_buff itself is not changed and MUST
+ * have a reference count of 1. Returns zero on success, or an error
+ * if the expansion failed; in the latter case, the &sk_buff is left
+ * unchanged.
+ *
+ * All the pointers pointing into skb header may change and must be
+ * reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+{
+ int i;
+ u8 *data;
+ int size = nhead + (skb->end - skb->head) + ntail;
+ long off;
+
+ if (skb_shared(skb))
+ BUG();
+
+ size = SKB_DATA_ALIGN(size);
+
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ if (!data)
+ goto nodata;
+
+ /* Copy only real data... and, alas, header. This should be
+ * optimized for the cases when header is void. */
+ memcpy(data + nhead, skb->head, skb->tail - skb->head);
+ memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ get_page(skb_shinfo(skb)->frags[i].page);
+
+ if (skb_shinfo(skb)->frag_list)
+ skb_clone_fraglist(skb);
+
+ skb_release_data(skb);
+
+ off = (data + nhead) - skb->head;
+
+ skb->head = data;
+ skb->end = data + size;
+ skb->data += off;
+ skb->tail += off;
+ skb->mac.raw += off;
+ skb->h.raw += off;
+ skb->nh.raw += off;
+ skb->cloned = 0;
+ atomic_set(&skb_shinfo(skb)->dataref, 1);
+ return 0;
+
+nodata:
+ return -ENOMEM;
+}
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+ struct sk_buff *skb2;
+ int delta = headroom - skb_headroom(skb);
+
+ if (delta <= 0)
+ skb2 = pskb_copy(skb, GFP_ATOMIC);
+ else {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
+ GFP_ATOMIC)) {
+ kfree_skb(skb2);
+ skb2 = NULL;
+ }
+ }
+ return skb2;
+}
+
+
+/**
+ * skb_copy_expand - copy and expand sk_buff
+ * @skb: buffer to copy
+ * @newheadroom: new free bytes at head
+ * @newtailroom: new free bytes at tail
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an &sk_buff and its data and while doing so
+ * allocate additional space.
+ *
+ * This is used when the caller wishes to modify the data and needs a
+ * private copy of the data to alter as well as more space for new fields.
+ * Returns %NULL on failure or the pointer to the buffer
+ * on success. The returned buffer has a reference count of 1.
+ *
+ * You must pass %GFP_ATOMIC as the allocation priority if this function
+ * is called from an interrupt.
+ */
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+ int newheadroom, int newtailroom, int gfp_mask)
+{
+ /*
+ * Allocate the copy buffer
+ */
+ struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
+ gfp_mask);
+ if (!n)
+ return NULL;
+
+ skb_reserve(n, newheadroom);
+
+ /* Set the tail pointer and length */
+ skb_put(n, skb->len);
+
+ /* Copy the data only. */
+ if (skb_copy_bits(skb, 0, n->data, skb->len))
+ BUG();
+
+ copy_skb_header(n, skb);
+
+ return n;
+}
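+
+/* Illustrative sketch (hypothetical `header'/`trailer' buffers, not from
+ * this patch): growing both ends of a buffer before prepending a 16-byte
+ * encapsulation header and appending an 8-byte trailer:
+ *
+ *	struct sk_buff *n = skb_copy_expand(skb, 16, 8, GFP_ATOMIC);
+ *	if (n) {
+ *		memcpy(skb_push(n, 16), header, 16);
+ *		memcpy(skb_put(n, 8), trailer, 8);
+ *	}
+ */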
+
+/* Trims skb to length len. It may change skb pointers if "realloc" is 1.
+ * If realloc == 0 and trimming is impossible without changing the data,
+ * it is a BUG().
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+ int offset = skb_headlen(skb);
+ int nfrags = skb_shinfo(skb)->nr_frags;
+ int i;
+
+ for (i = 0; i < nfrags; i++) {
+ int end = offset + skb_shinfo(skb)->frags[i].size;
+ if (end > len) {
+ if (skb_cloned(skb)) {
+ if (!realloc)
+ BUG();
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ return -ENOMEM;
+ }
+ if (len <= offset) {
+ put_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->nr_frags--;
+ } else {
+ skb_shinfo(skb)->frags[i].size = len - offset;
+ }
+ }
+ offset = end;
+ }
+
+ if (offset < len) {
+ skb->data_len -= skb->len - len;
+ skb->len = len;
+ } else {
+ if (len <= skb_headlen(skb)) {
+ skb->len = len;
+ skb->data_len = 0;
+ skb->tail = skb->data + len;
+ if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+ skb_drop_fraglist(skb);
+ } else {
+ skb->data_len -= skb->len - len;
+ skb->len = len;
+ }
+ }
+
+ return 0;
+}
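+
+/* Worked example (illustrative): with 100 bytes in the skb head, one
+ * 200-byte page frag and len == 150, the loop shrinks the frag to
+ * len - offset == 50 bytes; the final else branch then takes skb->len
+ * from 300 to 150 and skb->data_len from 200 to 50.
+ */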
+
+/**
+ * __pskb_pull_tail - advance tail of skb header
+ * @skb: buffer to reallocate
+ * @delta: number of bytes to advance tail
+ *
+ *	This function only makes sense on a fragmented &sk_buff: it expands
+ *	the header, moving its tail forward and copying the necessary data
+ *	from the fragmented part.
+ *
+ *	The &sk_buff MUST have a reference count of 1.
+ *
+ *	Returns %NULL (and the &sk_buff is left unchanged) if the pull
+ *	failed, or a pointer to the new tail of the skb on success.
+ *
+ *	All the pointers pointing into the skb header may change and must be
+ *	reloaded after a call to this function.
+ */
+
+/* Moves the tail of the skb head forward, copying data from the
+ * fragmented part where necessary.
+ * 1. It may fail due to an allocation failure.
+ * 2. It may change skb pointers.
+ *
+ * It is fairly complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+	/* If the skb does not have enough free space at the tail, get a new
+	 * one with an extra 128 bytes for future expansion. If there is
+	 * enough room at the tail, reallocate without expansion only if the
+	 * skb is cloned.
+ */
+ int i, k, eat = (skb->tail + delta) - skb->end;
+
+ if (eat > 0 || skb_cloned(skb)) {
+ if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
+ GFP_ATOMIC))
+ return NULL;
+ }
+
+ if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+ BUG();
+
+	/* Optimization: no fragments, so no reason to pre-estimate the
+	 * size of the pulled pages. Superb.
+ */
+ if (!skb_shinfo(skb)->frag_list)
+ goto pull_pages;
+
+ /* Estimate size of pulled pages. */
+ eat = delta;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ if (skb_shinfo(skb)->frags[i].size >= eat)
+ goto pull_pages;
+ eat -= skb_shinfo(skb)->frags[i].size;
+ }
+
+	/* If we need to update the frag list, we are in trouble.
+	 * Certainly, it is possible to add an offset to the skb data,
+	 * but given that pulling is expected to be a very rare
+	 * operation, it is worth fighting further bloat of the skb
+	 * head and crucifying ourselves here instead.
+	 * Pure masochism, indeed. 8)8)
+ */
+ if (eat) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ struct sk_buff *clone = NULL;
+ struct sk_buff *insp = NULL;
+
+ do {
+ if (!list)
+ BUG();
+
+ if (list->len <= eat) {
+ /* Eaten as whole. */
+ eat -= list->len;
+ list = list->next;
+ insp = list;
+ } else {
+ /* Eaten partially. */
+
+ if (skb_shared(list)) {
+ /* Sucks! We need to fork list. :-( */
+ clone = skb_clone(list, GFP_ATOMIC);
+ if (!clone)
+ return NULL;
+ insp = list->next;
+ list = clone;
+ } else {
+ /* This may be pulled without
+ * problems. */
+ insp = list;
+ }
+ if (!pskb_pull(list, eat)) {
+ if (clone)
+ kfree_skb(clone);
+ return NULL;
+ }
+ break;
+ }
+ } while (eat);
+
+ /* Free pulled out fragments. */
+ while ((list = skb_shinfo(skb)->frag_list) != insp) {
+ skb_shinfo(skb)->frag_list = list->next;
+ kfree_skb(list);
+ }
+ /* And insert new clone at head. */
+ if (clone) {
+ clone->next = list;
+ skb_shinfo(skb)->frag_list = clone;
+ }
+ }
+ /* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+ eat = delta;
+ k = 0;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ if (skb_shinfo(skb)->frags[i].size <= eat) {
+ put_page(skb_shinfo(skb)->frags[i].page);
+ eat -= skb_shinfo(skb)->frags[i].size;
+ } else {
+ skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+ if (eat) {
+ skb_shinfo(skb)->frags[k].page_offset += eat;
+ skb_shinfo(skb)->frags[k].size -= eat;
+ eat = 0;
+ }
+ k++;
+ }
+ }
+ skb_shinfo(skb)->nr_frags = k;
+
+ skb->tail += delta;
+ skb->data_len -= delta;
+
+ return skb->tail;
+}
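+
+/* Assumed caller pattern (illustrative, not from this patch): the slow
+ * path behind making at least `hlen' bytes of a header linear before
+ * parsing it:
+ *
+ *	if (skb_headlen(skb) < hlen &&
+ *	    !__pskb_pull_tail(skb, hlen - skb_headlen(skb)))
+ *		goto drop;
+ */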
+
+/* Copy some data bits from skb to kernel buffer. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+ int i, copy;
+ int start = skb->len - skb->data_len;
+
+ if (offset > (int)skb->len - len)
+ goto fault;
+
+ /* Copy header. */
+ if ((copy = start - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ memcpy(to, skb->data + offset, copy);
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ u8 *vaddr;
+
+ if (copy > len)
+ copy = len;
+
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ memcpy(to,
+ vaddr + skb_shinfo(skb)->frags[i].page_offset+
+ offset - start, copy);
+ kunmap_skb_frag(vaddr);
+
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_copy_bits(list, offset - start,
+ to, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+ start = end;
+ }
+ }
+ if (!len)
+ return 0;
+
+fault:
+ return -EFAULT;
+}
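+
+/* Usage note: ip_fragment() in ip_output.c (also part of this patch)
+ * relies on this to copy a block of the datagram into each fragment,
+ * regardless of how the original skb is split across the head, page
+ * frags and frag_list:
+ *
+ *	if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
+ *		BUG();
+ */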
+
+/* Checksum skb data. */
+
+unsigned int skb_checksum(const struct sk_buff *skb, int offset,
+ int len, unsigned int csum)
+{
+ int start = skb->len - skb->data_len;
+ int i, copy = start - offset;
+ int pos = 0;
+
+ /* Checksum header. */
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ csum = csum_partial(skb->data + offset, copy, csum);
+ if ((len -= copy) == 0)
+ return csum;
+ offset += copy;
+ pos = copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ unsigned int csum2;
+ u8 *vaddr;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ if (copy > len)
+ copy = len;
+ vaddr = kmap_skb_frag(frag);
+ csum2 = csum_partial(vaddr + frag->page_offset +
+ offset - start, copy, 0);
+ kunmap_skb_frag(vaddr);
+ csum = csum_block_add(csum, csum2, pos);
+ if (!(len -= copy))
+ return csum;
+ offset += copy;
+ pos += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ unsigned int csum2;
+ if (copy > len)
+ copy = len;
+ csum2 = skb_checksum(list, offset - start,
+ copy, 0);
+ csum = csum_block_add(csum, csum2, pos);
+ if ((len -= copy) == 0)
+ return csum;
+ offset += copy;
+ pos += copy;
+ }
+ start = end;
+ }
+ }
+ if (len)
+ BUG();
+
+ return csum;
+}
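+
+/* Note on the `pos' argument to csum_block_add() above: a 16-bit
+ * one's-complement sum is byte-order sensitive, so a partial sum for a
+ * block that starts at an odd offset must be rotated by 8 bits before
+ * it is folded in; `pos' tracks that running offset.
+ */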
+
+/* Both of above in one bottle. */
+
+unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
+ u8 *to, int len, unsigned int csum)
+{
+ int start = skb->len - skb->data_len;
+ int i, copy = start - offset;
+ int pos = 0;
+
+ /* Copy header. */
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ csum = csum_partial_copy_nocheck(skb->data + offset, to,
+ copy, csum);
+ if ((len -= copy) == 0)
+ return csum;
+ offset += copy;
+ to += copy;
+ pos = copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ unsigned int csum2;
+ u8 *vaddr;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ if (copy > len)
+ copy = len;
+ vaddr = kmap_skb_frag(frag);
+ csum2 = csum_partial_copy_nocheck(vaddr +
+ frag->page_offset +
+ offset - start, to,
+ copy, 0);
+ kunmap_skb_frag(vaddr);
+ csum = csum_block_add(csum, csum2, pos);
+ if (!(len -= copy))
+ return csum;
+ offset += copy;
+ to += copy;
+ pos += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ unsigned int csum2;
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ csum2 = skb_copy_and_csum_bits(list,
+ offset - start,
+ to, copy, 0);
+ csum = csum_block_add(csum, csum2, pos);
+ if ((len -= copy) == 0)
+ return csum;
+ offset += copy;
+ to += copy;
+ pos += copy;
+ }
+ start = end;
+ }
+ }
+ if (len)
+ BUG();
+ return csum;
+}
+
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
+{
+ unsigned int csum;
+ long csstart;
+
+ if (skb->ip_summed == CHECKSUM_HW)
+ csstart = skb->h.raw - skb->data;
+ else
+ csstart = skb->len - skb->data_len;
+
+ if (csstart > skb->len - skb->data_len)
+ BUG();
+
+ memcpy(to, skb->data, csstart);
+
+ csum = 0;
+ if (csstart != skb->len)
+ csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
+ skb->len - csstart, 0);
+
+ if (skb->ip_summed == CHECKSUM_HW) {
+ long csstuff = csstart + skb->csum;
+
+ *((unsigned short *)(to + csstuff)) = csum_fold(csum);
+ }
+}
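+
+/* Note: in the CHECKSUM_HW case, skb->csum holds the offset of the
+ * checksum field within the transport header (relative to skb->h.raw),
+ * so csstart + skb->csum above locates the spot in the output buffer
+ * where the folded sum is written.
+ */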
+
+#if 0
+/*
+ * Tune the memory allocator for a new MTU size.
+ */
+void skb_add_mtu(int mtu)
+{
+ /* Must match allocation in alloc_skb */
+ mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
+
+ kmem_add_cache_size(mtu);
+}
+#endif
+
+void __init skb_init(void)
+{
+ int i;
+
+ skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+ sizeof(struct sk_buff),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ skb_headerinit, NULL);
+ if (!skbuff_head_cache)
+ panic("cannot create skbuff cache");
+
+ for (i = 0; i < NR_CPUS; i++)
+ skb_queue_head_init(&skb_head_pool[i].list);
+}
diff --git a/br-nf-bds/linux2.5/net/ipv4/ip_output.c b/br-nf-bds/linux2.5/net/ipv4/ip_output.c
new file mode 100644
index 0000000..130d631
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/ipv4/ip_output.c
@@ -0,0 +1,1033 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * The Internet Protocol (IP) output module.
+ *
+ * Version: $Id: ip_output.c,v 1.1 2002/08/24 09:25:29 bdschuym Exp $
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Donald Becker, <becker@super.org>
+ * Alan Cox, <Alan.Cox@linux.org>
+ * Richard Underwood
+ * Stefan Becker, <stefanb@yello.ping.de>
+ * Jorge Cwik, <jorge@laser.satlink.net>
+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *
+ * See ip_input.c for original log
+ *
+ * Fixes:
+ * Alan Cox : Missing nonblock feature in ip_build_xmit.
+ * Mike Kilburn : htons() missing in ip_build_xmit.
+ * Bradford Johnson: Fix faulty handling of some frames when
+ * no route is found.
+ * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit
+ * (in case if packet not accepted by
+ * output firewall rules)
+ * Mike McLagan : Routing by source
+ * Alexey Kuznetsov: use new route cache
+ * Andi Kleen: Fix broken PMTU recovery and remove
+ * some redundant tests.
+ * Vitaly E. Lavrov : Transparent proxy revived after year coma.
+ * Andi Kleen : Replace ip_reply with ip_send_reply.
+ * Andi Kleen : Split fast and slow ip_build_xmit path
+ * for decreased register pressure on x86
+ *				and more readability.
+ * Marc Boucher : When call_out_firewall returns FW_QUEUE,
+ * silently drop skb instead of failing with -EPERM.
+ * Detlev Wengorz : Copy protocol for fragments.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/config.h>
+
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+
+#include <net/snmp.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/arp.h>
+#include <net/icmp.h>
+#include <net/raw.h>
+#include <net/checksum.h>
+#include <net/inetpeer.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/mroute.h>
+#include <linux/netlink.h>
+
+/*
+ * Shall we try to damage output packets if routing dev changes?
+ */
+
+int sysctl_ip_dynaddr = 0;
+int sysctl_ip_default_ttl = IPDEFTTL;
+
+/* Generate a checksum for an outgoing IP datagram. */
+__inline__ void ip_send_check(struct iphdr *iph)
+{
+ iph->check = 0;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+}
+
+/* dev_loopback_xmit for use with netfilter. */
+static int ip_dev_loopback_xmit(struct sk_buff *newskb)
+{
+ newskb->mac.raw = newskb->data;
+ __skb_pull(newskb, newskb->nh.raw - newskb->data);
+ newskb->pkt_type = PACKET_LOOPBACK;
+ newskb->ip_summed = CHECKSUM_UNNECESSARY;
+ BUG_TRAP(newskb->dst);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ nf_debug_ip_loopback_xmit(newskb);
+#endif
+ netif_rx(newskb);
+ return 0;
+}
+
+/* Don't just hand NF_HOOK skb->dst->output, in case a netfilter hook
+   changes the route */
+static inline int
+output_maybe_reroute(struct sk_buff *skb)
+{
+ return skb->dst->output(skb);
+}
+
+/*
+ * Add an ip header to a skbuff and send it out.
+ */
+int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
+ u32 saddr, u32 daddr, struct ip_options *opt)
+{
+ struct inet_opt *inet = inet_sk(sk);
+ struct rtable *rt = (struct rtable *)skb->dst;
+ struct iphdr *iph;
+
+ /* Build the IP header. */
+ if (opt)
+ iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
+ else
+ iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->tos = inet->tos;
+ if (ip_dont_fragment(sk, &rt->u.dst))
+ iph->frag_off = __constant_htons(IP_DF);
+ else
+ iph->frag_off = 0;
+ iph->ttl = inet->ttl;
+ iph->daddr = rt->rt_dst;
+ iph->saddr = rt->rt_src;
+ iph->protocol = sk->protocol;
+ iph->tot_len = htons(skb->len);
+ ip_select_ident(iph, &rt->u.dst, sk);
+ skb->nh.iph = iph;
+
+ if (opt && opt->optlen) {
+ iph->ihl += opt->optlen>>2;
+ ip_options_build(skb, opt, daddr, rt, 0);
+ }
+ ip_send_check(iph);
+
+ /* Send it out. */
+ return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+ output_maybe_reroute);
+}
+
+static inline int ip_finish_output2(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb->dst;
+ struct hh_cache *hh = dst->hh;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ nf_debug_ip_finish_output2(skb);
+#endif /*CONFIG_NETFILTER_DEBUG*/
+
+ if (hh) {
+ read_lock_bh(&hh->hh_lock);
+ memcpy(skb->data - 16, hh->hh_data, 16);
+ read_unlock_bh(&hh->hh_lock);
+ skb_push(skb, hh->hh_len);
+ return hh->hh_output(skb);
+ } else if (dst->neighbour)
+ return dst->neighbour->output(skb);
+
+ if (net_ratelimit())
+ printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+__inline__ int ip_finish_output(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dst->dev;
+
+ skb->dev = dev;
+ skb->protocol = __constant_htons(ETH_P_IP);
+
+ return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+ ip_finish_output2);
+}
+
+int ip_mc_output(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ struct rtable *rt = (struct rtable*)skb->dst;
+ struct net_device *dev = rt->u.dst.dev;
+
+ /*
+ * If the indicated interface is up and running, send the packet.
+ */
+ IP_INC_STATS(IpOutRequests);
+#ifdef CONFIG_IP_ROUTE_NAT
+ if (rt->rt_flags & RTCF_NAT)
+ ip_do_nat(skb);
+#endif
+
+ skb->dev = dev;
+ skb->protocol = __constant_htons(ETH_P_IP);
+
+ /*
+ * Multicasts are looped back for other local users
+ */
+
+ if (rt->rt_flags&RTCF_MULTICAST) {
+ if ((!sk || inet_sk(sk)->mc_loop)
+#ifdef CONFIG_IP_MROUTE
+		/* Small optimization: do not loop back non-local frames
+		   that have returned after forwarding; they will be dropped
+		   by ip_mr_input in any case.
+		   Note that local frames are looped back to be delivered
+		   to local recipients.
+
+		   This check is duplicated in ip_mr_input at the moment.
+ */
+ && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
+#endif
+ ) {
+ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+ if (newskb)
+ NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
+ newskb->dev,
+ ip_dev_loopback_xmit);
+ }
+
+ /* Multicasts with ttl 0 must not go beyond the host */
+
+ if (skb->nh.iph->ttl == 0) {
+ kfree_skb(skb);
+ return 0;
+ }
+ }
+
+ if (rt->rt_flags&RTCF_BROADCAST) {
+ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+ if (newskb)
+ NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
+ newskb->dev, ip_dev_loopback_xmit);
+ }
+
+ return ip_finish_output(skb);
+}
+
+int ip_output(struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_ROUTE_NAT
+ struct rtable *rt = (struct rtable*)skb->dst;
+#endif
+
+ IP_INC_STATS(IpOutRequests);
+
+#ifdef CONFIG_IP_ROUTE_NAT
+ if (rt->rt_flags&RTCF_NAT)
+ ip_do_nat(skb);
+#endif
+
+ return ip_finish_output(skb);
+}
+
+/* Queues a packet to be sent, and starts the transmitter if necessary.
+ * This routine also needs to put in the total length and compute the
+ * checksum. We used to do this in two stages, ip_build_header() then
+ * this, but that scheme created a mess when routes disappeared etc.
+ * So we do it all here, and the TCP send engine has been changed to
+ * match. (No more unroutable FIN disasters, etc. wheee...) This will
+ * most likely make other reliable transport layers above IP easier
+ * to implement under Linux.
+ */
+static inline int ip_queue_xmit2(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ struct rtable *rt = (struct rtable *)skb->dst;
+ struct net_device *dev;
+ struct iphdr *iph = skb->nh.iph;
+
+ dev = rt->u.dst.dev;
+
+ /* This can happen when the transport layer has segments queued
+ * with a cached route, and by the time we get here things are
+ * re-routed to a device with a different MTU than the original
+ * device. Sick, but we must cover it.
+ */
+ if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
+ kfree_skb(skb);
+ if (skb2 == NULL)
+ return -ENOMEM;
+ if (sk)
+ skb_set_owner_w(skb2, sk);
+ skb = skb2;
+ iph = skb->nh.iph;
+ }
+
+ if (skb->len > rt->u.dst.pmtu)
+ goto fragment;
+
+ ip_select_ident(iph, &rt->u.dst, sk);
+
+ /* Add an IP checksum. */
+ ip_send_check(iph);
+
+ skb->priority = sk->priority;
+ return skb->dst->output(skb);
+
+fragment:
+ if (ip_dont_fragment(sk, &rt->u.dst)) {
+		/* Reject the packet ONLY if TCP might fragment
+		 * it itself, if we were careful enough.
+ */
+ NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big (len[%u] pmtu[%u]) to self\n",
+ skb->len, rt->u.dst.pmtu));
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(rt->u.dst.pmtu));
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+ ip_select_ident(iph, &rt->u.dst, sk);
+ if (skb->ip_summed == CHECKSUM_HW &&
+ (skb = skb_checksum_help(skb)) == NULL)
+ return -ENOMEM;
+ return ip_fragment(skb, skb->dst->output);
+}
+
+int ip_queue_xmit(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ struct inet_opt *inet = inet_sk(sk);
+ struct ip_options *opt = inet->opt;
+ struct rtable *rt;
+ struct iphdr *iph;
+
+ /* Skip all of this if the packet is already routed,
+	 * e.g. by something like SCTP.
+ */
+ rt = (struct rtable *) skb->dst;
+ if (rt != NULL)
+ goto packet_routed;
+
+ /* Make sure we can route this packet. */
+ rt = (struct rtable *)__sk_dst_check(sk, 0);
+ if (rt == NULL) {
+ u32 daddr;
+
+ /* Use correct destination address if we have options. */
+ daddr = inet->daddr;
+ if(opt && opt->srr)
+ daddr = opt->faddr;
+
+		/* If this fails, the retransmit mechanism of the transport
+		 * layer will keep trying until the route appears or the
+		 * connection times out.
+ */
+ if (ip_route_output(&rt, daddr, inet->saddr,
+ RT_CONN_FLAGS(sk),
+ sk->bound_dev_if))
+ goto no_route;
+ __sk_dst_set(sk, &rt->u.dst);
+ sk->route_caps = rt->u.dst.dev->features;
+ }
+ skb->dst = dst_clone(&rt->u.dst);
+
+packet_routed:
+ if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+ goto no_route;
+
+ /* OK, we know where to send it, allocate and build IP header. */
+ iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+ iph->tot_len = htons(skb->len);
+ if (ip_dont_fragment(sk, &rt->u.dst))
+ iph->frag_off = __constant_htons(IP_DF);
+ else
+ iph->frag_off = 0;
+ iph->ttl = inet->ttl;
+ iph->protocol = sk->protocol;
+ iph->saddr = rt->rt_src;
+ iph->daddr = rt->rt_dst;
+ skb->nh.iph = iph;
+	/* The transport layer sets skb->h.foo itself. */
+
+ if(opt && opt->optlen) {
+ iph->ihl += opt->optlen >> 2;
+ ip_options_build(skb, opt, inet->daddr, rt, 0);
+ }
+
+ return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+ ip_queue_xmit2);
+
+no_route:
+ IP_INC_STATS(IpOutNoRoutes);
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
+}
+
+/*
+ * Build and send a packet, with as little as one copy
+ *
+ * Doesn't care much about ip options... option length can be
+ * different for fragment at 0 and other fragments.
+ *
+ * Note that the fragment at the highest offset is sent first,
+ * so the getfrag routine can fill in the TCP/UDP checksum header
+ * field in the last fragment it sends... actually it also helps
+ * the reassemblers, they can put most packets in at the head of
+ * the fragment queue, and they know the total size in advance. This
+ * last feature will measurably improve the Linux fragment handler one
+ * day.
+ *
+ * The callback takes four args: an arbitrary pointer (a copy of frag),
+ * the destination buffer (char *), the offset to copy from, and the
+ * length to be copied, matching the getfrag prototype used below.
+ */
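+
+/* Illustrative sketch (hypothetical, not from this patch) of a getfrag
+ * callback copying from a plain kernel buffer handed in via `frag';
+ * ip_reply_glue_bits() at the end of this file is the real in-tree one:
+ *
+ *	static int kbuf_getfrag(const void *p, char *to,
+ *				unsigned int offset, unsigned int fraglen)
+ *	{
+ *		memcpy(to, (const char *)p + offset, fraglen);
+ *		return 0;
+ *	}
+ */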
+
+static int ip_build_xmit_slow(struct sock *sk,
+ int getfrag (const void *,
+ char *,
+ unsigned int,
+ unsigned int),
+ const void *frag,
+ unsigned length,
+ struct ipcm_cookie *ipc,
+ struct rtable *rt,
+ int flags)
+{
+ struct inet_opt *inet = inet_sk(sk);
+ unsigned int fraglen, maxfraglen, fragheaderlen;
+ int err;
+ int offset, mf;
+ int mtu;
+ u16 id;
+
+ int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
+ int nfrags=0;
+ struct ip_options *opt = ipc->opt;
+ int df = 0;
+
+ mtu = rt->u.dst.pmtu;
+ if (ip_dont_fragment(sk, &rt->u.dst))
+ df = __constant_htons(IP_DF);
+
+ length -= sizeof(struct iphdr);
+
+ if (opt) {
+ fragheaderlen = sizeof(struct iphdr) + opt->optlen;
+ maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
+ } else {
+ fragheaderlen = sizeof(struct iphdr);
+
+ /*
+ * Fragheaderlen is the size of 'overhead' on each buffer. Now work
+ * out the size of the frames to send.
+ */
+
+ maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
+ }
+
+ if (length + fragheaderlen > 0xFFFF) {
+ ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
+ return -EMSGSIZE;
+ }
+
+ /*
+ * Start at the end of the frame by handling the remainder.
+ */
+
+ offset = length - (length % (maxfraglen - fragheaderlen));
+
+ /*
+ * Amount of memory to allocate for final fragment.
+ */
+
+ fraglen = length - offset + fragheaderlen;
+
+ if (length-offset==0) {
+ fraglen = maxfraglen;
+ offset -= maxfraglen-fragheaderlen;
+ }
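+
+	/* Worked example (illustrative): with length == 3000 and
+	 * maxfraglen - fragheaderlen == 1480, offset starts at 2960 and
+	 * the tail fragment carries the odd 40 bytes; the loop below then
+	 * steps back through offsets 1480 and 0 sending full-sized
+	 * fragments, which is how the highest-offset fragment goes first.
+	 */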
+
+ /*
+ * The last fragment will not have MF (more fragments) set.
+ */
+
+ mf = 0;
+
+ /*
+ * Don't fragment packets for path mtu discovery.
+ */
+
+ if (offset > 0 && inet->pmtudisc == IP_PMTUDISC_DO) {
+ ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
+ return -EMSGSIZE;
+ }
+ if (flags&MSG_PROBE)
+ goto out;
+
+ /*
+ * Begin outputting the bytes.
+ */
+
+ id = inet->id++;
+
+ do {
+ char *data;
+ struct sk_buff * skb;
+
+ /*
+ * Get the memory we require with some space left for alignment.
+ */
+ if (!(flags & MSG_DONTWAIT) || nfrags == 0) {
+ skb = sock_alloc_send_skb(sk, fraglen + hh_len + 15,
+ (flags & MSG_DONTWAIT), &err);
+ } else {
+ /* On a non-blocking write, we check for send buffer
+ * usage on the first fragment only.
+ */
+ skb = sock_wmalloc(sk, fraglen + hh_len + 15, 1,
+ sk->allocation);
+ if (!skb)
+ err = -ENOBUFS;
+ }
+ if (skb == NULL)
+ goto error;
+
+ /*
+ * Fill in the control structures
+ */
+
+ skb->priority = sk->priority;
+ skb->dst = dst_clone(&rt->u.dst);
+ skb_reserve(skb, hh_len);
+
+ /*
+ * Find where to start putting bytes.
+ */
+
+ data = skb_put(skb, fraglen);
+ skb->nh.iph = (struct iphdr *)data;
+
+ /*
+ * Only write IP header onto non-raw packets
+ */
+
+ {
+ struct iphdr *iph = (struct iphdr *)data;
+
+ iph->version = 4;
+ iph->ihl = 5;
+ if (opt) {
+ iph->ihl += opt->optlen>>2;
+ ip_options_build(skb, opt,
+ ipc->addr, rt, offset);
+ }
+ iph->tos = inet->tos;
+ iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
+ iph->frag_off = htons(offset>>3)|mf|df;
+ iph->id = id;
+ if (!mf) {
+ if (offset || !df) {
+ /* Select an unpredictable ident only
+ * for packets without DF or having
+ * been fragmented.
+ */
+ __ip_select_ident(iph, &rt->u.dst);
+ id = iph->id;
+ }
+
+ /*
+ * Any further fragments will have MF set.
+ */
+ mf = __constant_htons(IP_MF);
+ }
+ if (rt->rt_type == RTN_MULTICAST)
+ iph->ttl = inet->mc_ttl;
+ else
+ iph->ttl = inet->ttl;
+ iph->protocol = sk->protocol;
+ iph->check = 0;
+ iph->saddr = rt->rt_src;
+ iph->daddr = rt->rt_dst;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ data += iph->ihl*4;
+ }
+
+ /*
+ * User data callback
+ */
+
+ if (getfrag(frag, data, offset, fraglen-fragheaderlen)) {
+ err = -EFAULT;
+ kfree_skb(skb);
+ goto error;
+ }
+
+ offset -= (maxfraglen-fragheaderlen);
+ fraglen = maxfraglen;
+
+ nfrags++;
+
+ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
+ skb->dst->dev, output_maybe_reroute);
+ if (err) {
+ if (err > 0)
+ err = inet->recverr ? net_xmit_errno(err) : 0;
+ if (err)
+ goto error;
+ }
+ } while (offset >= 0);
+
+ if (nfrags>1)
+ ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
+out:
+ return 0;
+
+error:
+ IP_INC_STATS(IpOutDiscards);
+ if (nfrags>1)
+ ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
+ return err;
+}
+
+/*
+ * Fast path for unfragmented packets.
+ */
+int ip_build_xmit(struct sock *sk,
+ int getfrag (const void *,
+ char *,
+ unsigned int,
+ unsigned int),
+ const void *frag,
+ unsigned length,
+ struct ipcm_cookie *ipc,
+ struct rtable *rt,
+ int flags)
+{
+ struct inet_opt *inet = inet_sk(sk);
+ int err;
+ struct sk_buff *skb;
+ int df;
+ struct iphdr *iph;
+
+ /*
+ * Try the simple case first. This leaves fragmented frames, and by
+	 *	choice RAW frames within 20 bytes of maximum size (rare) to the long path
+ */
+
+ if (!inet->hdrincl) {
+ length += sizeof(struct iphdr);
+
+ /*
+ * Check for slow path.
+ */
+ if (length > rt->u.dst.pmtu || ipc->opt != NULL)
+ return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);
+ } else {
+ if (length > rt->u.dst.dev->mtu) {
+ ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport,
+ rt->u.dst.dev->mtu);
+ return -EMSGSIZE;
+ }
+ }
+ if (flags&MSG_PROBE)
+ goto out;
+
+ /*
+ * Do path mtu discovery if needed.
+ */
+ df = 0;
+ if (ip_dont_fragment(sk, &rt->u.dst))
+ df = __constant_htons(IP_DF);
+
+ /*
+ * Fast path for unfragmented frames without options.
+ */
+ {
+ int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
+
+ skb = sock_alloc_send_skb(sk, length+hh_len+15,
+ flags&MSG_DONTWAIT, &err);
+ if(skb==NULL)
+ goto error;
+ skb_reserve(skb, hh_len);
+ }
+
+ skb->priority = sk->priority;
+ skb->dst = dst_clone(&rt->u.dst);
+
+ skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+
+ if (!inet->hdrincl) {
+ iph->version=4;
+ iph->ihl=5;
+ iph->tos = inet->tos;
+ iph->tot_len = htons(length);
+ iph->frag_off = df;
+ iph->ttl = inet->mc_ttl;
+ ip_select_ident(iph, &rt->u.dst, sk);
+ if (rt->rt_type != RTN_MULTICAST)
+ iph->ttl = inet->ttl;
+ iph->protocol=sk->protocol;
+ iph->saddr=rt->rt_src;
+ iph->daddr=rt->rt_dst;
+ iph->check=0;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
+ }
+ else
+ err = getfrag(frag, (void *)iph, 0, length);
+
+ if (err)
+ goto error_fault;
+
+ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+ output_maybe_reroute);
+ if (err > 0)
+ err = inet->recverr ? net_xmit_errno(err) : 0;
+ if (err)
+ goto error;
+out:
+ return 0;
+
+error_fault:
+ err = -EFAULT;
+ kfree_skb(skb);
+error:
+ IP_INC_STATS(IpOutDiscards);
+ return err;
+}
+
+/*
+ * This IP datagram is too large to be sent in one piece. Break it up into
+ *	smaller pieces (each of a size equal to the IP header plus
+ *	a block of the data of the original IP data part) that will still fit
+ *	in a single device frame, and queue such a frame for sending.
+ *
+ * Yes this is inefficient, feel free to submit a quicker one.
+ */
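+
+/* Worked example (illustrative): with a path MTU of 1500 and a 20-byte
+ * IP header, each fragment can carry at most 1480 data bytes; since every
+ * non-final fragment must carry a multiple of 8 data bytes, a datagram
+ * with 3000 bytes of payload is cut into fragments of 1480, 1480 and 40
+ * bytes at offsets 0, 1480 and 2960.
+ */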
+
+int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+{
+ struct iphdr *iph;
+ int raw = 0;
+ int ptr;
+ struct net_device *dev;
+ struct sk_buff *skb2;
+ unsigned int mtu, hlen, left, len;
+ int offset;
+ int not_last_frag;
+ struct rtable *rt = (struct rtable*)skb->dst;
+ int err = 0;
+
+ dev = rt->u.dst.dev;
+
+ /*
+ * Point into the IP datagram header.
+ */
+
+ iph = skb->nh.iph;
+
+ /*
+ * Setup starting values.
+ */
+
+ hlen = iph->ihl * 4;
+ left = skb->len - hlen; /* Space per frame */
+ mtu = rt->u.dst.pmtu - hlen; /* Size of data space */
+ ptr = raw + hlen; /* Where to start from */
+
+ /*
+ * Fragment the datagram.
+ */
+
+ offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
+ not_last_frag = iph->frag_off & __constant_htons(IP_MF);
+
+ /*
+ * Keep copying data until we run out.
+ */
+
+ while(left > 0) {
+ len = left;
+ /* IF: it doesn't fit, use 'mtu' - the data space left */
+ if (len > mtu)
+ len = mtu;
+		/* IF: we are not sending up to and including the packet end
+		   then align the next start on an eight-byte boundary */
+ if (len < left) {
+ len &= ~7;
+ }
+ /*
+ * Allocate buffer.
+ */
+
+ if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
+ NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ /*
+ * Set up data on packet
+ */
+
+ skb2->pkt_type = skb->pkt_type;
+ skb2->priority = skb->priority;
+ skb_reserve(skb2, (dev->hard_header_len+15)&~15);
+ skb_put(skb2, len + hlen);
+ skb2->nh.raw = skb2->data;
+ skb2->h.raw = skb2->data + hlen;
+ skb2->protocol = skb->protocol;
+ skb2->security = skb->security;
+
+ /*
+ * Charge the memory for the fragment to any owner
+ * it might possess
+ */
+
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ skb2->dst = dst_clone(skb->dst);
+ skb2->dev = skb->dev;
+ skb2->physindev = skb->physindev;
+ skb2->physoutdev = skb->physoutdev;
+
+ /*
+ * Copy the packet header into the new buffer.
+ */
+
+ memcpy(skb2->nh.raw, skb->data, hlen);
+
+ /*
+ * Copy a block of the IP datagram.
+ */
+ if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
+ BUG();
+ left -= len;
+
+ /*
+ * Fill in the new header fields.
+ */
+ iph = skb2->nh.iph;
+ iph->frag_off = htons((offset >> 3));
+
+ /* ANK: dirty, but effective trick. Upgrade options only if
+ * the segment to be fragmented was THE FIRST (otherwise,
+ * options are already fixed) and make it ONCE
+ * on the initial skb, so that all the following fragments
+ * will inherit fixed options.
+ */
+ if (offset == 0)
+ ip_options_fragment(skb);
+
+ /* Copy the flags to each fragment. */
+ IPCB(skb2)->flags = IPCB(skb)->flags;
+
+ /*
+ * Added AC : If we are fragmenting a fragment that's not the
+		 *		   last fragment then keep the MF bit set on each fragment
+ */
+ if (left > 0 || not_last_frag)
+ iph->frag_off |= __constant_htons(IP_MF);
+ ptr += len;
+ offset += len;
+
+#ifdef CONFIG_NET_SCHED
+ skb2->tc_index = skb->tc_index;
+#endif
+#ifdef CONFIG_NETFILTER
+ skb2->nfmark = skb->nfmark;
+ /* Connection association is same as pre-frag packet */
+ skb2->nfct = skb->nfct;
+ nf_conntrack_get(skb2->nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb2->nf_debug = skb->nf_debug;
+#endif
+#endif
+
+ /*
+ * Put this fragment into the sending queue.
+ */
+
+ IP_INC_STATS(IpFragCreates);
+
+ iph->tot_len = htons(len + hlen);
+
+ ip_send_check(iph);
+ memcpy(skb2->data - 16, skb->data - 16, 16);
+
+ err = output(skb2);
+ if (err)
+ goto fail;
+ }
+ kfree_skb(skb);
+ IP_INC_STATS(IpFragOKs);
+ return err;
+
+fail:
+ kfree_skb(skb);
+ IP_INC_STATS(IpFragFails);
+ return err;
+}
+
+/*
+ * Fetch data from kernel space and fill in checksum if needed.
+ */
+static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
+ unsigned int fraglen)
+{
+ struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
+ u16 *pktp = (u16 *)to;
+ struct iovec *iov;
+ int len;
+ int hdrflag = 1;
+
+ iov = &dp->iov[0];
+ if (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ iov++;
+ hdrflag = 0;
+ }
+ len = iov->iov_len - offset;
+ if (fraglen > len) { /* overlapping. */
+ dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
+ dp->csum);
+ offset = 0;
+ fraglen -= len;
+ to += len;
+ iov++;
+ }
+
+ dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
+ dp->csum);
+
+ if (hdrflag && dp->csumoffset)
+ *(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
+ return 0;
+}
+
+/*
+ * Generic function to send a packet as reply to another packet.
+ * Used to send TCP resets so far. ICMP should use this function too.
+ *
+ * Should run single threaded per socket because it uses the sock
+ * structure to pass arguments.
+ */
+void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
+ unsigned int len)
+{
+ struct inet_opt *inet = inet_sk(sk);
+ struct {
+ struct ip_options opt;
+ char data[40];
+ } replyopts;
+ struct ipcm_cookie ipc;
+ u32 daddr;
+ struct rtable *rt = (struct rtable*)skb->dst;
+
+ if (ip_options_echo(&replyopts.opt, skb))
+ return;
+
+ daddr = ipc.addr = rt->rt_src;
+ ipc.opt = NULL;
+
+ if (replyopts.opt.optlen) {
+ ipc.opt = &replyopts.opt;
+
+ if (ipc.opt->srr)
+ daddr = replyopts.opt.faddr;
+ }
+
+ if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
+ return;
+
+ /* And let IP do all the hard work.
+
+	   This chunk is not reentrant, hence the spinlock.
+	   Note that it relies on the fact that this function is called
+	   with BHs locally disabled and that sk cannot already be spinlocked.
+ */
+ bh_lock_sock(sk);
+ inet->tos = skb->nh.iph->tos;
+ sk->priority = skb->priority;
+ sk->protocol = skb->nh.iph->protocol;
+ ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
+ bh_unlock_sock(sk);
+
+ ip_rt_put(rt);
+}
+
+/*
+ * IP protocol layer initialiser
+ */
+
+static struct packet_type ip_packet_type =
+{
+ __constant_htons(ETH_P_IP),
+ NULL, /* All devices */
+ ip_rcv,
+ (void*)1,
+ NULL,
+};
+
+/*
+ * IP registers the packet type and then calls the subprotocol initialisers
+ */
+
+void __init ip_init(void)
+{
+ dev_add_pack(&ip_packet_type);
+
+ ip_rt_init();
+ inet_initpeers();
+
+#ifdef CONFIG_IP_MULTICAST
+ proc_net_create("igmp", 0, ip_mc_procinfo);
+#endif
+}
diff --git a/br-nf-bds/linux2.5/net/ipv4/netfilter/ip_tables.c b/br-nf-bds/linux2.5/net/ipv4/netfilter/ip_tables.c
new file mode 100644
index 0000000..928e67a
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/ipv4/netfilter/ip_tables.c
@@ -0,0 +1,1811 @@
+/*
+ * Packet matching code.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
+ *
+ * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
+ * - increase module usage count as soon as we have rules inside
+ * a table
+ */
+#include <linux/config.h>
+#include <linux/cache.h>
+#include <linux/skbuff.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...) printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x) \
+do { \
+ if (!(x)) \
+ printk("IP_NF_ASSERT: %s:%s:%u\n", \
+ __FUNCTION__, __FILE__, __LINE__); \
+} while(0)
+#else
+#define IP_NF_ASSERT(x)
+#endif
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+/* Mutex protects lists (only traversed in user context). */
+static DECLARE_MUTEX(ipt_mutex);
+
+/* Must have mutex */
+#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
+#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+/* All the better to debug you with... */
+#define static
+#define inline
+#endif
+
+/* Locking is simple: we assume in the worst case there will be one packet
+ in user context and one from bottom halves (or soft irq if Alexey's
+ softnet patch was applied).
+
+ We keep a set of rules for each CPU, so we can avoid write-locking
+ them; doing a readlock_bh() stops packets coming through if we're
+ in user context.
+
+ To be cache friendly on SMP, we arrange them like so:
+ [ n-entries ]
+ ... cache-align padding ...
+ [ n-entries ]
+
+ Hence the start of any table is given by get_table() below. */
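+
+/* Concretely, the copy used on the current processor is located the way
+ * ipt_do_table() below does it:
+ *
+ *	table_base = (void *)table->private->entries
+ *		     + TABLE_OFFSET(table->private, smp_processor_id());
+ */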
+
+/* The table itself */
+struct ipt_table_info
+{
+ /* Size per table */
+ unsigned int size;
+ /* Number of entries: FIXME. --RR */
+ unsigned int number;
+ /* Initial number of entries. Needed for module usage count */
+ unsigned int initial_entries;
+
+ /* Entry points and underflows */
+ unsigned int hook_entry[NF_IP_NUMHOOKS];
+ unsigned int underflow[NF_IP_NUMHOOKS];
+
+ /* ipt_entry tables: one per CPU */
+ char entries[0] ____cacheline_aligned;
+};
+
+static LIST_HEAD(ipt_target);
+static LIST_HEAD(ipt_match);
+static LIST_HEAD(ipt_tables);
+#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+#ifdef CONFIG_SMP
+#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
+#else
+#define TABLE_OFFSET(t,p) 0
+#endif
+
+#if 0
+#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
+#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
+#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
+#endif
+
+/* Returns whether matches rule or not. */
+static inline int
+ip_packet_match(const struct iphdr *ip,
+ const char *indev,
+ const char *physindev,
+ const char *outdev,
+ const char *physoutdev,
+ const struct ipt_ip *ipinfo,
+ int isfrag)
+{
+ size_t i;
+ unsigned long ret;
+ unsigned long ret2;
+
+#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
+
+ if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
+ IPT_INV_SRCIP)
+ || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+ IPT_INV_DSTIP)) {
+ dprintf("Source or dest mismatch.\n");
+
+ dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(ip->saddr),
+ NIPQUAD(ipinfo->smsk.s_addr),
+ NIPQUAD(ipinfo->src.s_addr),
+ ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
+ dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(ip->daddr),
+ NIPQUAD(ipinfo->dmsk.s_addr),
+ NIPQUAD(ipinfo->dst.s_addr),
+ ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
+ return 0;
+ }
+
+ /* Look for ifname matches; this should unroll nicely. */
+ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret |= (((const unsigned long *)indev)[i]
+ ^ ((const unsigned long *)ipinfo->iniface)[i])
+ & ((const unsigned long *)ipinfo->iniface_mask)[i];
+ }
+
+ for (i = 0, ret2 = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret2 |= (((const unsigned long *)physindev)[i]
+ ^ ((const unsigned long *)ipinfo->iniface)[i])
+ & ((const unsigned long *)ipinfo->iniface_mask)[i];
+ }
+
+ if (FWINV(ret != 0 && ret2 != 0, IPT_INV_VIA_IN)) {
+ dprintf("VIA in mismatch (%s vs %s).%s\n",
+ indev, ipinfo->iniface,
+ ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
+ return 0;
+ }
+
+ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret |= (((const unsigned long *)outdev)[i]
+ ^ ((const unsigned long *)ipinfo->outiface)[i])
+ & ((const unsigned long *)ipinfo->outiface_mask)[i];
+ }
+
+ for (i = 0, ret2 = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret2 |= (((const unsigned long *)physoutdev)[i]
+ ^ ((const unsigned long *)ipinfo->outiface)[i])
+ & ((const unsigned long *)ipinfo->outiface_mask)[i];
+ }
+
+ if (FWINV(ret != 0 && ret2 != 0, IPT_INV_VIA_OUT)) {
+ dprintf("VIA out mismatch (%s vs %s).%s\n",
+ outdev, ipinfo->outiface,
+ ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
+ return 0;
+ }
+
+ /* Check specific protocol */
+ if (ipinfo->proto
+ && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
+ dprintf("Packet protocol %hi does not match %hi.%s\n",
+ ip->protocol, ipinfo->proto,
+ ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
+ return 0;
+ }
+
+ /* If we have a fragment rule but the packet is not a fragment
+ * then we return zero */
+ if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
+ dprintf("Fragment rule but not fragment.%s\n",
+ ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline int
+ip_checkentry(const struct ipt_ip *ip)
+{
+ if (ip->flags & ~IPT_F_MASK) {
+ duprintf("Unknown flag bits set: %08X\n",
+ ip->flags & ~IPT_F_MASK);
+ return 0;
+ }
+ if (ip->invflags & ~IPT_INV_MASK) {
+ duprintf("Unknown invflag bits set: %08X\n",
+ ip->invflags & ~IPT_INV_MASK);
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned int
+ipt_error(struct sk_buff **pskb,
+ unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *targinfo,
+ void *userinfo)
+{
+ if (net_ratelimit())
+ printk("ip_tables: error: `%s'\n", (char *)targinfo);
+
+ return NF_DROP;
+}
+
+static inline
+int do_match(struct ipt_entry_match *m,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int offset,
+ const void *hdr,
+ u_int16_t datalen,
+ int *hotdrop)
+{
+ /* Stop iteration if it doesn't match */
+ if (!m->u.kernel.match->match(skb, in, out, m->data,
+ offset, hdr, datalen, hotdrop))
+ return 1;
+ else
+ return 0;
+}
+
+static inline struct ipt_entry *
+get_entry(void *base, unsigned int offset)
+{
+ return (struct ipt_entry *)(base + offset);
+}
+
+/* Returns one of the generic firewall policies, like NF_ACCEPT. */
+unsigned int
+ipt_do_table(struct sk_buff **pskb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct ipt_table *table,
+ void *userdata)
+{
+ static const char nulldevname[IFNAMSIZ] = { 0 };
+ u_int16_t offset;
+ struct iphdr *ip;
+ void *protohdr;
+ u_int16_t datalen;
+ int hotdrop = 0;
+ /* Initializing verdict to NF_DROP keeps gcc happy. */
+ unsigned int verdict = NF_DROP;
+ const char *indev, *outdev;
+ const char *physindev, *physoutdev;
+ void *table_base;
+ struct ipt_entry *e, *back;
+
+ /* Initialization */
+ ip = (*pskb)->nh.iph;
+ protohdr = (u_int32_t *)ip + ip->ihl;
+ datalen = (*pskb)->len - ip->ihl * 4;
+ indev = in ? in->name : nulldevname;
+ outdev = out ? out->name : nulldevname;
+ physindev = (*pskb)->physindev ? (*pskb)->physindev->name : nulldevname;
+ physoutdev = (*pskb)->physoutdev ? (*pskb)->physoutdev->name : nulldevname;
+
+ /* We handle fragments by dealing with the first fragment as
+ * if it was a normal packet. All other fragments are treated
+ * normally, except that they will NEVER match rules that ask
+	 * things we don't know (ie. tcp syn flag or ports). If the
+ * rule is also a fragment-specific rule, non-fragments won't
+ * match it. */
+ offset = ntohs(ip->frag_off) & IP_OFFSET;
+
+ read_lock_bh(&table->lock);
+ IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ table_base = (void *)table->private->entries
+ + TABLE_OFFSET(table->private, smp_processor_id());
+ e = get_entry(table_base, table->private->hook_entry[hook]);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	/* Check that no one else is using our table */
+ if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
+ && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
+ printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
+ smp_processor_id(),
+ table->name,
+ &((struct ipt_entry *)table_base)->comefrom,
+ ((struct ipt_entry *)table_base)->comefrom);
+ }
+ ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
+#endif
+
+ /* For return from builtin chain */
+ back = get_entry(table_base, table->private->underflow[hook]);
+
+ do {
+ IP_NF_ASSERT(e);
+ IP_NF_ASSERT(back);
+ (*pskb)->nfcache |= e->nfcache;
+ if (ip_packet_match(ip, indev, physindev, outdev, physoutdev, &e->ip, offset)) {
+ struct ipt_entry_target *t;
+
+ if (IPT_MATCH_ITERATE(e, do_match,
+ *pskb, in, out,
+ offset, protohdr,
+ datalen, &hotdrop) != 0)
+ goto no_match;
+
+ ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+
+ t = ipt_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct ipt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != IPT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
+ break;
+ }
+ e = back;
+ back = get_entry(table_base,
+ back->comefrom);
+ continue;
+ }
+ if (table_base + v
+ != (void *)e + e->next_offset) {
+ /* Save old back ptr in next entry */
+ struct ipt_entry *next
+ = (void *)e + e->next_offset;
+ next->comefrom
+ = (void *)back - table_base;
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ } else {
+ /* Targets which reenter must return
+	   absolute verdicts */
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0xeeeeeeec;
+#endif
+ verdict = t->u.kernel.target->target(pskb,
+ hook,
+ in, out,
+ t->data,
+ userdata);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (((struct ipt_entry *)table_base)->comefrom
+ != 0xeeeeeeec
+ && verdict == IPT_CONTINUE) {
+ printk("Target %s reentered!\n",
+ t->u.kernel.target->name);
+ verdict = NF_DROP;
+ }
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0x57acc001;
+#endif
+ /* Target might have changed stuff. */
+ ip = (*pskb)->nh.iph;
+ protohdr = (u_int32_t *)ip + ip->ihl;
+ datalen = (*pskb)->len - ip->ihl * 4;
+
+ if (verdict == IPT_CONTINUE)
+ e = (void *)e + e->next_offset;
+ else
+ /* Verdict */
+ break;
+ }
+ } else {
+
+ no_match:
+ e = (void *)e + e->next_offset;
+ }
+ } while (!hotdrop);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
+#endif
+ read_unlock_bh(&table->lock);
+
+#ifdef DEBUG_ALLOW_ALL
+ return NF_ACCEPT;
+#else
+ if (hotdrop)
+ return NF_DROP;
+ else return verdict;
+#endif
+}
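+
+/* Note on the verdict encoding above: a standard target stores built-in
+ * verdicts as negative values (-NF_ACCEPT - 1, -NF_DROP - 1, ...) so
+ * that non-negative values remain free to mean "jump to this offset in
+ * the table"; verdict = (unsigned)(-v) - 1 recovers the NF_* value.
+ */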
+
+/* On success, returns the element and leaves the mutex held */
+static inline void *
+find_inlist_lock_noload(struct list_head *head,
+ const char *name,
+ int *error,
+ struct semaphore *mutex)
+{
+ void *ret;
+
+#if 0
+ duprintf("find_inlist: searching for `%s' in %s.\n",
+ name, head == &ipt_target ? "ipt_target"
+ : head == &ipt_match ? "ipt_match"
+ : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
+#endif
+
+ *error = down_interruptible(mutex);
+ if (*error != 0)
+ return NULL;
+
+ ret = list_named_find(head, name);
+ if (!ret) {
+ *error = -ENOENT;
+ up(mutex);
+ }
+ return ret;
+}
+
+#ifndef CONFIG_KMOD
+#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
+#else
+static void *
+find_inlist_lock(struct list_head *head,
+ const char *name,
+ const char *prefix,
+ int *error,
+ struct semaphore *mutex)
+{
+ void *ret;
+
+ ret = find_inlist_lock_noload(head, name, error, mutex);
+ if (!ret) {
+ char modulename[IPT_FUNCTION_MAXNAMELEN + strlen(prefix) + 1];
+ strcpy(modulename, prefix);
+ strcat(modulename, name);
+ duprintf("find_inlist: loading `%s'.\n", modulename);
+ request_module(modulename);
+ ret = find_inlist_lock_noload(head, name, error, mutex);
+ }
+
+ return ret;
+}
+#endif
+
+static inline struct ipt_table *
+find_table_lock(const char *name, int *error, struct semaphore *mutex)
+{
+ return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
+}
+
+static inline struct ipt_match *
+find_match_lock(const char *name, int *error, struct semaphore *mutex)
+{
+ return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
+}
+
+static inline struct ipt_target *
+find_target_lock(const char *name, int *error, struct semaphore *mutex)
+{
+ return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
+}
+
+/* All zeroes == unconditional rule. */
+static inline int
+unconditional(const struct ipt_ip *ip)
+{
+ unsigned int i;
+
+ for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
+ if (((__u32 *)ip)[i])
+ return 0;
+
+ return 1;
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ there are loops. Puts hook bitmask in comefrom. */
+static int
+mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
+{
+ unsigned int hook;
+
+ /* No recursion; use packet counter to save back ptrs (reset
+ to 0 as we leave), and comefrom to save source hook bitmask */
+ for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
+ unsigned int pos = newinfo->hook_entry[hook];
+ struct ipt_entry *e
+ = (struct ipt_entry *)(newinfo->entries + pos);
+
+ if (!(valid_hooks & (1 << hook)))
+ continue;
+
+ /* Set initial back pointer. */
+ e->counters.pcnt = pos;
+
+ for (;;) {
+ struct ipt_standard_target *t
+ = (void *)ipt_get_target(e);
+
+ if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
+ printk("iptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+ e->comefrom
+ |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
+
+ /* Unconditional return/END. */
+ if (e->target_offset == sizeof(struct ipt_entry)
+ && (strcmp(t->target.u.user.name,
+ IPT_STANDARD_TARGET) == 0)
+ && t->verdict < 0
+ && unconditional(&e->ip)) {
+ unsigned int oldpos, size;
+
+ /* Return: backtrack through the last
+ big jump. */
+ do {
+ e->comefrom ^= (1<<NF_IP_NUMHOOKS);
+#ifdef DEBUG_IP_FIREWALL_USER
+ if (e->comefrom
+ & (1 << NF_IP_NUMHOOKS)) {
+ duprintf("Back unset "
+ "on hook %u "
+ "rule %u\n",
+ hook, pos);
+ }
+#endif
+ oldpos = pos;
+ pos = e->counters.pcnt;
+ e->counters.pcnt = 0;
+
+ /* We're at the start. */
+ if (pos == oldpos)
+ goto next;
+
+ e = (struct ipt_entry *)
+ (newinfo->entries + pos);
+ } while (oldpos == pos + e->next_offset);
+
+ /* Move along one */
+ size = e->next_offset;
+ e = (struct ipt_entry *)
+ (newinfo->entries + pos + size);
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+ int newpos = t->verdict;
+
+ if (strcmp(t->target.u.user.name,
+ IPT_STANDARD_TARGET) == 0
+ && newpos >= 0) {
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
+ }
+ e = (struct ipt_entry *)
+ (newinfo->entries + newpos);
+ e->counters.pcnt = pos;
+ pos = newpos;
+ }
+ }
+ next:
+ duprintf("Finished chain %u\n", hook);
+ }
+ return 1;
+}
+
+static inline int
+cleanup_match(struct ipt_entry_match *m, unsigned int *i)
+{
+ if (i && (*i)-- == 0)
+ return 1;
+
+ if (m->u.kernel.match->destroy)
+ m->u.kernel.match->destroy(m->data,
+ m->u.match_size - sizeof(*m));
+
+ if (m->u.kernel.match->me)
+ __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
+
+ return 0;
+}
+
+static inline int
+standard_check(const struct ipt_entry_target *t,
+ unsigned int max_offset)
+{
+ struct ipt_standard_target *targ = (void *)t;
+
+ /* Check standard info. */
+ if (t->u.target_size
+ != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
+ duprintf("standard_check: target size %u != %u\n",
+ t->u.target_size,
+ IPT_ALIGN(sizeof(struct ipt_standard_target)));
+ return 0;
+ }
+
+ if (targ->verdict >= 0
+ && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
+ duprintf("ipt_standard_check: bad verdict (%i)\n",
+ targ->verdict);
+ return 0;
+ }
+
+ if (targ->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("ipt_standard_check: bad negative verdict (%i)\n",
+ targ->verdict);
+ return 0;
+ }
+ return 1;
+}
+
+static inline int
+check_match(struct ipt_entry_match *m,
+ const char *name,
+ const struct ipt_ip *ip,
+ unsigned int hookmask,
+ unsigned int *i)
+{
+ int ret;
+ struct ipt_match *match;
+
+ match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
+ if (!match) {
+ duprintf("check_match: `%s' not found\n", m->u.user.name);
+ return ret;
+ }
+ if (match->me)
+ __MOD_INC_USE_COUNT(match->me);
+ m->u.kernel.match = match;
+ up(&ipt_mutex);
+
+ if (m->u.kernel.match->checkentry
+ && !m->u.kernel.match->checkentry(name, ip, m->data,
+ m->u.match_size - sizeof(*m),
+ hookmask)) {
+ if (m->u.kernel.match->me)
+ __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
+ duprintf("ip_tables: check failed for `%s'.\n",
+ m->u.kernel.match->name);
+ return -EINVAL;
+ }
+
+ (*i)++;
+ return 0;
+}
+
+static struct ipt_target ipt_standard_target;
+
+static inline int
+check_entry(struct ipt_entry *e, const char *name, unsigned int size,
+ unsigned int *i)
+{
+ struct ipt_entry_target *t;
+ struct ipt_target *target;
+ int ret;
+ unsigned int j;
+
+ if (!ip_checkentry(&e->ip)) {
+ duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+ return -EINVAL;
+ }
+
+ j = 0;
+ ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+ if (ret != 0)
+ goto cleanup_matches;
+
+ t = ipt_get_target(e);
+ target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
+ if (!target) {
+ duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ goto cleanup_matches;
+ }
+ if (target->me)
+ __MOD_INC_USE_COUNT(target->me);
+ t->u.kernel.target = target;
+ up(&ipt_mutex);
+
+ if (t->u.kernel.target == &ipt_standard_target) {
+ if (!standard_check(t, size)) {
+ ret = -EINVAL;
+ goto cleanup_matches;
+ }
+ } else if (t->u.kernel.target->checkentry
+ && !t->u.kernel.target->checkentry(name, e, t->data,
+ t->u.target_size
+ - sizeof(*t),
+ e->comefrom)) {
+ if (t->u.kernel.target->me)
+ __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
+ duprintf("ip_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ ret = -EINVAL;
+ goto cleanup_matches;
+ }
+
+ (*i)++;
+ return 0;
+
+ cleanup_matches:
+ IPT_MATCH_ITERATE(e, cleanup_match, &j);
+ return ret;
+}
+
+static inline int
+check_entry_size_and_hooks(struct ipt_entry *e,
+ struct ipt_table_info *newinfo,
+ unsigned char *base,
+ unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ unsigned int *i)
+{
+ unsigned int h;
+
+ if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
+ || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+ duprintf("Bad offset %p\n", e);
+ return -EINVAL;
+ }
+
+ if (e->next_offset
+ < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h])
+ newinfo->underflow[h] = underflows[h];
+ }
+
+ /* FIXME: underflows must be unconditional, standard verdicts
+ < 0 (not IPT_RETURN). --RR */
+
+ /* Clear counters and comefrom */
+ e->counters = ((struct ipt_counters) { 0, 0 });
+ e->comefrom = 0;
+
+ (*i)++;
+ return 0;
+}
+
+static inline int
+cleanup_entry(struct ipt_entry *e, unsigned int *i)
+{
+ struct ipt_entry_target *t;
+
+ if (i && (*i)-- == 0)
+ return 1;
+
+ /* Cleanup all matches */
+ IPT_MATCH_ITERATE(e, cleanup_match, NULL);
+ t = ipt_get_target(e);
+ if (t->u.kernel.target->destroy)
+ t->u.kernel.target->destroy(t->data,
+ t->u.target_size - sizeof(*t));
+ if (t->u.kernel.target->me)
+ __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
+
+ return 0;
+}
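+
+/* Note: when iterated with a non-NULL i, cleanup_entry processes
+ * exactly *i entries and then returns 1 to stop the iteration; this is
+ * how translate_table() below unwinds only the entries that had
+ * already passed check_entry() when a later one fails. */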
+
+/* Checks and translates the user-supplied table segment (held in
+ newinfo) */
+static int
+translate_table(const char *name,
+ unsigned int valid_hooks,
+ struct ipt_table_info *newinfo,
+ unsigned int size,
+ unsigned int number,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows)
+{
+ unsigned int i;
+ int ret;
+
+ newinfo->size = size;
+ newinfo->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = 0xFFFFFFFF;
+ newinfo->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_table: size %u\n", newinfo->size);
+ i = 0;
+ /* Walk through entries, checking offsets. */
+ ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ check_entry_size_and_hooks,
+ newinfo,
+ newinfo->entries,
+ newinfo->entries + size,
+ hook_entries, underflows, &i);
+ if (ret != 0)
+ return ret;
+
+ if (i != number) {
+ duprintf("translate_table: %u not %u entries\n",
+ i, number);
+ return -EINVAL;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ return -EINVAL;
+ }
+ if (newinfo->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (!mark_source_chains(newinfo, valid_hooks))
+ return -ELOOP;
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+ ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ check_entry, name, size, &i);
+
+ if (ret != 0) {
+ IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ cleanup_entry, &i);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for (i = 1; i < NR_CPUS; i++) {
+ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+ newinfo->entries,
+ SMP_ALIGN(newinfo->size));
+ }
+
+ return ret;
+}
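+
+/* Resulting layout (sketch): each CPU gets a private copy of the whole
+ * entry blob so counters can be bumped without cross-CPU contention,
+ * laid out back to back at SMP_ALIGN()ed offsets:
+ *
+ *	entries + 0				(CPU 0, the master copy)
+ *	entries + SMP_ALIGN(size)		(CPU 1)
+ *	entries + SMP_ALIGN(size) * cpu		(cf. TABLE_OFFSET())
+ */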
+
+static struct ipt_table_info *
+replace_table(struct ipt_table *table,
+ unsigned int num_counters,
+ struct ipt_table_info *newinfo,
+ int *error)
+{
+ struct ipt_table_info *oldinfo;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ {
+ struct ipt_entry *table_base;
+ unsigned int i;
+
+ for (i = 0; i < NR_CPUS; i++) {
+ table_base =
+ (void *)newinfo->entries
+ + TABLE_OFFSET(newinfo, i);
+
+ table_base->comefrom = 0xdead57ac;
+ }
+ }
+#endif
+
+ /* Do the substitution. */
+ write_lock_bh(&table->lock);
+ /* Check inside lock: is the old number correct? */
+ if (num_counters != table->private->number) {
+ duprintf("num_counters != table->private->number (%u/%u)\n",
+ num_counters, table->private->number);
+ write_unlock_bh(&table->lock);
+ *error = -EAGAIN;
+ return NULL;
+ }
+ oldinfo = table->private;
+ table->private = newinfo;
+ newinfo->initial_entries = oldinfo->initial_entries;
+ write_unlock_bh(&table->lock);
+
+ return oldinfo;
+}
+
+/* Gets counters. */
+static inline int
+add_entry_to_counter(const struct ipt_entry *e,
+ struct ipt_counters total[],
+ unsigned int *i)
+{
+ ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+ (*i)++;
+ return 0;
+}
+
+static void
+get_counters(const struct ipt_table_info *t,
+ struct ipt_counters counters[])
+{
+ unsigned int cpu;
+ unsigned int i;
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ i = 0;
+ IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+ t->size,
+ add_entry_to_counter,
+ counters,
+ &i);
+ }
+}
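+
+/* counters[] here must hold table->private->number entries; the loop
+ * accumulates every CPU's private copy via ADD_COUNTER(). */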
+
+static int
+copy_entries_to_user(unsigned int total_size,
+ struct ipt_table *table,
+ void *userptr)
+{
+ unsigned int off, num, countersize;
+ struct ipt_entry *e;
+ struct ipt_counters *counters;
+ int ret = 0;
+
+ /* We need atomic snapshot of counters: rest doesn't change
+ (other than comefrom, which userspace doesn't care
+ about). */
+ countersize = sizeof(struct ipt_counters) * table->private->number;
+ counters = vmalloc(countersize);
+
+ if (counters == NULL)
+ return -ENOMEM;
+
+ /* First, sum counters... */
+ memset(counters, 0, countersize);
+ write_lock_bh(&table->lock);
+ get_counters(table->private, counters);
+ write_unlock_bh(&table->lock);
+
+ /* ... then copy entire thing from CPU 0... */
+ if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++) {
+ unsigned int i;
+ struct ipt_entry_match *m;
+ struct ipt_entry_target *t;
+
+ e = (struct ipt_entry *)(table->private->entries + off);
+ if (copy_to_user(userptr + off
+ + offsetof(struct ipt_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ for (i = sizeof(struct ipt_entry);
+ i < e->target_offset;
+ i += m->u.match_size) {
+ m = (void *)e + i;
+
+ if (copy_to_user(userptr + off + i
+ + offsetof(struct ipt_entry_match,
+ u.user.name),
+ m->u.kernel.match->name,
+ strlen(m->u.kernel.match->name)+1)
+ != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ t = ipt_get_target(e);
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct ipt_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+static int
+get_entries(const struct ipt_get_entries *entries,
+ struct ipt_get_entries *uptr)
+{
+ int ret;
+ struct ipt_table *t;
+
+ t = find_table_lock(entries->name, &ret, &ipt_mutex);
+ if (t) {
+ duprintf("t->private->number = %u\n",
+ t->private->number);
+ if (entries->size == t->private->size)
+ ret = copy_entries_to_user(t->private->size,
+ t, uptr->entrytable);
+ else {
+ duprintf("get_entries: I've got %u not %u!\n",
+ t->private->size,
+ entries->size);
+ ret = -EINVAL;
+ }
+ up(&ipt_mutex);
+ } else
+ duprintf("get_entries: Can't find %s!\n",
+ entries->name);
+
+ return ret;
+}
+
+static int
+do_replace(void *user, unsigned int len)
+{
+ int ret;
+ struct ipt_replace tmp;
+ struct ipt_table *t;
+ struct ipt_table_info *newinfo, *oldinfo;
+ struct ipt_counters *counters;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* Hack: Causes ipchains to give correct error msg --RR */
+ if (len != sizeof(tmp) + tmp.size)
+ return -ENOPROTOOPT;
+
+ /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+ return -ENOMEM;
+
+ newinfo = vmalloc(sizeof(struct ipt_table_info)
+ + SMP_ALIGN(tmp.size) * NR_CPUS);
+ if (!newinfo)
+ return -ENOMEM;
+
+ if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
+ if (!counters) {
+ ret = -ENOMEM;
+ goto free_newinfo;
+ }
+ memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
+
+ ret = translate_table(tmp.name, tmp.valid_hooks,
+ newinfo, tmp.size, tmp.num_entries,
+ tmp.hook_entry, tmp.underflow);
+ if (ret != 0)
+ goto free_newinfo_counters;
+
+ duprintf("ip_tables: Translated table\n");
+
+ t = find_table_lock(tmp.name, &ret, &ipt_mutex);
+ if (!t)
+ goto free_newinfo_counters_untrans;
+
+ /* You lied! */
+ if (tmp.valid_hooks != t->valid_hooks) {
+ duprintf("Valid hook crap: %08X vs %08X\n",
+ tmp.valid_hooks, t->valid_hooks);
+ ret = -EINVAL;
+ goto free_newinfo_counters_untrans_unlock;
+ }
+
+ oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+ if (!oldinfo)
+ goto free_newinfo_counters_untrans_unlock;
+
+ /* Update module usage count based on number of rules */
+ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+ oldinfo->number, oldinfo->initial_entries, newinfo->number);
+ if (t->me && (oldinfo->number <= oldinfo->initial_entries) &&
+ (newinfo->number > oldinfo->initial_entries))
+ __MOD_INC_USE_COUNT(t->me);
+ else if (t->me && (oldinfo->number > oldinfo->initial_entries) &&
+ (newinfo->number <= oldinfo->initial_entries))
+ __MOD_DEC_USE_COUNT(t->me);
+
+ /* Get the old counters. */
+ get_counters(oldinfo, counters);
+ /* Decrease module usage counts and free resource */
+ IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry, NULL);
+ vfree(oldinfo);
+ /* Silent error: too late now. */
+ copy_to_user(tmp.counters, counters,
+ sizeof(struct ipt_counters) * tmp.num_counters);
+ vfree(counters);
+ up(&ipt_mutex);
+ return 0;
+
+ free_newinfo_counters_untrans_unlock:
+ up(&ipt_mutex);
+ free_newinfo_counters_untrans:
+ IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
+ free_newinfo_counters:
+ vfree(counters);
+ free_newinfo:
+ vfree(newinfo);
+ return ret;
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static inline int
+add_counter_to_entry(struct ipt_entry *e,
+ const struct ipt_counters addme[],
+ unsigned int *i)
+{
+#if 0
+ duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
+ *i,
+ (long unsigned int)e->counters.pcnt,
+ (long unsigned int)e->counters.bcnt,
+ (long unsigned int)addme[*i].pcnt,
+ (long unsigned int)addme[*i].bcnt);
+#endif
+
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
+static int
+do_add_counters(void *user, unsigned int len)
+{
+ unsigned int i;
+ struct ipt_counters_info tmp, *paddc;
+ struct ipt_table *t;
+ int ret;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
+ return -EINVAL;
+
+ paddc = vmalloc(len);
+ if (!paddc)
+ return -ENOMEM;
+
+ if (copy_from_user(paddc, user, len) != 0) {
+ ret = -EFAULT;
+ goto free;
+ }
+
+ t = find_table_lock(tmp.name, &ret, &ipt_mutex);
+ if (!t)
+ goto free;
+
+ write_lock_bh(&t->lock);
+ if (t->private->number != paddc->num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+
+ i = 0;
+ IPT_ENTRY_ITERATE(t->private->entries,
+ t->private->size,
+ add_counter_to_entry,
+ paddc->counters,
+ &i);
+ unlock_up_free:
+ write_unlock_bh(&t->lock);
+ up(&ipt_mutex);
+ free:
+ vfree(paddc);
+
+ return ret;
+}
+
+static int
+do_ipt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_SET_REPLACE:
+ ret = do_replace(user, len);
+ break;
+
+ case IPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(user, len);
+ break;
+
+ default:
+ duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
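+
+/* These commands arrive via setsockopt() on any raw IPv4 socket;
+ * iptables(8) does roughly (sketch):
+ *
+ *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+ *	setsockopt(s, IPPROTO_IP, IPT_SO_SET_REPLACE,
+ *		   repl, sizeof(*repl) + repl->size);
+ *
+ * where repl is a struct ipt_replace followed by repl->size bytes of
+ * entries, matching the length check in do_replace() above. */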
+
+static int
+do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_GET_INFO: {
+ char name[IPT_TABLE_MAXNAMELEN];
+ struct ipt_table *t;
+
+ if (*len != sizeof(struct ipt_getinfo)) {
+ duprintf("length %u != %u\n", *len,
+ sizeof(struct ipt_getinfo));
+ ret = -EINVAL;
+ break;
+ }
+
+ if (copy_from_user(name, user, sizeof(name)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+ name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+ t = find_table_lock(name, &ret, &ipt_mutex);
+ if (t) {
+ struct ipt_getinfo info;
+
+ info.valid_hooks = t->valid_hooks;
+ memcpy(info.hook_entry, t->private->hook_entry,
+ sizeof(info.hook_entry));
+ memcpy(info.underflow, t->private->underflow,
+ sizeof(info.underflow));
+ info.num_entries = t->private->number;
+ info.size = t->private->size;
+ strcpy(info.name, name);
+
+ if (copy_to_user(user, &info, *len) != 0)
+ ret = -EFAULT;
+ else
+ ret = 0;
+
+ up(&ipt_mutex);
+ }
+ }
+ break;
+
+ case IPT_SO_GET_ENTRIES: {
+ struct ipt_get_entries get;
+
+ if (*len < sizeof(get)) {
+ duprintf("get_entries: %u < %u\n", *len, sizeof(get));
+ ret = -EINVAL;
+ } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
+ ret = -EFAULT;
+ } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
+ duprintf("get_entries: %u != %u\n", *len,
+ sizeof(struct ipt_get_entries) + get.size);
+ ret = -EINVAL;
+ } else
+ ret = get_entries(&get, user);
+ break;
+ }
+
+ default:
+ duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
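+
+/* Reading a table is a two-step protocol (sketch): first
+ * IPT_SO_GET_INFO to learn the blob size, then IPT_SO_GET_ENTRIES
+ * with a buffer of exactly that size:
+ *
+ *	struct ipt_getinfo info;
+ *	socklen_t len = sizeof(info);
+ *	strcpy(info.name, "filter");
+ *	getsockopt(s, IPPROTO_IP, IPT_SO_GET_INFO, &info, &len);
+ *	... then allocate sizeof(struct ipt_get_entries) + info.size ...
+ */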
+
+/* Registration hooks for targets. */
+int
+ipt_register_target(struct ipt_target *target)
+{
+ int ret;
+
+ MOD_INC_USE_COUNT;
+ ret = down_interruptible(&ipt_mutex);
+ if (ret != 0) {
+ MOD_DEC_USE_COUNT;
+ return ret;
+ }
+ if (!list_named_insert(&ipt_target, target)) {
+ duprintf("ipt_register_target: `%s' already in list!\n",
+ target->name);
+ ret = -EINVAL;
+ MOD_DEC_USE_COUNT;
+ }
+ up(&ipt_mutex);
+ return ret;
+}
+
+void
+ipt_unregister_target(struct ipt_target *target)
+{
+ down(&ipt_mutex);
+ LIST_DELETE(&ipt_target, target);
+ up(&ipt_mutex);
+ MOD_DEC_USE_COUNT;
+}
+
+int
+ipt_register_match(struct ipt_match *match)
+{
+ int ret;
+
+ MOD_INC_USE_COUNT;
+ ret = down_interruptible(&ipt_mutex);
+ if (ret != 0) {
+ MOD_DEC_USE_COUNT;
+ return ret;
+ }
+ if (!list_named_insert(&ipt_match, match)) {
+ duprintf("ipt_register_match: `%s' already in list!\n",
+ match->name);
+ MOD_DEC_USE_COUNT;
+ ret = -EINVAL;
+ }
+ up(&ipt_mutex);
+
+ return ret;
+}
+
+void
+ipt_unregister_match(struct ipt_match *match)
+{
+ down(&ipt_mutex);
+ LIST_DELETE(&ipt_match, match);
+ up(&ipt_mutex);
+ MOD_DEC_USE_COUNT;
+}
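+
+/* A typical extension module just wraps these in its init/exit hooks;
+ * e.g. a hypothetical match "foo" (names illustrative):
+ *
+ *	static struct ipt_match foo_match
+ *	= { { NULL, NULL }, "foo", &foo_match_fn, &foo_checkentry, NULL,
+ *	    THIS_MODULE };
+ *
+ *	static int __init init(void)
+ *	{ return ipt_register_match(&foo_match); }
+ *	static void __exit fini(void)
+ *	{ ipt_unregister_match(&foo_match); }
+ */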
+
+int ipt_register_table(struct ipt_table *table)
+{
+ int ret;
+ struct ipt_table_info *newinfo;
+ static struct ipt_table_info bootstrap
+ = { 0, 0, 0, { 0 }, { 0 }, { } };
+
+ MOD_INC_USE_COUNT;
+ newinfo = vmalloc(sizeof(struct ipt_table_info)
+ + SMP_ALIGN(table->table->size) * NR_CPUS);
+ if (!newinfo) {
+ ret = -ENOMEM;
+ MOD_DEC_USE_COUNT;
+ return ret;
+ }
+ memcpy(newinfo->entries, table->table->entries, table->table->size);
+
+ ret = translate_table(table->name, table->valid_hooks,
+ newinfo, table->table->size,
+ table->table->num_entries,
+ table->table->hook_entry,
+ table->table->underflow);
+ if (ret != 0) {
+ vfree(newinfo);
+ MOD_DEC_USE_COUNT;
+ return ret;
+ }
+
+ ret = down_interruptible(&ipt_mutex);
+ if (ret != 0) {
+ vfree(newinfo);
+ MOD_DEC_USE_COUNT;
+ return ret;
+ }
+
+ /* Don't autoload: we'd eat our tail... */
+ if (list_named_find(&ipt_tables, table->name)) {
+ ret = -EEXIST;
+ goto free_unlock;
+ }
+
+ /* Simplifies replace_table code. */
+ table->private = &bootstrap;
+ if (!replace_table(table, 0, newinfo, &ret))
+ goto free_unlock;
+
+ duprintf("table->private->number = %u\n",
+ table->private->number);
+
+ /* save number of initial entries */
+ table->private->initial_entries = table->private->number;
+
+ table->lock = RW_LOCK_UNLOCKED;
+ list_prepend(&ipt_tables, table);
+
+ unlock:
+ up(&ipt_mutex);
+ return ret;
+
+ free_unlock:
+ vfree(newinfo);
+ MOD_DEC_USE_COUNT;
+ goto unlock;
+}
+
+void ipt_unregister_table(struct ipt_table *table)
+{
+ down(&ipt_mutex);
+ LIST_DELETE(&ipt_tables, table);
+ up(&ipt_mutex);
+
+ /* Decrease module usage counts and free resources */
+ IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+ cleanup_entry, NULL);
+ vfree(table->private);
+ MOD_DEC_USE_COUNT;
+}
+
+/* Returns 1 if the port is matched by the range, 0 otherwise */
+static inline int
+port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+{
+ int ret;
+
+ ret = (port >= min && port <= max) ^ invert;
+ return ret;
+}
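+
+/* E.g. port_match(0, 1023, 80, 0) is 1 (80 lies in 0-1023), while
+ * port_match(0, 1023, 80, 1) is 0: the invert flag XORs the result. */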
+
+static int
+tcp_find_option(u_int8_t option,
+ const struct tcphdr *tcp,
+ u_int16_t datalen,
+ int invert,
+ int *hotdrop)
+{
+ unsigned int i = sizeof(struct tcphdr);
+ const u_int8_t *opt = (u_int8_t *)tcp;
+
+ duprintf("tcp_match: finding option\n");
+ /* If we don't have the whole header, drop packet. */
+ if (tcp->doff * 4 > datalen) {
+ *hotdrop = 1;
+ return 0;
+ }
+
+ while (i < tcp->doff * 4) {
+ if (opt[i] == option) return !invert;
+ if (opt[i] < 2) i++;
+ else i += opt[i+1]?:1;
+ }
+
+ return invert;
+}
+
+static int
+tcp_match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ const void *hdr,
+ u_int16_t datalen,
+ int *hotdrop)
+{
+ const struct tcphdr *tcp = hdr;
+ const struct ipt_tcp *tcpinfo = matchinfo;
+
+ /* To quote Alan:
+
+ Don't allow a fragment of TCP 8 bytes in. Nobody normal
+ causes this. It's a cracker trying to break in by doing a
+ flag overwrite to pass the direction checks.
+ */
+
+ if (offset == 1) {
+ duprintf("Dropping evil TCP offset=1 frag.\n");
+ *hotdrop = 1;
+ return 0;
+ } else if (offset == 0 && datalen < sizeof(struct tcphdr)) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ duprintf("Dropping evil TCP offset=0 tinygram.\n");
+ *hotdrop = 1;
+ return 0;
+ }
+
+ /* FIXME: Try tcp doff >> packet len against various stacks --RR */
+
+#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
+
+ /* Must not be a fragment. */
+ return !offset
+ && port_match(tcpinfo->spts[0], tcpinfo->spts[1],
+ ntohs(tcp->source),
+ !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT))
+ && port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
+ ntohs(tcp->dest),
+ !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT))
+ && FWINVTCP((((unsigned char *)tcp)[13]
+ & tcpinfo->flg_mask)
+ == tcpinfo->flg_cmp,
+ IPT_TCP_INV_FLAGS)
+ && (!tcpinfo->option
+ || tcp_find_option(tcpinfo->option, tcp, datalen,
+ tcpinfo->invflags
+ & IPT_TCP_INV_OPTION,
+ hotdrop));
+}
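+
+/* Flag matching works on byte 13 of the TCP header (the flags octet):
+ * the packet's flags are masked with flg_mask and compared against
+ * flg_cmp.  E.g. iptables' --syn is roughly flg_mask = SYN|RST|ACK
+ * with flg_cmp = SYN, i.e. SYN set while RST and ACK are clear. */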
+
+/* Called when user tries to insert an entry of this type. */
+static int
+tcp_checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ const struct ipt_tcp *tcpinfo = matchinfo;
+
+ /* Must specify proto == TCP, and no unknown invflags */
+ return ip->proto == IPPROTO_TCP
+ && !(ip->invflags & IPT_INV_PROTO)
+ && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
+ && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
+}
+
+static int
+udp_match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ const void *hdr,
+ u_int16_t datalen,
+ int *hotdrop)
+{
+ const struct udphdr *udp = hdr;
+ const struct ipt_udp *udpinfo = matchinfo;
+
+ if (offset == 0 && datalen < sizeof(struct udphdr)) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ duprintf("Dropping evil UDP tinygram.\n");
+ *hotdrop = 1;
+ return 0;
+ }
+
+ /* Must not be a fragment. */
+ return !offset
+ && port_match(udpinfo->spts[0], udpinfo->spts[1],
+ ntohs(udp->source),
+ !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
+ && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
+ ntohs(udp->dest),
+ !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+udp_checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_udp *udpinfo = matchinfo;
+
+ /* Must specify proto == UDP, and no unknown invflags */
+ if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
+ duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
+ IPPROTO_UDP);
+ return 0;
+ }
+ if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
+ duprintf("ipt_udp: matchsize %u != %u\n",
+ matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
+ return 0;
+ }
+ if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
+ duprintf("ipt_udp: unknown flags %X\n",
+ udpinfo->invflags);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Returns 1 if the type and code are matched by the range, 0 otherwise */
+static inline int
+icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
+ u_int8_t type, u_int8_t code,
+ int invert)
+{
+ return (type == test_type && code >= min_code && code <= max_code)
+ ^ invert;
+}
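+
+/* E.g. matching "echo-request" uses test_type 8 with the full code
+ * range 0-255, so any code value matches; a single code is expressed
+ * as min_code == max_code. */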
+
+static int
+icmp_match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ const void *hdr,
+ u_int16_t datalen,
+ int *hotdrop)
+{
+ const struct icmphdr *icmp = hdr;
+ const struct ipt_icmp *icmpinfo = matchinfo;
+
+ if (offset == 0 && datalen < 2) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ duprintf("Dropping evil ICMP tinygram.\n");
+ *hotdrop = 1;
+ return 0;
+ }
+
+ /* Must not be a fragment. */
+ return !offset
+ && icmp_type_code_match(icmpinfo->type,
+ icmpinfo->code[0],
+ icmpinfo->code[1],
+ icmp->type, icmp->code,
+ !!(icmpinfo->invflags&IPT_ICMP_INV));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+icmp_checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ const struct ipt_icmp *icmpinfo = matchinfo;
+
+ /* Must specify proto == ICMP, and no unknown invflags */
+ return ip->proto == IPPROTO_ICMP
+ && !(ip->invflags & IPT_INV_PROTO)
+ && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
+ && !(icmpinfo->invflags & ~IPT_ICMP_INV);
+}
+
+/* The built-in targets: standard (NULL) and error. */
+static struct ipt_target ipt_standard_target
+= { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL };
+static struct ipt_target ipt_error_target
+= { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL };
+
+static struct nf_sockopt_ops ipt_sockopts
+= { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl,
+ IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL };
+
+static struct ipt_match tcp_matchstruct
+= { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL };
+static struct ipt_match udp_matchstruct
+= { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL };
+static struct ipt_match icmp_matchstruct
+= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
+
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const struct ipt_table *t,
+ off_t start_offset, char *buffer, int length,
+ off_t *pos, unsigned int *count)
+{
+ if ((*count)++ >= start_offset) {
+ unsigned int namelen;
+
+ namelen = sprintf(buffer + *pos, "%s\n", t->name);
+ if (*pos + namelen > length) {
+ /* Stop iterating */
+ return 1;
+ }
+ *pos += namelen;
+ }
+ return 0;
+}
+
+static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+ off_t pos = 0;
+ unsigned int count = 0;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+ LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+
+ /* `start' hack - see fs/proc/generic.c line ~105 */
+ *start = (char *)((unsigned long)count - offset);
+ return pos;
+}
+#endif /*CONFIG_PROC_FS*/
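+
+/* Once a table module such as iptable_filter is loaded, this yields
+ * e.g. (illustrative):
+ *
+ *	# cat /proc/net/ip_tables_names
+ *	filter
+ */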
+
+static int __init init(void)
+{
+ int ret;
+
+ /* No one else will be downing the sem now, so we won't sleep */
+ down(&ipt_mutex);
+ list_append(&ipt_target, &ipt_standard_target);
+ list_append(&ipt_target, &ipt_error_target);
+ list_append(&ipt_match, &tcp_matchstruct);
+ list_append(&ipt_match, &udp_matchstruct);
+ list_append(&ipt_match, &icmp_matchstruct);
+ up(&ipt_mutex);
+
+ /* Register setsockopt */
+ ret = nf_register_sockopt(&ipt_sockopts);
+ if (ret < 0) {
+ duprintf("Unable to register sockopts.\n");
+ return ret;
+ }
+
+#ifdef CONFIG_PROC_FS
+ {
+ struct proc_dir_entry *proc;
+
+ proc = proc_net_create("ip_tables_names", 0, ipt_get_tables);
+ if (!proc) {
+ nf_unregister_sockopt(&ipt_sockopts);
+ return -ENOMEM;
+ }
+ proc->owner = THIS_MODULE;
+ }
+#endif
+
+ printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ nf_unregister_sockopt(&ipt_sockopts);
+#ifdef CONFIG_PROC_FS
+ proc_net_remove("ip_tables_names");
+#endif
+}
+
+EXPORT_SYMBOL(ipt_register_table);
+EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_register_match);
+EXPORT_SYMBOL(ipt_unregister_match);
+EXPORT_SYMBOL(ipt_do_table);
+EXPORT_SYMBOL(ipt_register_target);
+EXPORT_SYMBOL(ipt_unregister_target);
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/br-nf-bds/linux2.5/net/ipv4/netfilter/ipt_LOG.c b/br-nf-bds/linux2.5/net/ipv4/netfilter/ipt_LOG.c
new file mode 100644
index 0000000..48bb12f
--- /dev/null
+++ b/br-nf-bds/linux2.5/net/ipv4/netfilter/ipt_LOG.c
@@ -0,0 +1,363 @@
+/*
+ * This is a module used for logging packets (the iptables LOG target).
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/spinlock.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+struct in_device;
+#include <net/route.h>
+#include <linux/netfilter_ipv4/ipt_LOG.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+struct esphdr {
+ __u32 spi;
+}; /* FIXME evil kludge */
+
+/* Use lock to serialize, so printks don't overlap */
+static spinlock_t log_lock = SPIN_LOCK_UNLOCKED;
+
+/* One level of recursion won't kill us */
+static void dump_packet(const struct ipt_log_info *info,
+ struct iphdr *iph, unsigned int len, int recurse)
+{
+ void *protoh = (u_int32_t *)iph + iph->ihl;
+ unsigned int datalen = len - iph->ihl * 4;
+
+ /* Important fields:
+ * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
+ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
+ printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+ NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+ /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+ printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ ntohs(iph->tot_len), iph->tos & IPTOS_TOS_MASK,
+ iph->tos & IPTOS_PREC_MASK, iph->ttl, ntohs(iph->id));
+
+ /* Max length: 6 "CE DF MF " */
+ if (ntohs(iph->frag_off) & IP_CE)
+ printk("CE ");
+ if (ntohs(iph->frag_off) & IP_DF)
+ printk("DF ");
+ if (ntohs(iph->frag_off) & IP_MF)
+ printk("MF ");
+
+ /* Max length: 11 "FRAG:65535 " */
+ if (ntohs(iph->frag_off) & IP_OFFSET)
+ printk("FRAG:%u ", ntohs(iph->frag_off) & IP_OFFSET);
+
+ if ((info->logflags & IPT_LOG_IPOPT)
+ && iph->ihl * 4 != sizeof(struct iphdr)) {
+ unsigned int i;
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ printk("OPT (");
+ for (i = sizeof(struct iphdr); i < iph->ihl * 4; i++)
+ printk("%02X", ((u_int8_t *)iph)[i]);
+ printk(") ");
+ }
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP: {
+ struct tcphdr *tcph = protoh;
+
+ /* Max length: 10 "PROTO=TCP " */
+ printk("PROTO=TCP ");
+
+ if (ntohs(iph->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (datalen < sizeof (*tcph)) {
+ printk("INCOMPLETE [%u bytes] ", datalen);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ printk("SPT=%u DPT=%u ",
+ ntohs(tcph->source), ntohs(tcph->dest));
+ /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+ if (info->logflags & IPT_LOG_TCPSEQ)
+ printk("SEQ=%u ACK=%u ",
+ ntohl(tcph->seq), ntohl(tcph->ack_seq));
+ /* Max length: 13 "WINDOW=65535 " */
+ printk("WINDOW=%u ", ntohs(tcph->window));
+ /* Max length: 9 "RES=0x3F " */
+ printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(tcph) & TCP_RESERVED_BITS) >> 22));
+ /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+ if (tcph->cwr)
+ printk("CWR ");
+ if (tcph->ece)
+ printk("ECE ");
+ if (tcph->urg)
+ printk("URG ");
+ if (tcph->ack)
+ printk("ACK ");
+ if (tcph->psh)
+ printk("PSH ");
+ if (tcph->rst)
+ printk("RST ");
+ if (tcph->syn)
+ printk("SYN ");
+ if (tcph->fin)
+ printk("FIN ");
+ /* Max length: 11 "URGP=65535 " */
+ printk("URGP=%u ", ntohs(tcph->urg_ptr));
+
+ if ((info->logflags & IPT_LOG_TCPOPT)
+ && tcph->doff * 4 != sizeof(struct tcphdr)) {
+ unsigned int i;
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ printk("OPT (");
+ for (i = sizeof(struct tcphdr); i < tcph->doff * 4; i++)
+ printk("%02X", ((u_int8_t *)tcph)[i]);
+ printk(") ");
+ }
+ break;
+ }
+ case IPPROTO_UDP: {
+ struct udphdr *udph = protoh;
+
+ /* Max length: 10 "PROTO=UDP " */
+ printk("PROTO=UDP ");
+
+ if (ntohs(iph->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (datalen < sizeof (*udph)) {
+ printk("INCOMPLETE [%u bytes] ", datalen);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ printk("SPT=%u DPT=%u LEN=%u ",
+ ntohs(udph->source), ntohs(udph->dest),
+ ntohs(udph->len));
+ break;
+ }
+ case IPPROTO_ICMP: {
+ struct icmphdr *icmph = protoh;
+ static size_t required_len[NR_ICMP_TYPES+1]
+ = { [ICMP_ECHOREPLY] = 4,
+ [ICMP_DEST_UNREACH]
+ = 8 + sizeof(struct iphdr) + 8,
+ [ICMP_SOURCE_QUENCH]
+ = 8 + sizeof(struct iphdr) + 8,
+ [ICMP_REDIRECT]
+ = 8 + sizeof(struct iphdr) + 8,
+ [ICMP_ECHO] = 4,
+ [ICMP_TIME_EXCEEDED]
+ = 8 + sizeof(struct iphdr) + 8,
+ [ICMP_PARAMETERPROB]
+ = 8 + sizeof(struct iphdr) + 8,
+ [ICMP_TIMESTAMP] = 20,
+ [ICMP_TIMESTAMPREPLY] = 20,
+ [ICMP_ADDRESS] = 12,
+ [ICMP_ADDRESSREPLY] = 12 };
+
+ /* Max length: 11 "PROTO=ICMP " */
+ printk("PROTO=ICMP ");
+
+ if (ntohs(iph->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (datalen < 4) {
+ printk("INCOMPLETE [%u bytes] ", datalen);
+ break;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ printk("TYPE=%u CODE=%u ", icmph->type, icmph->code);
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (icmph->type <= NR_ICMP_TYPES
+ && required_len[icmph->type]
+ && datalen < required_len[icmph->type]) {
+ printk("INCOMPLETE [%u bytes] ", datalen);
+ break;
+ }
+
+ switch (icmph->type) {
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ printk("ID=%u SEQ=%u ",
+ ntohs(icmph->un.echo.id),
+ ntohs(icmph->un.echo.sequence));
+ break;
+
+ case ICMP_PARAMETERPROB:
+ /* Max length: 14 "PARAMETER=255 " */
+ printk("PARAMETER=%u ",
+ ntohl(icmph->un.gateway) >> 24);
+ break;
+ case ICMP_REDIRECT:
+ /* Max length: 24 "GATEWAY=255.255.255.255 " */
+ printk("GATEWAY=%u.%u.%u.%u ", NIPQUAD(icmph->un.gateway));
+ /* Fall through */
+ case ICMP_DEST_UNREACH:
+ case ICMP_SOURCE_QUENCH:
+ case ICMP_TIME_EXCEEDED:
+ /* Max length: 3+maxlen */
+ if (recurse) {
+ printk("[");
+ dump_packet(info,
+ (struct iphdr *)(icmph + 1),
+ datalen-sizeof(struct icmphdr),
+ 0);
+ printk("] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (icmph->type == ICMP_DEST_UNREACH
+ && icmph->code == ICMP_FRAG_NEEDED)
+ printk("MTU=%u ", ntohs(icmph->un.frag.mtu));
+ }
+ break;
+ }
+ /* Max Length */
+ case IPPROTO_AH:
+ case IPPROTO_ESP: {
+ struct esphdr *esph = protoh;
+ int esp = (iph->protocol == IPPROTO_ESP);
+
+ /* Max length: 10 "PROTO=ESP " */
+ printk("PROTO=%s ", esp ? "ESP" : "AH");
+
+ if (ntohs(iph->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (datalen < sizeof (*esph)) {
+ printk("INCOMPLETE [%u bytes] ", datalen);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ printk("SPI=0x%x ", ntohl(esph->spi) );
+ break;
+ }
+ /* Max length: 10 "PROTO 255 " */
+ default:
+ printk("PROTO=%u ", iph->protocol);
+ }
+
+ /* Proto Max log string length */
+ /* IP: 40+46+6+11+127 = 230 */
+ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
+ /* UDP: 10+max(25,20) = 35 */
+ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
+ /* ESP: 10+max(25)+15 = 50 */
+ /* AH: 9+max(25)+15 = 49 */
+ /* unknown: 10 */
+
+ /* (ICMP allows recursion one level deep) */
+ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
+ /* maxlen = 230+ 91 + 230 + 252 = 803 */
+}
+
+static unsigned int
+ipt_log_target(struct sk_buff **pskb,
+ unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *targinfo,
+ void *userinfo)
+{
+ struct iphdr *iph = (*pskb)->nh.iph;
+ const struct ipt_log_info *loginfo = targinfo;
+ char level_string[4] = "< >";
+
+ level_string[1] = '0' + (loginfo->level % 8);
+ spin_lock_bh(&log_lock);
+ printk(level_string);
+ printk("%sIN=%s ", loginfo->prefix, in ? in->name : "");
+ if ((*pskb)->physindev && in != (*pskb)->physindev)
+ printk("PHYSIN=%s ", (*pskb)->physindev->name);
+ printk("OUT=%s ", out ? out->name : "");
+ if ((*pskb)->physoutdev && out != (*pskb)->physoutdev)
+ printk("PHYSOUT=%s ", (*pskb)->physoutdev->name);
+
+ if (in && !out) {
+ /* MAC logging for input chain only. */
+ printk("MAC=");
+ if ((*pskb)->dev && (*pskb)->dev->hard_header_len
+ && (*pskb)->mac.raw != (void *)iph) {
+ int i;
+ unsigned char *p = (*pskb)->mac.raw;
+ for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++)
+ printk("%02x%c", *p,
+ i==(*pskb)->dev->hard_header_len - 1
+ ? ' ':':');
+ } else
+ printk(" ");
+ }
+
+ dump_packet(loginfo, iph, (*pskb)->len, 1);
+ printk("\n");
+ spin_unlock_bh(&log_lock);
+
+ return IPT_CONTINUE;
+}
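+
+/* Altogether, a logged TCP packet comes out roughly as (illustrative,
+ * with an empty prefix):
+ *
+ *	<4>IN=eth0 OUT= MAC=00:50:... SRC=10.0.0.1 DST=10.0.0.2 LEN=60
+ *	TOS=0x00 PREC=0x00 TTL=64 ID=0 DF PROTO=TCP SPT=1025 DPT=80
+ *	WINDOW=5840 RES=0x00 SYN URGP=0
+ */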
+
+static int ipt_log_checkentry(const char *tablename,
+ const struct ipt_entry *e,
+ void *targinfo,
+ unsigned int targinfosize,
+ unsigned int hook_mask)
+{
+ const struct ipt_log_info *loginfo = targinfo;
+
+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_log_info))) {
+ DEBUGP("LOG: targinfosize %u != %u\n",
+ targinfosize, IPT_ALIGN(sizeof(struct ipt_log_info)));
+ return 0;
+ }
+
+ if (loginfo->level >= 8) {
+ DEBUGP("LOG: level %u >= 8\n", loginfo->level);
+ return 0;
+ }
+
+ if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
+ DEBUGP("LOG: prefix term %i\n",
+ loginfo->prefix[sizeof(loginfo->prefix)-1]);
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_target ipt_log_reg
+= { { NULL, NULL }, "LOG", ipt_log_target, ipt_log_checkentry, NULL,
+ THIS_MODULE };
+
+static int __init init(void)
+{
+ if (ipt_register_target(&ipt_log_reg))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_target(&ipt_log_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");