/* netfilter.c: look after the filters for various protocols. * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. * * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any * way. * * Rusty Russell (C)2000 -- This code is GPL. * * February 2000: Modified by James Morris to have 1 queue per protocol. * 15-Mar-2000: Added NF_REPEAT --RR. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define __KERNEL_SYSCALLS__ #include /* In this code, we can be waiting indefinitely for userspace to * service a packet if a hook returns NF_QUEUE. We could keep a count * of skbuffs queued for userspace, and not deregister a hook unless * this is zero, but that sucks. Now, we simply check when the * packets come back: if the hook is gone, the packet is discarded. */ #ifdef CONFIG_NETFILTER_DEBUG #define NFDEBUG(format, args...) printk(format , ## args) #else #define NFDEBUG(format, args...) #endif /* Sockopts only registered and called from user context, so BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may sleep. */ static DECLARE_MUTEX(nf_sockopt_mutex); struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; static LIST_HEAD(nf_sockopts); /* * A queue handler may be registered for each protocol. Each is protected by * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ static struct nf_queue_handler_t { nf_queue_outfn_t outfn; void *data; } queue_handler[NPROTO]; int nf_register_hook(struct nf_hook_ops *reg) { struct list_head *i; br_write_lock_bh(BR_NETPROTO_LOCK); for (i = nf_hooks[reg->pf][reg->hooknum].next; i != &nf_hooks[reg->pf][reg->hooknum]; i = i->next) { if (reg->priority < ((struct nf_hook_ops *)i)->priority) break; } list_add(®->list, i->prev); br_write_unlock_bh(BR_NETPROTO_LOCK); return 0; } void nf_unregister_hook(struct nf_hook_ops *reg) { br_write_lock_bh(BR_NETPROTO_LOCK); list_del(®->list); br_write_unlock_bh(BR_NETPROTO_LOCK); } /* Do exclusive ranges overlap? */ static inline int overlap(int min1, int max1, int min2, int max2) { return max1 > min2 && min1 < max2; } /* Functions to register sockopt ranges (exclusive). */ int nf_register_sockopt(struct nf_sockopt_ops *reg) { struct list_head *i; int ret = 0; if (down_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; if (ops->pf == reg->pf && (overlap(ops->set_optmin, ops->set_optmax, reg->set_optmin, reg->set_optmax) || overlap(ops->get_optmin, ops->get_optmax, reg->get_optmin, reg->get_optmax))) { NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", ops->set_optmin, ops->set_optmax, ops->get_optmin, ops->get_optmax, reg->set_optmin, reg->set_optmax, reg->get_optmin, reg->get_optmax); ret = -EBUSY; goto out; } } list_add(®->list, &nf_sockopts); out: up(&nf_sockopt_mutex); return ret; } void nf_unregister_sockopt(struct nf_sockopt_ops *reg) { /* No point being interruptible: we're probably in cleanup_module() */ restart: down(&nf_sockopt_mutex); if (reg->use != 0) { /* To be woken by nf_sockopt call... */ /* FIXME: Stuart Young's name appears gratuitously. */ set_current_state(TASK_UNINTERRUPTIBLE); reg->cleanup_task = current; up(&nf_sockopt_mutex); schedule(); goto restart; } list_del(®->list); up(&nf_sockopt_mutex); } #ifdef CONFIG_NETFILTER_DEBUG #include #include #include #include static void debug_print_hooks_ip(unsigned int nf_debug) { if (nf_debug & (1 << NF_IP_PRE_ROUTING)) { printk("PRE_ROUTING "); nf_debug ^= (1 << NF_IP_PRE_ROUTING); } if (nf_debug & (1 << NF_IP_LOCAL_IN)) { printk("LOCAL_IN "); nf_debug ^= (1 << NF_IP_LOCAL_IN); } if (nf_debug & (1 << NF_IP_FORWARD)) { printk("FORWARD "); nf_debug ^= (1 << NF_IP_FORWARD); } if (nf_debug & (1 << NF_IP_LOCAL_OUT)) { printk("LOCAL_OUT "); nf_debug ^= (1 << NF_IP_LOCAL_OUT); } if (nf_debug & (1 << NF_IP_POST_ROUTING)) { printk("POST_ROUTING "); nf_debug ^= (1 << NF_IP_POST_ROUTING); } if (nf_debug) printk("Crap bits: 0x%04X", nf_debug); printk("\n"); } void nf_dump_skb(int pf, struct sk_buff *skb) { printk("skb: pf=%i %s dev=%s len=%u\n", pf, skb->sk ? "(owned)" : "(unowned)", skb->dev ? skb->dev->name : "(no dev)", skb->len); switch (pf) { case PF_INET: { const struct iphdr *ip = skb->nh.iph; __u32 *opt = (__u32 *) (ip + 1); int opti; __u16 src_port = 0, dst_port = 0; if (ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) { struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); src_port = ntohs(tcp->source); dst_port = ntohs(tcp->dest); } printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu" " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", ip->protocol, NIPQUAD(ip->saddr), src_port, NIPQUAD(ip->daddr), dst_port, ntohs(ip->tot_len), ip->tos, ntohs(ip->id), ntohs(ip->frag_off), ip->ttl); for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) printk(" O=0x%8.8X", *opt++); printk("\n"); } } } void nf_debug_ip_local_deliver(struct sk_buff *skb) { /* If it's a loopback packet, it must have come through * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and * NF_IP_LOCAL_IN. Otherwise, must have gone through * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */ if (!skb->dev) { printk("ip_local_deliver: skb->dev is NULL.\n"); } else if (strcmp(skb->dev->name, "lo") == 0) { if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING) | (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN))) { printk("ip_local_deliver: bad loopback skb: "); debug_print_hooks_ip(skb->nf_debug); nf_dump_skb(PF_INET, skb); } } else { if (skb->nf_debug != ((1<nf_debug); nf_dump_skb(PF_INET, skb); } } } void nf_debug_ip_loopback_xmit(struct sk_buff *newskb) { if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { printk("ip_dev_loopback_xmit: bad owned skb = %p: ", newskb); debug_print_hooks_ip(newskb->nf_debug); nf_dump_skb(PF_INET, newskb); } /* Clear to avoid confusing input check */ newskb->nf_debug = 0; } void nf_debug_ip_finish_output2(struct sk_buff *skb) { /* If it's owned, it must have gone through the * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING. * Otherwise, must have gone through * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING. */ if (skb->sk) { if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { printk("ip_finish_output: bad owned skb = %p: ", skb); debug_print_hooks_ip(skb->nf_debug); nf_dump_skb(PF_INET, skb); } } else { if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD) | (1 << NF_IP_POST_ROUTING))) { /* Fragments, entunnelled packets, TCP RSTs generated by ipt_REJECT will have no owners, but still may be local */ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))){ printk("ip_finish_output:" " bad unowned skb = %p: ",skb); debug_print_hooks_ip(skb->nf_debug); nf_dump_skb(PF_INET, skb); } } } } #endif /*CONFIG_NETFILTER_DEBUG*/ /* Call get/setsockopt() */ static int nf_sockopt(struct sock *sk, int pf, int val, char *opt, int *len, int get) { struct list_head *i; struct nf_sockopt_ops *ops; int ret; if (down_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { ops->use++; up(&nf_sockopt_mutex); ret = ops->get(sk, val, opt, len); goto out; } } else { if (val >= ops->set_optmin && val < ops->set_optmax) { ops->use++; up(&nf_sockopt_mutex); ret = ops->set(sk, val, opt, *len); goto out; } } } } up(&nf_sockopt_mutex); return -ENOPROTOOPT; out: down(&nf_sockopt_mutex); ops->use--; if (ops->cleanup_task) wake_up_process(ops->cleanup_task); up(&nf_sockopt_mutex); return ret; } int nf_setsockopt(struct sock *sk, int pf, int val, char *opt, int len) { return nf_sockopt(sk, pf, val, opt, &len, 0); } int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len) { return nf_sockopt(sk, pf, val, opt, len, 1); } static unsigned int nf_iterate(struct list_head *head, struct sk_buff **skb, int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, int (*okfn)(struct sk_buff *), int hook_thresh) { for (*i = (*i)->next; *i != head; *i = (*i)->next) { struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; if (hook_thresh > elem->priority) continue; switch (elem->hook(hook, skb, indev, outdev, okfn)) { case NF_QUEUE: return NF_QUEUE; case NF_STOLEN: return NF_STOLEN; case NF_DROP: return NF_DROP; case NF_REPEAT: *i = (*i)->prev; break; #ifdef CONFIG_NETFILTER_DEBUG case NF_ACCEPT: break; default: NFDEBUG("Evil return from %p(%u).\n", elem->hook, hook); #endif } } return NF_ACCEPT; } int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) { int ret; br_write_lock_bh(BR_NETPROTO_LOCK); if (queue_handler[pf].outfn) ret = -EBUSY; else { queue_handler[pf].outfn = outfn; queue_handler[pf].data = data; ret = 0; } br_write_unlock_bh(BR_NETPROTO_LOCK); return ret; } /* The caller must flush their queue before this */ int nf_unregister_queue_handler(int pf) { br_write_lock_bh(BR_NETPROTO_LOCK); queue_handler[pf].outfn = NULL; queue_handler[pf].data = NULL; br_write_unlock_bh(BR_NETPROTO_LOCK); return 0; } /* * Any packet that leaves via this function must come back * through nf_reinject(). */ static void nf_queue(struct sk_buff *skb, struct list_head *elem, int pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { int status; struct nf_info *info; struct net_device *physindev = NULL; struct net_device *physoutdev = NULL; if (!queue_handler[pf].outfn) { kfree_skb(skb); return; } info = kmalloc(sizeof(*info), GFP_ATOMIC); if (!info) { if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", skb); kfree_skb(skb); return; } *info = (struct nf_info) { (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; /* Bump dev refs so they don't vanish while packet is out */ if (indev) dev_hold(indev); if (outdev) dev_hold(outdev); if (skb->nf_bridge) { physindev = skb->nf_bridge->physindev; if (physindev) dev_hold(physindev); physoutdev = skb->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); if (status < 0) { /* James M doesn't say fuck enough. */ if (indev) dev_put(indev); if (outdev) dev_put(outdev); if (physindev) dev_put(physindev); if (physoutdev) dev_put(physoutdev); kfree(info); kfree_skb(skb); return; } } int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int hook_thresh) { struct list_head *elem; unsigned int verdict; int ret = 0; /* This stopgap cannot be removed until all the hooks are audited. */ if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { kfree_skb(skb); return -ENOMEM; } if (skb->ip_summed == CHECKSUM_HW) { if (outdev == NULL) { skb->ip_summed = CHECKSUM_NONE; } else { skb_checksum_help(skb); } } /* We may already have this, but read-locks nest anyway */ br_read_lock_bh(BR_NETPROTO_LOCK); #ifdef CONFIG_NETFILTER_DEBUG if (skb->nf_debug & (1 << hook)) { printk("nf_hook: hook %i already set.\n", hook); nf_dump_skb(pf, skb); } skb->nf_debug |= (1 << hook); #endif elem = &nf_hooks[pf][hook]; verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev, outdev, &elem, okfn, hook_thresh); if (verdict == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); nf_queue(skb, elem, pf, hook, indev, outdev, okfn); } switch (verdict) { case NF_ACCEPT: ret = okfn(skb); break; case NF_DROP: kfree_skb(skb); ret = -EPERM; break; } br_read_unlock_bh(BR_NETPROTO_LOCK); return ret; } void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict) { struct list_head *elem = &info->elem->list; struct list_head *i; /* We don't have BR_NETPROTO_LOCK here */ br_read_lock_bh(BR_NETPROTO_LOCK); for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) { if (i == &nf_hooks[info->pf][info->hook]) { /* The module which sent it to userspace is gone. */ NFDEBUG("%s: module disappeared, dropping packet.\n", __FUNCTION__); verdict = NF_DROP; break; } } /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) { elem = elem->prev; verdict = NF_ACCEPT; } if (verdict == NF_ACCEPT) { verdict = nf_iterate(&nf_hooks[info->pf][info->hook], &skb, info->hook, info->indev, info->outdev, &elem, info->okfn, INT_MIN); } switch (verdict) { case NF_ACCEPT: info->okfn(skb); break; case NF_QUEUE: nf_queue(skb, elem, info->pf, info->hook, info->indev, info->outdev, info->okfn); break; case NF_DROP: kfree_skb(skb); break; } br_read_unlock_bh(BR_NETPROTO_LOCK); /* Release those devices we held, or Alexey will kill me. */ if (info->indev) dev_put(info->indev); if (info->outdev) dev_put(info->outdev); kfree(info); return; } #ifdef CONFIG_INET /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff **pskb) { struct iphdr *iph = (*pskb)->nh.iph; struct rtable *rt; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, .tos = RT_TOS(iph->tos)|RTO_CONN, #ifdef CONFIG_IP_ROUTE_FWMARK .fwmark = (*pskb)->nfmark #endif } }, .oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0, }; struct net_device *dev_src = NULL; int err; /* accomodate ip_route_output_slow(), which expects the key src to be 0 or a local address; however some non-standard hacks like ipt_REJECT.c:send_reset() can cause packets with foreign saddr to be appear on the NF_IP_LOCAL_OUT hook -MB */ if(fl.fl4_src && !(dev_src = ip_dev_find(fl.fl4_src))) fl.fl4_src = 0; if ((err=ip_route_output_key(&rt, &fl)) != 0) { printk("route_me_harder: ip_route_output_key(dst=%u.%u.%u.%u, src=%u.%u.%u.%u, oif=%d, tos=0x%x, fwmark=0x%lx) error %d\n", NIPQUAD(iph->daddr), NIPQUAD(iph->saddr), (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0, RT_TOS(iph->tos)|RTO_CONN, #ifdef CONFIG_IP_ROUTE_FWMARK (*pskb)->nfmark, #else 0UL, #endif err); goto out; } /* Drop old route. */ dst_release((*pskb)->dst); (*pskb)->dst = &rt->u.dst; /* Change in oif may mean change in hh_len. */ if (skb_headroom(*pskb) < (*pskb)->dst->dev->hard_header_len) { struct sk_buff *nskb; nskb = skb_realloc_headroom(*pskb, (*pskb)->dst->dev->hard_header_len); if (!nskb) { err = -ENOMEM; goto out; } if ((*pskb)->sk) skb_set_owner_w(nskb, (*pskb)->sk); kfree_skb(*pskb); *pskb = nskb; } out: if (dev_src) dev_put(dev_src); return err; } #endif /*CONFIG_INET*/ /* This does not belong here, but ipt_REJECT needs it if connection tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated with it. */ void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *); void __init netfilter_init(void) { int i, h; for (i = 0; i < NPROTO; i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } }