-rw-r--r--  br-nf-bds/linux2.5/include/linux/skbuff.h |    9
-rw-r--r--  br-nf-bds/linux2.5/net/core/netfilter.c   |   18
-rw-r--r--  br-nf-bds/linux2.5/net/ipv4/ip_output.c   | 1223
3 files changed, 743 insertions(+), 507 deletions(-)
diff --git a/br-nf-bds/linux2.5/include/linux/skbuff.h b/br-nf-bds/linux2.5/include/linux/skbuff.h
index 0a43a95..cefecda 100644
--- a/br-nf-bds/linux2.5/include/linux/skbuff.h
+++ b/br-nf-bds/linux2.5/include/linux/skbuff.h
@@ -775,6 +775,15 @@ static inline int skb_headlen(const struct sk_buff *skb)
return skb->len - skb->data_len;
}
+static inline int skb_pagelen(const struct sk_buff *skb)
+{
+ int i, len = 0;
+
+ for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
+ len += skb_shinfo(skb)->frags[i].size;
+ return len + skb_headlen(skb);
+}
+
#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) \
BUG(); } while (0)
#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) \
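The new skb_pagelen() complements skb_headlen(): the former counts the linear head plus all page fragments, the latter only the linear head; bytes hanging off frag_list are in neither. A minimal sketch of the resulting invariant, in the style of the assertion macros above (hypothetical helper, not part of the patch):

	static inline void skb_pagelen_assert(const struct sk_buff *skb)
	{
		/* head + page frags can never exceed the total length;
		 * the remainder, if any, lives on skb_shinfo(skb)->frag_list. */
		if (skb_pagelen(skb) > skb->len)
			BUG();
	}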
diff --git a/br-nf-bds/linux2.5/net/core/netfilter.c b/br-nf-bds/linux2.5/net/core/netfilter.c
index 00ea7f9..f5a5af3 100644
--- a/br-nf-bds/linux2.5/net/core/netfilter.c
+++ b/br-nf-bds/linux2.5/net/core/netfilter.c
@@ -580,13 +580,15 @@ int ip_route_me_harder(struct sk_buff **pskb)
{
struct iphdr *iph = (*pskb)->nh.iph;
struct rtable *rt;
- struct rt_key key = { dst:iph->daddr,
- src:iph->saddr,
- oif:(*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
- tos:RT_TOS(iph->tos)|RTO_CONN,
+ struct flowi fl = { .nl_u = { .ip4_u =
+ { .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .tos = RT_TOS(iph->tos)|RTO_CONN,
#ifdef CONFIG_IP_ROUTE_FWMARK
- fwmark:(*pskb)->nfmark
+ .fwmark = (*pskb)->nfmark
#endif
+ } },
+ .oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
};
struct net_device *dev_src = NULL;
int err;
@@ -595,10 +597,10 @@ int ip_route_me_harder(struct sk_buff **pskb)
0 or a local address; however some non-standard hacks like
ipt_REJECT.c:send_reset() can cause packets with foreign
saddr to appear on the NF_IP_LOCAL_OUT hook -MB */
- if(key.src && !(dev_src = ip_dev_find(key.src)))
- key.src = 0;
+ if(fl.fl4_src && !(dev_src = ip_dev_find(fl.fl4_src)))
+ fl.fl4_src = 0;
- if ((err=ip_route_output_key(&rt, &key)) != 0) {
+ if ((err=ip_route_output_key(&rt, &fl)) != 0) {
printk("route_me_harder: ip_route_output_key(dst=%u.%u.%u.%u, src=%u.%u.%u.%u, oif=%d, tos=0x%x, fwmark=0x%lx) error %d\n",
NIPQUAD(iph->daddr), NIPQUAD(iph->saddr),
(*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
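The hunk above is the mechanical 2.5 conversion from struct rt_key to struct flowi: addresses, TOS and fwmark move into the nested ip4_u union while oif stays at the top level. A minimal sketch of the same lookup pattern in isolation (hypothetical helper name):

	static int route_output_to(struct rtable **rtp, u32 daddr, u32 saddr, u8 tos)
	{
		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr,
							 .saddr = saddr,
							 .tos   = tos } } };

		/* .oif defaults to 0, i.e. any outgoing interface */
		return ip_route_output_key(rtp, &fl);
	}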
diff --git a/br-nf-bds/linux2.5/net/ipv4/ip_output.c b/br-nf-bds/linux2.5/net/ipv4/ip_output.c
index c103c48..d41926e 100644
--- a/br-nf-bds/linux2.5/net/ipv4/ip_output.c
+++ b/br-nf-bds/linux2.5/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.6 2002/10/21 17:28:24 bdschuym Exp $
+ * Version: $Id: ip_output.c,v 1.7 2002/10/21 17:45:17 bdschuym Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -15,6 +15,7 @@
* Stefan Becker, <stefanb@yello.ping.de>
* Jorge Cwik, <jorge@laser.satlink.net>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ * Hirokazu Takahashi, <taka@valinux.co.jp>
*
* See ip_input.c for original log
*
@@ -38,6 +39,9 @@
* Marc Boucher : When call_out_firewall returns FW_QUEUE,
* silently drop skb instead of failing with -EPERM.
* Detlev Wengorz : Copy protocol for fragments.
+ * Hirokazu Takahashi: HW checksumming for outgoing UDP
+ * datagrams.
+ * Hirokazu Takahashi: sendfile() on UDP works now.
*/
#include <asm/uaccess.h>
@@ -108,16 +112,9 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
return 0;
}
-/* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook
- changes route */
-static inline int
-output_maybe_reroute(struct sk_buff *skb)
-{
- return skb->dst->output(skb);
-}
-
/*
* Add an ip header to a skbuff and send it out.
+ *
*/
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
u32 saddr, u32 daddr, struct ip_options *opt)
@@ -153,15 +150,34 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
}
ip_send_check(iph);
+ skb->priority = sk->priority;
+
/* Send it out. */
return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
- output_maybe_reroute);
+ dst_output);
}
static inline int ip_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct hh_cache *hh = dst->hh;
+ struct net_device *dev = dst->dev;
+
+ /* Be paranoid, rather than too clever. */
+ if (unlikely(skb_headroom(skb) < dev->hard_header_len
+ && dev->hard_header)) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_realloc_headroom(skb, (dev->hard_header_len&~15) + 16);
+ if (skb2 == NULL) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ kfree_skb(skb);
+ skb = skb2;
+ }
#ifdef CONFIG_NETFILTER_DEBUG
nf_debug_ip_finish_output2(skb);
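Worth noting: the headroom reservation in the hunk above uses (hard_header_len & ~15) + 16, i.e. round down to a multiple of 16 and then add one full block, rather than the old (hard_header_len + 15) & ~15 round-up. A quick sketch of the arithmetic:

	/* Sketch of the reservation used above; the result is always
	 * 16-byte aligned and at least hard_header_len:
	 *   len = 14 (Ethernet) -> 16
	 *   len = 16            -> 32
	 *   len = 17            -> 32
	 */
	static inline int ip_hh_reserve(int hard_header_len)
	{
		return (hard_header_len & ~15) + 16;
	}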
@@ -203,10 +219,6 @@ int ip_mc_output(struct sk_buff *skb)
* If the indicated interface is up and running, send the packet.
*/
IP_INC_STATS(IpOutRequests);
-#ifdef CONFIG_IP_ROUTE_NAT
- if (rt->rt_flags & RTCF_NAT)
- ip_do_nat(skb);
-#endif
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -251,100 +263,21 @@ int ip_mc_output(struct sk_buff *skb)
newskb->dev, ip_dev_loopback_xmit);
}
- return ip_finish_output(skb);
+ if (skb->len > dev->mtu || skb_shinfo(skb)->frag_list)
+ return ip_fragment(skb, ip_finish_output);
+ else
+ return ip_finish_output(skb);
}
int ip_output(struct sk_buff *skb)
{
-#ifdef CONFIG_IP_ROUTE_NAT
- struct rtable *rt = (struct rtable*)skb->dst;
-#endif
-
IP_INC_STATS(IpOutRequests);
-#ifdef CONFIG_IP_ROUTE_NAT
- if (rt->rt_flags&RTCF_NAT)
- ip_do_nat(skb);
-#endif
-
- return ip_finish_output(skb);
-}
-
-/* Queues a packet to be sent, and starts the transmitter if necessary.
- * This routine also needs to put in the total length and compute the
- * checksum. We use to do this in two stages, ip_build_header() then
- * this, but that scheme created a mess when routes disappeared etc.
- * So we do it all here, and the TCP send engine has been changed to
- * match. (No more unroutable FIN disasters, etc. wheee...) This will
- * most likely make other reliable transport layers above IP easier
- * to implement under Linux.
- */
-static inline int ip_queue_xmit2(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
- struct rtable *rt = (struct rtable *)skb->dst;
- struct net_device *dev;
- struct iphdr *iph = skb->nh.iph;
-
- dev = rt->u.dst.dev;
-
- /* This can happen when the transport layer has segments queued
- * with a cached route, and by the time we get here things are
- * re-routed to a device with a different MTU than the original
- * device. Sick, but we must cover it.
- */
- if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
- kfree_skb(skb);
- if (skb2 == NULL)
- return -ENOMEM;
- if (sk)
- skb_set_owner_w(skb2, sk);
- skb = skb2;
- iph = skb->nh.iph;
- }
-
- if (skb->len > rt->u.dst.pmtu) {
- unsigned int hlen;
- if (!(sk->route_caps&NETIF_F_TSO))
- goto fragment;
-
- /* Hack zone: all this must be done by TCP. */
- hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
- skb_shinfo(skb)->tso_size = rt->u.dst.pmtu - hlen;
- skb_shinfo(skb)->tso_segs =
- (skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/
- skb_shinfo(skb)->tso_size - 1;
- }
-
- ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
-
- /* Add an IP checksum. */
- ip_send_check(iph);
-
- skb->priority = sk->priority;
- return skb->dst->output(skb);
-
-fragment:
- if (ip_dont_fragment(sk, &rt->u.dst)) {
- /* Reject packet ONLY if TCP might fragment
- * it itself, if were careful enough.
- */
- NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big (len[%u] pmtu[%u]) to self\n",
- skb->len, rt->u.dst.pmtu));
-
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(rt->u.dst.pmtu));
- kfree_skb(skb);
- return -EMSGSIZE;
- }
- ip_select_ident(iph, &rt->u.dst, sk);
- if (skb->ip_summed == CHECKSUM_HW &&
- (skb = skb_checksum_help(skb)) == NULL)
- return -ENOMEM;
- return ip_fragment(skb, skb->dst->output);
+ if ((skb->len > skb->dst->dev->mtu || skb_shinfo(skb)->frag_list) &&
+ !skb_shinfo(skb)->tso_size)
+ return ip_fragment(skb, ip_finish_output);
+ else
+ return ip_finish_output(skb);
}
int ip_queue_xmit(struct sk_buff *skb)
@@ -372,14 +305,20 @@ int ip_queue_xmit(struct sk_buff *skb)
if(opt && opt->srr)
daddr = opt->faddr;
- /* If this fails, retransmit mechanism of transport layer will
- * keep trying until route appears or the connection times itself
- * out.
- */
- if (ip_route_output(&rt, daddr, inet->saddr,
- RT_CONN_FLAGS(sk),
- sk->bound_dev_if))
- goto no_route;
+ {
+ struct flowi fl = { .nl_u = { .ip4_u =
+ { .daddr = daddr,
+ .saddr = inet->saddr,
+ .tos = RT_CONN_FLAGS(sk) } },
+ .oif = sk->bound_dev_if };
+
+ /* If this fails, retransmit mechanism of transport layer will
+ * keep trying until route appears or the connection times itself
+ * out.
+ */
+ if (ip_route_output_key(&rt, &fl))
+ goto no_route;
+ }
__sk_dst_set(sk, &rt->u.dst);
tcp_v4_setup_caps(sk, &rt->u.dst);
}
@@ -409,348 +348,60 @@ packet_routed:
ip_options_build(skb, opt, inet->daddr, rt, 0);
}
- return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
- ip_queue_xmit2);
-
-no_route:
- IP_INC_STATS(IpOutNoRoutes);
- kfree_skb(skb);
- return -EHOSTUNREACH;
-}
-
-/*
- * Build and send a packet, with as little as one copy
- *
- * Doesn't care much about ip options... option length can be
- * different for fragment at 0 and other fragments.
- *
- * Note that the fragment at the highest offset is sent first,
- * so the getfrag routine can fill in the TCP/UDP checksum header
- * field in the last fragment it sends... actually it also helps
- * the reassemblers, they can put most packets in at the head of
- * the fragment queue, and they know the total size in advance. This
- * last feature will measurably improve the Linux fragment handler one
- * day.
- *
- * The callback has five args, an arbitrary pointer (copy of frag),
- * the source IP address (may depend on the routing table), the
- * destination address (char *), the offset to copy from, and the
- * length to be copied.
- */
-
-static int ip_build_xmit_slow(struct sock *sk,
- int getfrag (const void *,
- char *,
- unsigned int,
- unsigned int),
- const void *frag,
- unsigned length,
- struct ipcm_cookie *ipc,
- struct rtable *rt,
- int flags)
-{
- struct inet_opt *inet = inet_sk(sk);
- unsigned int fraglen, maxfraglen, fragheaderlen;
- int err;
- int offset, mf;
- int mtu;
- u16 id;
-
- int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
- int nfrags=0;
- struct ip_options *opt = ipc->opt;
- int df = 0;
-
- mtu = rt->u.dst.pmtu;
- if (ip_dont_fragment(sk, &rt->u.dst))
- df = htons(IP_DF);
-
- length -= sizeof(struct iphdr);
-
- if (opt) {
- fragheaderlen = sizeof(struct iphdr) + opt->optlen;
- maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
- } else {
- fragheaderlen = sizeof(struct iphdr);
-
- /*
- * Fragheaderlen is the size of 'overhead' on each buffer. Now work
- * out the size of the frames to send.
- */
-
- maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
- }
-
- if (length + fragheaderlen > 0xFFFF) {
- ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
- return -EMSGSIZE;
- }
-
- /*
- * Start at the end of the frame by handling the remainder.
- */
-
- offset = length - (length % (maxfraglen - fragheaderlen));
-
- /*
- * Amount of memory to allocate for final fragment.
- */
-
- fraglen = length - offset + fragheaderlen;
-
- if (length-offset==0) {
- fraglen = maxfraglen;
- offset -= maxfraglen-fragheaderlen;
- }
-
- /*
- * The last fragment will not have MF (more fragments) set.
- */
-
- mf = 0;
-
- /*
- * Don't fragment packets for path mtu discovery.
- */
+ if (skb->len > rt->u.dst.pmtu && (sk->route_caps&NETIF_F_TSO)) {
+ unsigned int hlen;
- if (offset > 0 && inet->pmtudisc == IP_PMTUDISC_DO) {
- ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
- return -EMSGSIZE;
+ /* Hack zone: all this must be done by TCP. */
+ hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+ skb_shinfo(skb)->tso_size = rt->u.dst.pmtu - hlen;
+ skb_shinfo(skb)->tso_segs =
+ (skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/
+ skb_shinfo(skb)->tso_size - 1;
}
- if (flags&MSG_PROBE)
- goto out;
-
- /*
- * Begin outputting the bytes.
- */
-
- id = inet->id++;
-
- do {
- char *data;
- struct sk_buff * skb;
-
- /*
- * Get the memory we require with some space left for alignment.
- */
- if (!(flags & MSG_DONTWAIT) || nfrags == 0) {
- skb = sock_alloc_send_skb(sk, fraglen + hh_len + 15,
- (flags & MSG_DONTWAIT), &err);
- } else {
- /* On a non-blocking write, we check for send buffer
- * usage on the first fragment only.
- */
- skb = sock_wmalloc(sk, fraglen + hh_len + 15, 1,
- sk->allocation);
- if (!skb)
- err = -ENOBUFS;
- }
- if (skb == NULL)
- goto error;
-
- /*
- * Fill in the control structures
- */
-
- skb->priority = sk->priority;
- skb->dst = dst_clone(&rt->u.dst);
- skb_reserve(skb, hh_len);
-
- /*
- * Find where to start putting bytes.
- */
-
- data = skb_put(skb, fraglen);
- skb->nh.iph = (struct iphdr *)data;
-
- /*
- * Only write IP header onto non-raw packets
- */
-
- {
- struct iphdr *iph = (struct iphdr *)data;
-
- iph->version = 4;
- iph->ihl = 5;
- if (opt) {
- iph->ihl += opt->optlen>>2;
- ip_options_build(skb, opt,
- ipc->addr, rt, offset);
- }
- iph->tos = inet->tos;
- iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
- iph->frag_off = htons(offset>>3)|mf|df;
- iph->id = id;
- if (!mf) {
- if (offset || !df) {
- /* Select an unpredictable ident only
- * for packets without DF or having
- * been fragmented.
- */
- __ip_select_ident(iph, &rt->u.dst, 0);
- id = iph->id;
- }
-
- /*
- * Any further fragments will have MF set.
- */
- mf = htons(IP_MF);
- }
- if (rt->rt_type == RTN_MULTICAST)
- iph->ttl = inet->mc_ttl;
- else
- iph->ttl = inet->ttl;
- iph->protocol = sk->protocol;
- iph->check = 0;
- iph->saddr = rt->rt_src;
- iph->daddr = rt->rt_dst;
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
- data += iph->ihl*4;
- }
-
- /*
- * User data callback
- */
-
- if (getfrag(frag, data, offset, fraglen-fragheaderlen)) {
- err = -EFAULT;
- kfree_skb(skb);
- goto error;
- }
- offset -= (maxfraglen-fragheaderlen);
- fraglen = maxfraglen;
+ ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
- nfrags++;
+ /* Add an IP checksum. */
+ ip_send_check(iph);
- err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
- skb->dst->dev, output_maybe_reroute);
- if (err) {
- if (err > 0)
- err = inet->recverr ? net_xmit_errno(err) : 0;
- if (err)
- goto error;
- }
- } while (offset >= 0);
+ skb->priority = sk->priority;
- if (nfrags>1)
- ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
-out:
- return 0;
+ return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+ dst_output);
-error:
- IP_INC_STATS(IpOutDiscards);
- if (nfrags>1)
- ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
- return err;
+no_route:
+ IP_INC_STATS(IpOutNoRoutes);
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
}
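The TSO arithmetic moved into ip_queue_xmit() above is easiest to check with concrete numbers; a sketch assuming a 1500-byte path MTU and a 52-byte header (20 IP + 32 TCP):

	/*
	 *   tso_size = pmtu - hlen              = 1500 - 52 = 1448
	 *   payload  = skb->len - hlen          = 4396 - 52 = 4344
	 *   tso_segs = (4344 + 1448 - 1) / 1448 - 1 = 3 - 1 = 2
	 *
	 * so tso_segs counts the segments beyond the first, matching
	 * the trailing "- 1" in the expression above.
	 */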
-/*
- * Fast path for unfragmented packets.
- */
-int ip_build_xmit(struct sock *sk,
- int getfrag (const void *,
- char *,
- unsigned int,
- unsigned int),
- const void *frag,
- unsigned length,
- struct ipcm_cookie *ipc,
- struct rtable *rt,
- int flags)
-{
- struct inet_opt *inet = inet_sk(sk);
- int err;
- struct sk_buff *skb;
- int df;
- struct iphdr *iph;
-
- /*
- * Try the simple case first. This leaves fragmented frames, and by
- * choice RAW frames within 20 bytes of maximum size(rare) to the long path
- */
-
- if (!inet->hdrincl) {
- length += sizeof(struct iphdr);
-
- /*
- * Check for slow path.
- */
- if (length > rt->u.dst.pmtu || ipc->opt != NULL)
- return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);
- } else {
- if (length > rt->u.dst.dev->mtu) {
- ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport,
- rt->u.dst.dev->mtu);
- return -EMSGSIZE;
- }
- }
- if (flags&MSG_PROBE)
- goto out;
- /*
- * Do path mtu discovery if needed.
- */
- df = 0;
- if (ip_dont_fragment(sk, &rt->u.dst))
- df = htons(IP_DF);
-
- /*
- * Fast path for unfragmented frames without options.
- */
- {
- int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
-
- skb = sock_alloc_send_skb(sk, length+hh_len+15,
- flags&MSG_DONTWAIT, &err);
- if(skb==NULL)
- goto error;
- skb_reserve(skb, hh_len);
- }
-
- skb->priority = sk->priority;
- skb->dst = dst_clone(&rt->u.dst);
-
- skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
-
- if (!inet->hdrincl) {
- iph->version=4;
- iph->ihl=5;
- iph->tos = inet->tos;
- iph->tot_len = htons(length);
- iph->frag_off = df;
- iph->ttl = inet->mc_ttl;
- ip_select_ident(iph, &rt->u.dst, sk);
- if (rt->rt_type != RTN_MULTICAST)
- iph->ttl = inet->ttl;
- iph->protocol=sk->protocol;
- iph->saddr=rt->rt_src;
- iph->daddr=rt->rt_dst;
- iph->check=0;
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
- err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
- }
- else
- err = getfrag(frag, (void *)iph, 0, length);
-
- if (err)
- goto error_fault;
+static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+{
+ to->pkt_type = from->pkt_type;
+ to->priority = from->priority;
+ to->protocol = from->protocol;
+ to->security = from->security;
+ to->dst = dst_clone(from->dst);
+ to->dev = from->dev;
- err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
- output_maybe_reroute);
- if (err > 0)
- err = inet->recverr ? net_xmit_errno(err) : 0;
- if (err)
- goto error;
-out:
- return 0;
+ /* Copy the flags to each fragment. */
+ IPCB(to)->flags = IPCB(from)->flags;
-error_fault:
- err = -EFAULT;
- kfree_skb(skb);
-error:
- IP_INC_STATS(IpOutDiscards);
- return err;
+#ifdef CONFIG_NET_SCHED
+ to->tc_index = from->tc_index;
+#endif
+#ifdef CONFIG_NETFILTER
+ to->nfmark = from->nfmark;
+ /* Connection association is same as pre-frag packet */
+ to->nfct = from->nfct;
+ nf_conntrack_get(to->nfct);
+ to->nf_bridge = from->nf_bridge;
+ nf_bridge_get(to->nf_bridge);
+#ifdef CONFIG_NETFILTER_DEBUG
+ to->nf_debug = from->nf_debug;
+#endif
+#endif
}
/*
@@ -758,8 +409,6 @@ error:
* smaller pieces (each of size equal to IP header plus
* a block of the data of the original IP data part) that will yet fit in a
* single device frame, and queue such a frame for sending.
- *
- * Yes this is inefficient, feel free to submit a quicker one.
*/
int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
@@ -783,13 +432,111 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
iph = skb->nh.iph;
+ if (unlikely(iph->frag_off & htons(IP_DF))) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(rt->u.dst.pmtu));
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
/*
* Setup starting values.
*/
hlen = iph->ihl * 4;
- left = skb->len - hlen; /* Space per frame */
mtu = rt->u.dst.pmtu - hlen; /* Size of data space */
+
+ /* When frag_list is given, use it. First, check its validity:
+	 * some transformers could create a wrong frag_list or break an
+	 * existing one, which is not prohibited. In this case fall back to copying.
+ *
+	 * LATER: this step can be merged into the real generation of
+	 * fragments; we can switch to copying when we see the first bad fragment.
+ */
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *frag;
+ int first_len = skb_pagelen(skb);
+
+ if (first_len - hlen > mtu ||
+ ((first_len - hlen) & 7) ||
+ (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
+ skb_cloned(skb))
+ goto slow_path;
+
+ for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+ /* Correct geometry. */
+ if (frag->len > mtu ||
+ ((frag->len & 7) && frag->next) ||
+ skb_headroom(frag) < hlen)
+ goto slow_path;
+
+ /* Correct socket ownership. */
+ if (frag->sk == NULL)
+ goto slow_path;
+
+ /* Partially cloned skb? */
+ if (skb_shared(frag))
+ goto slow_path;
+ }
+
+ /* Everything is OK. Generate! */
+
+ err = 0;
+ offset = 0;
+ frag = skb_shinfo(skb)->frag_list;
+ skb_shinfo(skb)->frag_list = 0;
+ skb->data_len = first_len - skb_headlen(skb);
+ skb->len = first_len;
+ iph->tot_len = htons(first_len);
+ iph->frag_off |= htons(IP_MF);
+ ip_send_check(iph);
+
+ for (;;) {
+ /* Prepare header of the next frame,
+			 * before the previous one goes down. */
+ if (frag) {
+ frag->h.raw = frag->data;
+ frag->nh.raw = __skb_push(frag, hlen);
+ memcpy(frag->nh.raw, iph, hlen);
+ iph = frag->nh.iph;
+ iph->tot_len = htons(frag->len);
+ ip_copy_metadata(frag, skb);
+ if (offset == 0)
+ ip_options_fragment(frag);
+ offset += skb->len - hlen;
+ iph->frag_off = htons(offset>>3);
+ if (frag->next != NULL)
+ iph->frag_off |= htons(IP_MF);
+ /* Ready, complete checksum */
+ ip_send_check(iph);
+ }
+
+ err = output(skb);
+
+ if (err || !frag)
+ break;
+
+ skb = frag;
+ frag = skb->next;
+ skb->next = NULL;
+ }
+
+ if (err == 0) {
+ IP_INC_STATS(IpFragOKs);
+ return 0;
+ }
+
+ while (frag) {
+ skb = frag->next;
+ kfree_skb(frag);
+ frag = skb;
+ }
+ IP_INC_STATS(IpFragFails);
+ return err;
+ }
+
+slow_path:
+ left = skb->len - hlen; /* Space per frame */
ptr = raw + hlen; /* Where to start from */
/*
@@ -817,7 +564,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
* Allocate buffer.
*/
- if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
+ if ((skb2 = alloc_skb(len+hlen+rt->u.dst.dev->hard_header_len+16,GFP_ATOMIC)) == NULL) {
NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
err = -ENOMEM;
goto fail;
@@ -827,14 +574,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
* Set up data on packet
*/
- skb2->pkt_type = skb->pkt_type;
- skb2->priority = skb->priority;
- skb_reserve(skb2, (dev->hard_header_len+15)&~15);
+ ip_copy_metadata(skb2, skb);
+ skb_reserve(skb2, (rt->u.dst.dev->hard_header_len&~15)+16);
skb_put(skb2, len + hlen);
skb2->nh.raw = skb2->data;
skb2->h.raw = skb2->data + hlen;
- skb2->protocol = skb->protocol;
- skb2->security = skb->security;
/*
* Charge the memory for the fragment to any owner
@@ -843,8 +587,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
- skb2->dst = dst_clone(skb->dst);
- skb2->dev = skb->dev;
/*
* Copy the packet header into the new buffer.
@@ -874,9 +616,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
if (offset == 0)
ip_options_fragment(skb);
- /* Copy the flags to each fragment. */
- IPCB(skb2)->flags = IPCB(skb)->flags;
-
/*
* Added AC : If we are fragmenting a fragment that's not the
* last fragment then keep MF on each fragment
@@ -886,21 +625,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
ptr += len;
offset += len;
-#ifdef CONFIG_NET_SCHED
- skb2->tc_index = skb->tc_index;
-#endif
-#ifdef CONFIG_NETFILTER
- skb2->nfmark = skb->nfmark;
- /* Connection association is same as pre-frag packet */
- skb2->nfct = skb->nfct;
- nf_conntrack_get(skb2->nfct);
- skb2->nf_bridge = skb->nf_bridge;
- nf_bridge_get(skb2->nf_bridge);
-#ifdef CONFIG_NETFILTER_DEBUG
- skb2->nf_debug = skb->nf_debug;
-#endif
-#endif
-
/*
* Put this fragment into the sending queue.
*/
@@ -925,40 +649,525 @@ fail:
return err;
}
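For reference, the offset bookkeeping in both paths of ip_fragment() above encodes the 13-bit fragment offset in 8-byte units; a sketch with concrete numbers:

	/* A fragment covering payload bytes [2960, 4408) of the original
	 * datagram is emitted as:
	 *   iph->frag_off = htons(2960 >> 3)   -> htons(370)
	 * with htons(IP_MF) or'ed in for every fragment except the last,
	 * which is why non-final fragment payloads must be multiples of 8
	 * (the "& 7" checks in the fast path above). */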
+int
+ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+{
+ struct iovec *iov = from;
+
+ if (skb->ip_summed == CHECKSUM_HW) {
+ if (memcpy_fromiovecend(to, iov, offset, len) < 0)
+ return -EFAULT;
+ } else {
+ unsigned int csum = 0;
+ if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
+ return -EFAULT;
+ skb->csum = csum_block_add(skb->csum, csum, odd);
+ }
+ return 0;
+}
+
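ip_generic_getfrag() above copies from a user iovec; the odd argument is the destination offset inside the payload accumulated so far, letting csum_block_add() rotate checksums that land on odd byte boundaries. A sketch of the same callback shape for a flat kernel buffer (essentially what ip_reply_glue_bits() below does):

	static int flat_getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb)
	{
		unsigned int csum;

		/* copy from a kernel buffer, accumulating the checksum */
		csum = csum_partial_copy_nocheck((char *)from + offset, to, len, 0);
		skb->csum = csum_block_add(skb->csum, csum, odd);
		return 0;
	}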
+static inline int
+skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
+{
+ if (i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+ return page == frag->page &&
+ off == frag->page_offset+frag->size;
+ }
+ return 0;
+}
+
+static inline void
+skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ frag->page = page;
+ frag->page_offset = off;
+ frag->size = size;
+ skb_shinfo(skb)->nr_frags = i+1;
+}
+
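skb_can_coalesce() and skb_fill_page_desc() together implement the zero-copy append: when new data continues exactly where the previous fragment of the same page ended, that descriptor is grown instead of consuming a new frags[] slot. A condensed sketch of the pattern ip_append_page() uses below:

	static int append_page_frag(struct sk_buff *skb, struct page *page,
				    int off, int len)
	{
		int i = skb_shinfo(skb)->nr_frags;

		if (skb_can_coalesce(skb, i, page, off)) {
			skb_shinfo(skb)->frags[i-1].size += len; /* grow in place */
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);                          /* new descriptor */
			skb_fill_page_desc(skb, i, page, off, len);
		} else {
			return -EMSGSIZE;                        /* frags[] full */
		}
		skb->len += len;
		skb->data_len += len;
		return 0;
	}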
+static inline unsigned int
+csum_page(struct page *page, int offset, int copy)
+{
+ char *kaddr;
+ unsigned int csum;
+ kaddr = kmap(page);
+ csum = csum_partial(kaddr + offset, copy, 0);
+ kunmap(page);
+ return csum;
+}
+
/*
- * Fetch data from kernel space and fill in checksum if needed.
+ * ip_append_data() and ip_append_page() can make one large IP datagram
+ * from many pieces of data. Each piece will be held on the socket
+ * until ip_push_pending_frames() is called. Each piece can be a page
+ * or non-page data.
+ *
+ * Transport protocols other than UDP - e.g. raw sockets - can
+ * potentially use this interface as well; a caller sketch follows
+ * ip_append_data() below.
+ *
+ * LATER: length must be adjusted by tail padding when required.
*/
-static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
- unsigned int fraglen)
+int ip_append_data(struct sock *sk,
+ int getfrag(void *from, char *to, int offset, int len,
+ int odd, struct sk_buff *skb),
+ void *from, int length, int transhdrlen,
+ struct ipcm_cookie *ipc, struct rtable *rt,
+ unsigned int flags)
{
- struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
- u16 *pktp = (u16 *)to;
- struct iovec *iov;
- int len;
- int hdrflag = 1;
-
- iov = &dp->iov[0];
- if (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- hdrflag = 0;
+ struct inet_opt *inet = inet_sk(sk);
+ struct sk_buff *skb;
+
+ struct ip_options *opt = NULL;
+ int hh_len;
+ int exthdrlen;
+ int mtu;
+ int copy;
+ int err;
+ int offset = 0;
+ unsigned int maxfraglen, fragheaderlen;
+ int csummode = CHECKSUM_NONE;
+
+ if (flags&MSG_PROBE)
+ return 0;
+
+ if (skb_queue_empty(&sk->write_queue)) {
+ /*
+ * setup for corking.
+ */
+ opt = ipc->opt;
+ if (opt) {
+ if (inet->cork.opt == NULL)
+ inet->cork.opt = kmalloc(sizeof(struct ip_options)+40, sk->allocation);
+ memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
+ inet->cork.flags |= IPCORK_OPT;
+ inet->cork.addr = ipc->addr;
+ }
+ dst_hold(&rt->u.dst);
+ inet->cork.fragsize = mtu = rt->u.dst.pmtu;
+ inet->cork.rt = rt;
+ inet->cork.length = 0;
+ inet->sndmsg_page = NULL;
+ inet->sndmsg_off = 0;
+ if ((exthdrlen = rt->u.dst.header_len) != 0) {
+ length += exthdrlen;
+ transhdrlen += exthdrlen;
+ }
+ } else {
+ rt = inet->cork.rt;
+ if (inet->cork.flags & IPCORK_OPT)
+ opt = inet->cork.opt;
+
+ transhdrlen = 0;
+ exthdrlen = 0;
+ mtu = inet->cork.fragsize;
}
- len = iov->iov_len - offset;
- if (fraglen > len) { /* overlapping. */
- dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
- dp->csum);
- offset = 0;
- fraglen -= len;
- to += len;
- iov++;
+ hh_len = (rt->u.dst.dev->hard_header_len&~15) + 16;
+
+ fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+ maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
+
+ if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
+ ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
+ return -EMSGSIZE;
+ }
+
+ /*
+	 * transhdrlen > 0 means that this is the first fragment and we want
+	 * it not to be fragmented later.
+ */
+ if (transhdrlen &&
+ length + fragheaderlen <= maxfraglen &&
+ rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
+ !exthdrlen)
+ csummode = CHECKSUM_HW;
+
+ inet->cork.length += length;
+
+ if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
+ goto alloc_new_skb;
+
+ while (length > 0) {
+ if ((copy = maxfraglen - skb->len) <= 0) {
+ char *data;
+ unsigned int datalen;
+ unsigned int fraglen;
+ unsigned int alloclen;
+ BUG_TRAP(copy == 0);
+
+alloc_new_skb:
+ datalen = maxfraglen - fragheaderlen;
+ if (datalen > length)
+ datalen = length;
+
+ fraglen = datalen + fragheaderlen;
+ if ((flags & MSG_MORE) &&
+ !(rt->u.dst.dev->features&NETIF_F_SG))
+ alloclen = maxfraglen;
+ else
+ alloclen = datalen + fragheaderlen;
+ if (!(flags & MSG_DONTWAIT) || transhdrlen) {
+ skb = sock_alloc_send_skb(sk,
+ alloclen + hh_len + 15,
+ (flags & MSG_DONTWAIT), &err);
+ } else {
+ skb = sock_wmalloc(sk,
+ alloclen + hh_len + 15, 1,
+ sk->allocation);
+ if (unlikely(skb == NULL))
+ err = -ENOBUFS;
+ }
+ if (skb == NULL)
+ goto error;
+
+ /*
+ * Fill in the control structures
+ */
+ skb->ip_summed = csummode;
+ skb->csum = 0;
+ skb_reserve(skb, hh_len);
+
+ /*
+ * Find where to start putting bytes.
+ */
+ data = skb_put(skb, fraglen);
+ skb->nh.raw = __skb_pull(skb, exthdrlen);
+ data += fragheaderlen;
+ skb->h.raw = data + exthdrlen;
+
+ copy = datalen - transhdrlen;
+ if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
+ err = -EFAULT;
+ kfree_skb(skb);
+ goto error;
+ }
+
+ offset += copy;
+ length -= datalen;
+ transhdrlen = 0;
+ exthdrlen = 0;
+ csummode = CHECKSUM_NONE;
+
+ /*
+ * Put the packet on the pending queue.
+ */
+ __skb_queue_tail(&sk->write_queue, skb);
+ continue;
+ }
+
+ if (copy > length)
+ copy = length;
+
+ if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+ unsigned int off;
+
+ off = skb->len;
+ if (getfrag(from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
+ __skb_trim(skb, off);
+ err = -EFAULT;
+ goto error;
+ }
+ } else {
+ int i = skb_shinfo(skb)->nr_frags;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+ struct page *page = inet->sndmsg_page;
+ int off = inet->sndmsg_off;
+ unsigned int left;
+
+ if (page && (left = PAGE_SIZE - off) > 0) {
+ if (copy >= left)
+ copy = left;
+ if (page != frag->page) {
+ if (i == MAX_SKB_FRAGS) {
+ err = -EMSGSIZE;
+ goto error;
+ }
+ get_page(page);
+ skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
+ frag = &skb_shinfo(skb)->frags[i];
+ }
+ } else if (i < MAX_SKB_FRAGS) {
+ if (copy > PAGE_SIZE)
+ copy = PAGE_SIZE;
+ page = alloc_pages(sk->allocation, 0);
+ if (page == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+ inet->sndmsg_page = page;
+ inet->sndmsg_off = 0;
+
+ skb_fill_page_desc(skb, i, page, 0, 0);
+ frag = &skb_shinfo(skb)->frags[i];
+ skb->truesize += PAGE_SIZE;
+ atomic_add(PAGE_SIZE, &sk->wmem_alloc);
+ } else {
+ err = -EMSGSIZE;
+ goto error;
+ }
+ if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+ err = -EFAULT;
+ goto error;
+ }
+ inet->sndmsg_off += copy;
+ frag->size += copy;
+ skb->len += copy;
+ skb->data_len += copy;
+ }
+ offset += copy;
+ length -= copy;
+ }
+
+ return 0;
+
+error:
+ inet->cork.length -= length;
+ IP_INC_STATS(IpOutDiscards);
+ return err;
+}
+
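As promised in the comment above, a sketch of the intended caller: roughly what a UDP-style sendmsg() path would do with this interface (locking, routing and exact length handling elided; the variable names are the caller's, not part of this patch):

	err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov,
			     ulen, sizeof(struct udphdr), &ipc, rt,
			     msg->msg_flags);
	if (err)
		ip_flush_pending_frames(sk);      /* drop what was queued */
	else if (!(msg->msg_flags & MSG_MORE))
		err = ip_push_pending_frames(sk); /* build header, send */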
+ssize_t ip_append_page(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ struct inet_opt *inet = inet_sk(sk);
+ struct sk_buff *skb;
+ struct rtable *rt;
+ struct ip_options *opt = NULL;
+ int hh_len;
+ int mtu;
+ int len;
+ int err;
+ unsigned int maxfraglen, fragheaderlen;
+
+ if (inet->hdrincl)
+ return -EPERM;
+
+ if (flags&MSG_PROBE)
+ return 0;
+
+ if (skb_queue_empty(&sk->write_queue))
+ return -EINVAL;
+
+ rt = inet->cork.rt;
+ if (inet->cork.flags & IPCORK_OPT)
+ opt = inet->cork.opt;
+
+ if (!(rt->u.dst.dev->features&NETIF_F_SG))
+ return -EOPNOTSUPP;
+
+ hh_len = (rt->u.dst.dev->hard_header_len&~15)+16;
+ mtu = inet->cork.fragsize;
+
+ fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+ maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
+
+ if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
+ ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
+ return -EMSGSIZE;
+ }
+
+ if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
+ return -EINVAL;
+
+ inet->cork.length += size;
+
+ while (size > 0) {
+ int i;
+ if ((len = maxfraglen - skb->len) <= 0) {
+ char *data;
+ struct iphdr *iph;
+ BUG_TRAP(len == 0);
+
+ skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1,
+ sk->allocation);
+ if (unlikely(!skb)) {
+ err = -ENOBUFS;
+ goto error;
+ }
+
+ /*
+ * Fill in the control structures
+ */
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum = 0;
+ skb_reserve(skb, hh_len);
+
+ /*
+ * Find where to start putting bytes.
+ */
+ data = skb_put(skb, fragheaderlen);
+ skb->nh.iph = iph = (struct iphdr *)data;
+ data += fragheaderlen;
+ skb->h.raw = data;
+
+ /*
+ * Put the packet on the pending queue.
+ */
+ __skb_queue_tail(&sk->write_queue, skb);
+ continue;
+ }
+
+ i = skb_shinfo(skb)->nr_frags;
+ if (len > size)
+ len = size;
+ if (skb_can_coalesce(skb, i, page, offset)) {
+ skb_shinfo(skb)->frags[i-1].size += len;
+ } else if (i < MAX_SKB_FRAGS) {
+ get_page(page);
+ skb_fill_page_desc(skb, i, page, offset, len);
+ } else {
+ err = -EMSGSIZE;
+ goto error;
+ }
+
+ if (skb->ip_summed == CHECKSUM_NONE) {
+ unsigned int csum;
+ csum = csum_page(page, offset, len);
+ skb->csum = csum_block_add(skb->csum, csum, skb->len);
+ }
+
+ skb->len += len;
+ skb->data_len += len;
+ offset += len;
+ size -= len;
+ }
+ return 0;
+
+error:
+ inet->cork.length -= size;
+ IP_INC_STATS(IpOutDiscards);
+ return err;
+}
+
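ip_append_page() is what makes the changelog's "sendfile() on UDP works now" true: file pages are attached to the pending skb without copying. A userspace sketch, assuming the UDP corking interface this infrastructure enables (UDP_CORK at level IPPROTO_UDP):

	int on = 1, off = 0;

	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &on, sizeof(on));
	send(fd, hdr, hdrlen, 0);               /* queued via ip_append_data()  */
	sendfile(fd, filefd, NULL, filelen);    /* pages via ip_append_page()   */
	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &off, sizeof(off)); /* push out  */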
+/*
+ *	Combine all pending IP fragments on the socket into one IP datagram
+ *	and push them out.
+ */
+int ip_push_pending_frames(struct sock *sk)
+{
+ struct sk_buff *skb, *tmp_skb;
+ struct sk_buff **tail_skb;
+ struct inet_opt *inet = inet_sk(sk);
+ struct ip_options *opt = NULL;
+ struct rtable *rt = inet->cork.rt;
+ struct iphdr *iph;
+ int df = 0;
+ __u8 ttl;
+ int err = 0;
+
+ if ((skb = __skb_dequeue(&sk->write_queue)) == NULL)
+ goto out;
+ tail_skb = &(skb_shinfo(skb)->frag_list);
+
+ while ((tmp_skb = __skb_dequeue(&sk->write_queue)) != NULL) {
+ __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+ *tail_skb = tmp_skb;
+ tail_skb = &(tmp_skb->next);
+ skb->len += tmp_skb->len;
+ skb->data_len += tmp_skb->len;
+#if 0 /* Logically correct, but useless work; ip_fragment() will have to undo it */
+ skb->truesize += tmp_skb->truesize;
+ __sock_put(tmp_skb->sk);
+ tmp_skb->destructor = NULL;
+ tmp_skb->sk = NULL;
+#endif
+ }
+
+ if (inet->pmtudisc == IP_PMTUDISC_DO ||
+ (!skb_shinfo(skb)->frag_list && ip_dont_fragment(sk, &rt->u.dst)))
+ df = htons(IP_DF);
+
+ if (inet->cork.flags & IPCORK_OPT)
+ opt = inet->cork.opt;
+
+ if (rt->rt_type == RTN_MULTICAST)
+ ttl = inet->mc_ttl;
+ else
+ ttl = inet->ttl;
+
+ iph = (struct iphdr *)skb->data;
+ iph->version = 4;
+ iph->ihl = 5;
+ if (opt) {
+ iph->ihl += opt->optlen>>2;
+ ip_options_build(skb, opt, inet->cork.addr, rt, 0);
+ }
+ iph->tos = inet->tos;
+ iph->tot_len = htons(skb->len);
+ iph->frag_off = df;
+ if (!df) {
+ __ip_select_ident(iph, &rt->u.dst, 0);
+ } else {
+ iph->id = htons(inet->id++);
+ }
+ iph->ttl = ttl;
+ iph->protocol = sk->protocol;
+ iph->saddr = rt->rt_src;
+ iph->daddr = rt->rt_dst;
+ ip_send_check(iph);
+
+ skb->priority = sk->priority;
+ skb->dst = dst_clone(&rt->u.dst);
+
+	/* Netfilter gets the whole, still unfragmented skb. */
+ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
+ skb->dst->dev, dst_output);
+ if (err) {
+ if (err > 0)
+ err = inet->recverr ? net_xmit_errno(err) : 0;
+ if (err)
+ goto error;
+ }
+
+out:
+ inet->cork.flags &= ~IPCORK_OPT;
+ if (inet->cork.rt) {
+ ip_rt_put(inet->cork.rt);
+ inet->cork.rt = NULL;
}
+ return err;
- dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
- dp->csum);
+error:
+ IP_INC_STATS(IpOutDiscards);
+ goto out;
+}
- if (hdrflag && dp->csumoffset)
- *(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
- return 0;
+/*
+ * Throw away all pending data on the socket.
+ */
+void ip_flush_pending_frames(struct sock *sk)
+{
+ struct inet_opt *inet = inet_sk(sk);
+ struct sk_buff *skb;
+
+ while ((skb = __skb_dequeue_tail(&sk->write_queue)) != NULL)
+ kfree_skb(skb);
+
+ inet->cork.flags &= ~IPCORK_OPT;
+ if (inet->cork.opt) {
+ kfree(inet->cork.opt);
+ inet->cork.opt = NULL;
+ }
+ if (inet->cork.rt) {
+ ip_rt_put(inet->cork.rt);
+ inet->cork.rt = NULL;
+ }
+}
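Taken together, the three functions above form the corking life cycle; a compact summary:

	/*
	 *   ip_append_data() / ip_append_page()
	 *       queue per-fragment skbs on sk->write_queue, pinning
	 *       inet->cork.rt and inet->cork.opt on the first call;
	 *   ip_push_pending_frames()
	 *       chain the queued skbs onto frag_list, build a single IP
	 *       header, run NF_IP_LOCAL_OUT and dst_output(), release cork;
	 *   ip_flush_pending_frames()
	 *       free everything queued and release the cork state on
	 *       error or socket teardown.
	 */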
+
+
+/*
+ * Fetch data from kernel space and fill in checksum if needed.
+ */
+static int ip_reply_glue_bits(void *dptr, char *to, int offset,
+ int len, int odd, struct sk_buff *skb)
+{
+ unsigned int csum;
+
+ csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
+ skb->csum = csum_block_add(skb->csum, csum, odd);
+ return 0;
}
/*
@@ -967,6 +1176,8 @@ static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
*
* Should run single threaded per socket because it uses the sock
* structure to pass arguments.
+ *
+ * LATER: switch from ip_build_xmit to ip_append_*
*/
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
unsigned int len)
@@ -993,8 +1204,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
daddr = replyopts.opt.faddr;
}
- if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
- return;
+ {
+ struct flowi fl = { .nl_u = { .ip4_u =
+ { .daddr = daddr,
+ .saddr = rt->rt_spec_dst,
+ .tos = RT_TOS(skb->nh.iph->tos) } } };
+ if (ip_route_output_key(&rt, &fl))
+ return;
+ }
/* And let IP do all the hard work.
@@ -1006,7 +1223,15 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
inet->tos = skb->nh.iph->tos;
sk->priority = skb->priority;
sk->protocol = skb->nh.iph->protocol;
- ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
+ ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
+ &ipc, rt, MSG_DONTWAIT);
+ if ((skb = skb_peek(&sk->write_queue)) != NULL) {
+ if (arg->csumoffset >= 0)
+ *((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+ skb->ip_summed = CHECKSUM_NONE;
+ ip_push_pending_frames(sk);
+ }
+
bh_unlock_sock(sk);
ip_rt_put(rt);