Name: More accurate TTL filtering for fragments Status: Booted on 2.6.11-rc2-bk5 Signed-off-by: Rusty Russell Questions like "what ttl is this packet?" are not possible to answer if the packet was created by combining multiple fragments, and those fragments arrived with different TTLs. Easiest fix is to drop "multi-ttl" packets if ttl is inspected, with a sysctl to ignore changes from one fragment to the next. Likewise, packets whose fragments arrived on different interfaces are dropped when a rule matches on the input interface. Index: linux-2.6.11-rc2-bk5-Netfilter/net/ipv4/netfilter/ip_tables.c =================================================================== --- linux-2.6.11-rc2-bk5-Netfilter.orig/net/ipv4/netfilter/ip_tables.c 2005-01-24 11:14:14.000000000 +1100 +++ linux-2.6.11-rc2-bk5-Netfilter/net/ipv4/netfilter/ip_tables.c 2005-01-28 15:14:42.000000000 +1100 @@ -125,16 +125,23 @@ #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) #endif +static inline int multi_input_interface(const struct sk_buff *skb) +{ + return IPCB(skb)->flags & IPSKB_FRAG_MULTI_IF; +} + /* Returns whether matches rule or not. */ static inline int -ip_packet_match(const struct iphdr *ip, +ip_packet_match(const struct sk_buff *skb, const char *indev, const char *outdev, const struct ipt_ip *ipinfo, - int isfrag) + int isfrag, + int *hotdrop) { size_t i; unsigned long ret; + struct iphdr *ip = skb->nh.iph; #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg)) @@ -157,6 +164,13 @@ return 0; } + /* Do not ask questions I cannot answer. */ + if (ipinfo->iniface_mask[0] && multi_input_interface(skb)) { + if (net_ratelimit()) + printk("ip_tables: packet from multiple interfaces\n"); + *hotdrop = 1; + return 0; + } /* Look for ifname matches; this should unroll nicely. 
*/ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { ret |= (((const unsigned long *)indev)[i] @@ -314,7 +328,8 @@ IP_NF_ASSERT(e); IP_NF_ASSERT(back); (*pskb)->nfcache |= e->nfcache; - if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { + if (ip_packet_match(*pskb, indev, outdev, &e->ip, offset, + &hotdrop)) { struct ipt_entry_target *t; if (IPT_MATCH_ITERATE(e, do_match, Index: linux-2.6.11-rc2-bk5-Netfilter/include/net/ip.h =================================================================== --- linux-2.6.11-rc2-bk5-Netfilter.orig/include/net/ip.h 2005-01-28 13:39:40.000000000 +1100 +++ linux-2.6.11-rc2-bk5-Netfilter/include/net/ip.h 2005-01-28 14:47:08.000000000 +1100 @@ -45,6 +45,9 @@ #define IPSKB_TRANSLATED 2 #define IPSKB_FORWARDED 4 #define IPSKB_XFRM_TUNNEL_SIZE 8 +#define IPSKB_FRAG_MULTI_IF 16 +#define IPSKB_FRAG_MULTI_TTL 32 +#define IPSKB_FRAG_MULTI_OPT 64 /* Not yet implemented */ }; struct ipcm_cookie Index: linux-2.6.11-rc2-bk5-Netfilter/net/ipv4/sysctl_net_ipv4.c =================================================================== --- linux-2.6.11-rc2-bk5-Netfilter.orig/net/ipv4/sysctl_net_ipv4.c 2004-10-19 14:34:25.000000000 +1000 +++ linux-2.6.11-rc2-bk5-Netfilter/net/ipv4/sysctl_net_ipv4.c 2005-01-28 14:56:04.000000000 +1100 @@ -29,6 +29,7 @@ extern int sysctl_ipfrag_high_thresh; extern int sysctl_ipfrag_time; extern int sysctl_ipfrag_secret_interval; +extern int sysctl_ipfrag_ignore_changes; /* From ip_output.c */ extern int sysctl_ip_dynaddr; @@ -682,6 +683,14 @@ .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = NET_IPV4_IPFRAG_IGNORE_CHANGES, + .procname = "ipfrag_ignore_changed", + .data = &sysctl_ipfrag_ignore_changes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; Index: linux-2.6.11-rc2-bk5-Netfilter/net/ipv4/ip_fragment.c =================================================================== --- 
linux-2.6.11-rc2-bk5-Netfilter.orig/net/ipv4/ip_fragment.c 2005-01-28 13:39:40.000000000 +1100 +++ linux-2.6.11-rc2-bk5-Netfilter/net/ipv4/ip_fragment.c 2005-01-28 14:56:04.000000000 +1100 @@ -60,6 +60,7 @@ * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ int sysctl_ipfrag_time = IP_FRAG_TIME; +int sysctl_ipfrag_ignore_changes = 0; struct ipfrag_skb_cb { @@ -90,6 +91,9 @@ struct timer_list timer; /* when will this queue expire? */ struct ipq **pprev; int iif; + u8 ttl; + u8 flags; /* copied into IPCB()->flags. */ + struct timeval stamp; }; @@ -309,6 +313,17 @@ ipq_put(qp, NULL); } +static void qp_update_flags(struct ipq *qp, u8 ttl, int iif) +{ + if (sysctl_ipfrag_ignore_changes) + return; + + if (ttl != qp->ttl) + qp->flags |= IPSKB_FRAG_MULTI_TTL; + if (iif != qp->iif) + qp->flags |= IPSKB_FRAG_MULTI_IF; +} + /* Creation primitives. */ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) @@ -326,6 +341,7 @@ qp->saddr == qp_in->saddr && qp->daddr == qp_in->daddr && qp->protocol == qp_in->protocol) { + qp_update_flags(qp, qp_in->ttl, qp_in->iif); atomic_inc(&qp->refcnt); write_unlock(&ipfrag_lock); qp_in->last_in |= COMPLETE; @@ -352,7 +368,7 @@ } /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph) +static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, int iif) { struct ipq *qp; @@ -367,7 +383,9 @@ qp->len = 0; qp->meat = 0; qp->fragments = NULL; - qp->iif = 0; + qp->iif = iif; + qp->ttl = iph->ttl; + qp->flags = 0; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -386,7 +404,7 @@ /* Find the correct entry in the "incomplete datagrams" queue for * this IP datagram, and create new one, if nothing is found. 
*/ -static inline struct ipq *ip_find(struct iphdr *iph) +static inline struct ipq *ip_find(struct iphdr *iph, int iif) { __u16 id = iph->id; __u32 saddr = iph->saddr; @@ -401,6 +419,7 @@ qp->saddr == saddr && qp->daddr == daddr && qp->protocol == protocol) { + qp_update_flags(qp, iph->ttl, iif); atomic_inc(&qp->refcnt); read_unlock(&ipfrag_lock); return qp; @@ -408,7 +427,7 @@ } read_unlock(&ipfrag_lock); - return ip_frag_create(hash, iph); + return ip_frag_create(hash, iph, iif); } /* Add new segment to existing queue. */ @@ -531,8 +550,6 @@ else qp->fragments = skb; - if (skb->dev) - qp->iif = skb->dev->ifindex; skb->dev = NULL; qp->stamp = skb->stamp; qp->meat += skb->len; @@ -602,6 +619,7 @@ skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - head->nh.raw); atomic_sub(head->truesize, &ip_frag_mem); + IPCB(head)->flags |= qp->flags; for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -647,6 +665,7 @@ struct iphdr *iph = skb->nh.iph; struct ipq *qp; struct net_device *dev; + int iif = 0; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); @@ -655,9 +674,11 @@ ip_evictor(); dev = skb->dev; + if (dev) + iif = dev->ifindex; /* Lookup (or create) queue header */ - if ((qp = ip_find(iph)) != NULL) { + if ((qp = ip_find(iph, iif)) != NULL) { struct sk_buff *ret = NULL; spin_lock(&qp->lock); Index: linux-2.6.11-rc2-bk5-Netfilter/net/ipv4/netfilter/ipt_ttl.c =================================================================== --- linux-2.6.11-rc2-bk5-Netfilter.orig/net/ipv4/netfilter/ipt_ttl.c 2005-01-11 14:21:11.000000000 +1100 +++ linux-2.6.11-rc2-bk5-Netfilter/net/ipv4/netfilter/ipt_ttl.c 2005-01-28 14:47:08.000000000 +1100 @@ -14,6 +14,7 @@ #include #include +#include MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("IP tables TTL matching module"); @@ -25,6 +26,12 @@ { const struct ipt_ttl_info *info = matchinfo; + /* Reconstructed from different TTL'd fragments? Too hard. 
*/ + if (IPCB(skb)->flags & IPSKB_FRAG_MULTI_TTL) { + *hotdrop = 1; + return 0; + } + switch (info->mode) { case IPT_TTL_EQ: return (skb->nh.iph->ttl == info->ttl); Index: linux-2.6.11-rc2-bk5-Netfilter/include/linux/sysctl.h =================================================================== --- linux-2.6.11-rc2-bk5-Netfilter.orig/include/linux/sysctl.h 2005-01-24 11:14:08.000000000 +1100 +++ linux-2.6.11-rc2-bk5-Netfilter/include/linux/sysctl.h 2005-01-28 14:56:04.000000000 +1100 @@ -344,6 +344,7 @@ NET_TCP_DEFAULT_WIN_SCALE=105, NET_TCP_MODERATE_RCVBUF=106, NET_TCP_TSO_WIN_DIVISOR=107, + NET_IPV4_IPFRAG_IGNORE_CHANGES=108, }; enum {