Name: Connection Tracking Protocol Lock optimization
Author: Rusty Russell
Status: Trivial

D: This moves the connection tracking protocol list out from under the
D: ip_conntrack_lock, and uses the BR_NETPROTO_LOCK.
D: Where both locks are needed, BR_NETPROTO_LOCK is taken first and
D: released last.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .3824-linux-2.5.33/include/linux/netfilter_ipv4/ip_conntrack.h .3824-linux-2.5.33.updated/include/linux/netfilter_ipv4/ip_conntrack.h
--- .3824-linux-2.5.33/include/linux/netfilter_ipv4/ip_conntrack.h	2002-08-28 09:29:52.000000000 +1000
+++ .3824-linux-2.5.33.updated/include/linux/netfilter_ipv4/ip_conntrack.h	2002-09-06 13:39:07.000000000 +1000
@@ -231,6 +231,7 @@ void ip_conntrack_expect_put(struct ip_c
 
 extern struct module *ip_conntrack_module;
 
+/* Must hold BR_NETPROTO_LOCK, as in netfilter hook */
 extern int invert_tuplepr(struct ip_conntrack_tuple *inverse,
 			  const struct ip_conntrack_tuple *orig);
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .3824-linux-2.5.33/include/linux/netfilter_ipv4/ip_conntrack_core.h .3824-linux-2.5.33.updated/include/linux/netfilter_ipv4/ip_conntrack_core.h
--- .3824-linux-2.5.33/include/linux/netfilter_ipv4/ip_conntrack_core.h	2002-08-28 09:29:52.000000000 +1000
+++ .3824-linux-2.5.33.updated/include/linux/netfilter_ipv4/ip_conntrack_core.h	2002-09-06 13:39:07.000000000 +1000
@@ -14,6 +14,7 @@ extern unsigned int ip_conntrack_in(unsi
 extern int ip_conntrack_init(void);
 extern void ip_conntrack_cleanup(void);
 
+/* Protected by NETPROTO brlock */
 struct ip_conntrack_protocol;
 extern struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol);
 /* Like above, but you already have conntrack read lock. */
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .3824-linux-2.5.33/include/linux/netfilter_ipv4/ip_nat.h .3824-linux-2.5.33.updated/include/linux/netfilter_ipv4/ip_nat.h
--- .3824-linux-2.5.33/include/linux/netfilter_ipv4/ip_nat.h	2002-08-28 09:29:52.000000000 +1000
+++ .3824-linux-2.5.33.updated/include/linux/netfilter_ipv4/ip_nat.h	2002-09-06 13:39:07.000000000 +1000
@@ -116,12 +116,13 @@ struct ip_nat_info
 	struct ip_nat_seq seq[IP_CT_DIR_MAX];
 };
 
-/* Set up the info structure to map into this range. */
+/* Set up the info structure to map into this range.
+   Must hold BR_NETPROTO_LOCK. */
 extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack,
 				      const struct ip_nat_multi_range *mr,
 				      unsigned int hooknum);
 
-/* Is this tuple already taken? (not by us)*/
+/* Is this tuple already taken? (not by us). Must hold BR_NETPROTO_LOCK. */
 extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack *ignored_conntrack);
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .3824-linux-2.5.33/include/linux/netfilter_ipv4/ip_nat_protocol.h .3824-linux-2.5.33.updated/include/linux/netfilter_ipv4/ip_nat_protocol.h
--- .3824-linux-2.5.33/include/linux/netfilter_ipv4/ip_nat_protocol.h	2002-05-28 11:20:50.000000000 +1000
+++ .3824-linux-2.5.33.updated/include/linux/netfilter_ipv4/ip_nat_protocol.h	2002-09-06 13:39:07.000000000 +1000
@@ -7,6 +7,7 @@
 struct iphdr;
 struct ip_nat_range;
 
+/* All functions here are called with BR_NETPROTO_LOCK held */
 struct ip_nat_protocol
 {
 	struct list_head list;
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .3824-linux-2.5.33/net/ipv4/netfilter/ip_conntrack_core.c .3824-linux-2.5.33.updated/net/ipv4/netfilter/ip_conntrack_core.c
--- .3824-linux-2.5.33/net/ipv4/netfilter/ip_conntrack_core.c	2002-08-28 09:29:54.000000000 +1000
+++ .3824-linux-2.5.33.updated/net/ipv4/netfilter/ip_conntrack_core.c	2002-09-06 13:42:10.000000000 +1000
@@ -2,7 +2,7 @@
    but required by, the NAT layer; it can also be used by an iptables
    extension. */
 
-/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General
+/* (c) 1999, 2002 Paul `Rusty' Russell. Licenced under the GNU General
  * Public Licence.
  *
  * 23 Apr 2001: Harald Welte
@@ -69,33 +69,16 @@ static kmem_cache_t *ip_conntrack_cachep
 
 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
 
-static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
-			      u_int8_t protocol)
-{
-	return protocol == curr->proto;
-}
-
-struct ip_conntrack_protocol *__find_proto(u_int8_t protocol)
-{
-	struct ip_conntrack_protocol *p;
-
-	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
-	p = LIST_FIND(&protocol_list, proto_cmpfn,
-		      struct ip_conntrack_protocol *, protocol);
-	if (!p)
-		p = &ip_conntrack_generic_protocol;
-
-	return p;
-}
-
+/* Caller must hold the NETPROTO brlock (always true inside an nf_hook) */
 struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
 {
 	struct ip_conntrack_protocol *p;
 
-	READ_LOCK(&ip_conntrack_lock);
-	p = __find_proto(protocol);
-	READ_UNLOCK(&ip_conntrack_lock);
-	return p;
+	list_for_each_entry(p, &protocol_list, list) {
+		if (p->proto == protocol)
+			return p;
+	}
+	return &ip_conntrack_generic_protocol;
 }
 
 inline void
@@ -323,7 +306,9 @@ destroy_conntrack(struct nf_conntrack *n
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to ip_conntrack_lock!!! -HW */
+	br_read_lock(BR_NETPROTO_LOCK);
 	proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+	br_read_unlock(BR_NETPROTO_LOCK);
 	if (proto && proto->destroy)
 		proto->destroy(ct);
 
@@ -491,7 +476,8 @@ ip_conntrack_tuple_taken(const struct ip
 	return h != NULL;
 }
 
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields.
+   Must hold BR_NETPROTO_LOCK */
 struct ip_conntrack *
 icmp_error_track(struct sk_buff *skb,
 		 enum ip_conntrack_info *ctinfo,
@@ -788,7 +774,7 @@ resolve_normal_ct(struct sk_buff *skb,
 	return h->ctrack;
 }
 
-/* Netfilter hook itself. */
+/* Netfilter hook itself: we hold BR_NETPROTO_LOCK */
 unsigned int ip_conntrack_in(unsigned int hooknum,
 			     struct sk_buff **pskb,
 			     const struct net_device *in,
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .3824-linux-2.5.33/net/ipv4/netfilter/ip_conntrack_standalone.c .3824-linux-2.5.33.updated/net/ipv4/netfilter/ip_conntrack_standalone.c
--- .3824-linux-2.5.33/net/ipv4/netfilter/ip_conntrack_standalone.c	2002-08-28 09:29:54.000000000 +1000
+++ .3824-linux-2.5.33.updated/net/ipv4/netfilter/ip_conntrack_standalone.c	2002-09-06 13:43:50.000000000 +1000
@@ -71,7 +71,7 @@ print_expect(char *buffer, const struct
 	len += sprintf(buffer + len, "use=%u proto=%u ",
 		      atomic_read(&expect->use), expect->tuple.dst.protonum);
 	len += print_tuple(buffer + len, &expect->tuple,
-			   __find_proto(expect->tuple.dst.protonum));
+			   ip_ct_find_proto(expect->tuple.dst.protonum));
 	len += sprintf(buffer + len, "\n");
 	return len;
 }
@@ -81,8 +81,8 @@ print_conntrack(char *buffer, const stru
 {
 	unsigned int len;
 	struct ip_conntrack_protocol *proto
-		= __find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-			       .tuple.dst.protonum);
+		= ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+				   .tuple.dst.protonum);
 
 	len = sprintf(buffer, "%-8s %u %lu ",
 		      proto->name,
@@ -143,6 +143,7 @@ list_conntracks(char *buffer, char **sta
 	off_t upto = 0;
 	struct list_head *e;
 
+	br_read_lock(BR_NETPROTO_LOCK);
 	READ_LOCK(&ip_conntrack_lock);
 	/* Traverse hash; print originals then reply. */
 	for (i = 0; i < ip_conntrack_htable_size; i++) {
@@ -169,6 +170,7 @@ list_conntracks(char *buffer, char **sta
 	}
 
  finished:
 	READ_UNLOCK(&ip_conntrack_lock);
+	br_read_unlock(BR_NETPROTO_LOCK);
 
 	/* `start' hack - see fs/proc/generic.c line ~165 */
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .3824-linux-2.5.33/net/ipv4/netfilter/ip_fw_compat_masq.c .3824-linux-2.5.33.updated/net/ipv4/netfilter/ip_fw_compat_masq.c
--- .3824-linux-2.5.33/net/ipv4/netfilter/ip_fw_compat_masq.c	2002-05-24 15:20:35.000000000 +1000
+++ .3824-linux-2.5.33.updated/net/ipv4/netfilter/ip_fw_compat_masq.c	2002-09-06 13:39:07.000000000 +1000
@@ -119,6 +119,7 @@ check_for_masq_error(struct sk_buff *skb
 	}
 }
 
+/* Must hold BR_NETPROTO_LOCK, as from nf_hook */
 unsigned int
 check_for_demasq(struct sk_buff **pskb)
 {
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .3824-linux-2.5.33/net/ipv4/netfilter/ip_nat_core.c .3824-linux-2.5.33.updated/net/ipv4/netfilter/ip_nat_core.c
--- .3824-linux-2.5.33/net/ipv4/netfilter/ip_nat_core.c	2002-09-01 12:23:08.000000000 +1000
+++ .3824-linux-2.5.33.updated/net/ipv4/netfilter/ip_nat_core.c	2002-09-06 13:44:24.000000000 +1000
@@ -738,7 +738,6 @@ static inline int exp_for_packet(struct
 	struct ip_conntrack_protocol *proto;
 	int ret = 1;
 
-	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
 	proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
 	if (proto->exp_matches_pkt)
 		ret = proto->exp_matches_pkt(exp, pskb);