Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_nat_core.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ip_nat_core.c 2005-01-18 22:15:47.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_nat_core.c 2005-01-19 11:27:19.000000000 +1100 @@ -37,7 +37,7 @@ #define DEBUGP(format, args...) #endif -DECLARE_RWLOCK(ip_nat_lock); +static rwlock_t ip_nat_lock = RW_LOCK_UNLOCKED; /* Calculated at init based on memory size */ static unsigned int ip_nat_htable_size; @@ -147,7 +147,7 @@ result->dst = tuple->dst; if (in_range(result, range)) { - READ_UNLOCK(&ip_nat_lock); + read_unlock_bh(&ip_nat_lock); return 1; } } Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_nat_helper.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ip_nat_helper.c 2005-01-18 22:19:02.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_nat_helper.c 2005-01-18 22:19:59.000000000 +1100 @@ -21,23 +21,19 @@ #include #include #include +#include #include #include #include #include #include #include - -#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) - #include #include #include #include #include #include -#include #if 0 #define DEBUGP printk @@ -47,7 +43,7 @@ #define DUMP_OFFSET(x) #endif -static DECLARE_LOCK(ip_nat_seqofs_lock); +static spinlock_t ip_nat_seqofs_lock = SPIN_LOCK_UNLOCKED; /* Setup TCP sequence correction given this change at this sequence */ static inline void @@ -70,7 +66,7 @@ DEBUGP("ip_nat_resize_packet: Seq_offset before: "); DUMP_OFFSET(this_way); - LOCK_BH(&ip_nat_seqofs_lock); + spin_lock_bh(&ip_nat_seqofs_lock); /* SYN adjust. 
If it's uninitialized, or this is after last * correction, record it: we don't handle more than one @@ -82,7 +78,7 @@ this_way->offset_before = this_way->offset_after; this_way->offset_after += sizediff; } - UNLOCK_BH(&ip_nat_seqofs_lock); + spin_unlock_bh(&ip_nat_seqofs_lock); DEBUGP("ip_nat_resize_packet: Seq_offset after: "); DUMP_OFFSET(this_way); Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_conntrack_standalone.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ip_conntrack_standalone.c 2005-01-18 18:52:46.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_conntrack_standalone.c 2005-01-19 11:25:34.000000000 +1100 @@ -22,20 +22,14 @@ #include #include #include -#ifdef CONFIG_SYSCTL #include -#endif #include #include -#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) - #include #include #include #include -#include #if 0 #define DEBUGP printk @@ -103,8 +97,6 @@ const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash); struct ip_conntrack_protocol *proto; - MUST_BE_READ_LOCKED(&ip_conntrack_lock); - IP_NF_ASSERT(conntrack); /* we only want to print DIR_ORIGINAL */ @@ -161,14 +153,16 @@ static int ct_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; + struct ip_conntrack_tuple_hash *i; int ret = 0; /* FIXME: Simply truncates if hash chain too long. 
*/ - READ_LOCK(&ip_conntrack_lock); - if (LIST_FIND(list, ct_seq_real_show, - struct ip_conntrack_tuple_hash *, s)) - ret = -ENOSPC; - READ_UNLOCK(&ip_conntrack_lock); + read_lock_bh(&ip_conntrack_lock); + list_for_each_entry(i, list, list) { + if (ct_seq_real_show(i, s)) + ret = -ENOSPC; + } + read_unlock_bh(&ip_conntrack_lock); return ret; } @@ -200,7 +194,7 @@ /* strange seq_file api calls stop even if we fail, * thus we need to grab lock since stop unlocks */ - READ_LOCK(&ip_conntrack_lock); + read_lock_bh(&ip_conntrack_lock); if (list_empty(e)) return NULL; @@ -227,7 +221,7 @@ static void exp_seq_stop(struct seq_file *s, void *v) { - READ_UNLOCK(&ip_conntrack_lock); + read_unlock_bh(&ip_conntrack_lock); } static int exp_seq_show(struct seq_file *s, void *v) @@ -850,22 +844,22 @@ { int ret = 0; - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) { ret = -EBUSY; goto out; } ip_ct_protos[proto->proto] = proto; out: - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); return ret; } void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) { - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol; - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); /* Somebody could be still looking at the proto in bh. 
*/ synchronize_net(); Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_tables.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ip_tables.c 2005-01-18 20:24:50.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_tables.c 2005-01-19 11:29:21.000000000 +1100 @@ -64,18 +64,6 @@ static DECLARE_MUTEX(ipt_mutex); -/* Must have mutex */ -#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) -#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) -#include -#include - -#if 0 -/* All the better to debug you with... */ -#define static -#define inline -#endif - /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore @@ -252,6 +240,7 @@ static inline struct ipt_entry * get_entry(void *base, unsigned int offset) { + __check_lock(base, 1); return (struct ipt_entry *)(base + offset); } @@ -1425,7 +1414,7 @@ ipt_unregister_target(struct ipt_target *target) { down(&ipt_mutex); - LIST_DELETE(&ipt_target, target); + list_del(&target->list); up(&ipt_mutex); } @@ -1448,7 +1437,7 @@ ipt_unregister_match(struct ipt_match *match) { down(&ipt_mutex); - LIST_DELETE(&ipt_match, match); + list_del(&match->list); up(&ipt_mutex); } @@ -1456,6 +1445,7 @@ { int ret; struct ipt_table_info *newinfo; + const struct ipt_table *i; static struct ipt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; @@ -1483,9 +1473,11 @@ } /* Don't autoload: we'd eat our tail... */ - if (list_named_find(&ipt_tables, table->name)) { - ret = -EEXIST; - goto free_unlock; + list_for_each_entry(i, &ipt_tables, list) { + if (strcmp(i->name, table->name) == 0) { + ret = -EEXIST; + goto free_unlock; + } } /* Simplifies replace_table code. 
*/ @@ -1500,7 +1492,7 @@ table->private->initial_entries = table->private->number; rwlock_init(&table->lock); - list_prepend(&ipt_tables, table); + list_add(&table->list, &ipt_tables); unlock: up(&ipt_mutex); @@ -1514,7 +1506,7 @@ void ipt_unregister_table(struct ipt_table *table) { down(&ipt_mutex); - LIST_DELETE(&ipt_tables, table); + list_del(&table->list); up(&ipt_mutex); /* Decrease module usage counts and free resources */ @@ -1810,44 +1802,35 @@ }; #ifdef CONFIG_PROC_FS -static inline int print_name(const char *i, +static inline int print_name(const char *name, off_t start_offset, char *buffer, int length, off_t *pos, unsigned int *count) { if ((*count)++ >= start_offset) { unsigned int namelen; - namelen = sprintf(buffer + *pos, "%s\n", - i + sizeof(struct list_head)); + namelen = sprintf(buffer + *pos, "%s\n", name); if (*pos + namelen > length) { /* Stop iterating */ - return 1; + return 0; } *pos += namelen; } - return 0; -} - -static inline int print_target(const struct ipt_target *t, - off_t start_offset, char *buffer, int length, - off_t *pos, unsigned int *count) -{ - if (t == &ipt_standard_target || t == &ipt_error_target) - return 0; - return print_name((char *)t, start_offset, buffer, length, pos, count); + return 1; } static int ipt_get_tables(char *buffer, char **start, off_t offset, int length) { off_t pos = 0; unsigned int count = 0; + const struct ipt_table *t; if (down_interruptible(&ipt_mutex) != 0) return 0; - LIST_FIND(&ipt_tables, print_name, void *, - offset, buffer, length, &pos, &count); - + list_for_each_entry(t, &ipt_tables, list) + if (!print_name(t->name, offset, buffer, length, &pos, &count)) + break; up(&ipt_mutex); /* `start' hack - see fs/proc/generic.c line ~105 */ @@ -1859,13 +1842,18 @@ { off_t pos = 0; unsigned int count = 0; + struct ipt_target *t; if (down_interruptible(&ipt_mutex) != 0) return 0; - LIST_FIND(&ipt_target, print_target, struct ipt_target *, - offset, buffer, length, &pos, &count); - + 
list_for_each_entry(t, &ipt_target, list) { + if (t == &ipt_standard_target || t == &ipt_error_target) + continue; + + if (!print_name(t->name, offset, buffer, length, &pos, &count)) + break; + } up(&ipt_mutex); *start = (char *)((unsigned long)count - offset); @@ -1876,13 +1864,15 @@ { off_t pos = 0; unsigned int count = 0; + struct ipt_match *m; if (down_interruptible(&ipt_mutex) != 0) return 0; - - LIST_FIND(&ipt_match, print_name, void *, - offset, buffer, length, &pos, &count); + list_for_each_entry(m, &ipt_match, list) { + if (!print_name(m->name, offset, buffer, length, &pos, &count)) + break; + } up(&ipt_mutex); *start = (char *)((unsigned long)count - offset); @@ -1900,13 +1890,21 @@ { int ret; + /* Mutex covers all the linked lists. */ + register_check_lock(&ipt_target, sizeof(ipt_target), __check_mutex, + &ipt_mutex); + register_check_lock(&ipt_match, sizeof(ipt_match), __check_mutex, + &ipt_mutex); + register_check_lock(&ipt_tables, sizeof(ipt_tables), __check_mutex, + &ipt_mutex); + /* Noone else will be downing sem now, so we won't sleep */ down(&ipt_mutex); - list_append(&ipt_target, &ipt_standard_target); - list_append(&ipt_target, &ipt_error_target); - list_append(&ipt_match, &tcp_matchstruct); - list_append(&ipt_match, &udp_matchstruct); - list_append(&ipt_match, &icmp_matchstruct); + list_add(&ipt_standard_target.list, &ipt_target); + list_add(&ipt_error_target.list, &ipt_target); + list_add(&tcp_matchstruct.list, &ipt_match); + list_add(&udp_matchstruct.list, &ipt_match); + list_add(&icmp_matchstruct.list, &ipt_match); up(&ipt_mutex); /* Register setsockopt */ @@ -1949,6 +1947,9 @@ proc_net_remove(ipt_proc_entry[i].name); } #endif + unregister_check_lock(&ipt_target); + unregister_check_lock(&ipt_match); + unregister_check_lock(&ipt_tables); } EXPORT_SYMBOL(ipt_register_table); Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_nat_rule.c =================================================================== --- 
linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ip_nat_rule.c 2005-01-18 18:52:48.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_nat_rule.c 2005-01-18 22:20:38.000000000 +1100 @@ -15,18 +15,14 @@ #include #include #include +#include +#include #include #include -#include - -#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) - #include #include #include #include -#include #if 0 #define DEBUGP printk Index: linux-2.6.11-rc1-bk5-Netfilter/include/linux/netfilter_ipv4/ip_nat.h =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/include/linux/netfilter_ipv4/ip_nat.h 2005-01-18 18:52:48.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/include/linux/netfilter_ipv4/ip_nat.h 2005-01-19 10:34:51.000000000 +1100 @@ -50,7 +50,6 @@ #ifdef __KERNEL__ #include -#include /* The structure embedded in the conntrack structure. */ struct ip_nat_info Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ipt_CLUSTERIP.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ipt_CLUSTERIP.c 2005-01-18 18:52:45.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ipt_CLUSTERIP.c 2005-01-19 14:43:06.000000000 +1100 @@ -66,7 +66,7 @@ /* clusterip_lock protects the clusterip_configs list _AND_ the configurable * data within all structurses (num_local_nodes, local_nodes[]) */ -static DECLARE_RWLOCK(clusterip_lock); +static rwlock_t clusterip_lock; #ifdef CONFIG_PROC_FS static struct file_operations clusterip_proc_fops; @@ -81,9 +81,9 @@ static inline void clusterip_config_put(struct clusterip_config *c) { if (atomic_dec_and_test(&c->refcount)) { - WRITE_LOCK(&clusterip_lock); + write_lock_bh(&clusterip_lock); list_del(&c->list); - WRITE_UNLOCK(&clusterip_lock); + write_unlock_bh(&clusterip_lock); dev_mc_delete(c->dev, c->clustermac, 
ETH_ALEN, 0); dev_put(c->dev); kfree(c); @@ -94,12 +94,9 @@ static struct clusterip_config * __clusterip_config_find(u_int32_t clusterip) { - struct list_head *pos; + struct clusterip_config *c; - MUST_BE_READ_LOCKED(&clusterip_lock); - list_for_each(pos, &clusterip_configs) { - struct clusterip_config *c = list_entry(pos, - struct clusterip_config, list); + list_for_each_entry(c, &clusterip_configs, list) { if (c->clusterip == clusterip) { return c; } @@ -113,14 +110,14 @@ { struct clusterip_config *c; - READ_LOCK(&clusterip_lock); + read_lock_bh(&clusterip_lock); c = __clusterip_config_find(clusterip); if (!c) { - READ_UNLOCK(&clusterip_lock); + read_unlock_bh(&clusterip_lock); return NULL; } atomic_inc(&c->refcount); - READ_UNLOCK(&clusterip_lock); + read_unlock_bh(&clusterip_lock); return c; } @@ -159,9 +156,9 @@ c->pde->data = c; #endif - WRITE_LOCK(&clusterip_lock); + write_lock_bh(&clusterip_lock); list_add(&c->list, &clusterip_configs); - WRITE_UNLOCK(&clusterip_lock); + write_unlock_bh(&clusterip_lock); return c; } @@ -171,25 +168,25 @@ { int i; - WRITE_LOCK(&clusterip_lock); + write_lock_bh(&clusterip_lock); if (c->num_local_nodes >= CLUSTERIP_MAX_NODES || nodenum > CLUSTERIP_MAX_NODES) { - WRITE_UNLOCK(&clusterip_lock); + write_unlock_bh(&clusterip_lock); return 1; } /* check if we alrady have this number in our array */ for (i = 0; i < c->num_local_nodes; i++) { if (c->local_nodes[i] == nodenum) { - WRITE_UNLOCK(&clusterip_lock); + write_unlock_bh(&clusterip_lock); return 1; } } c->local_nodes[c->num_local_nodes++] = nodenum; - WRITE_UNLOCK(&clusterip_lock); + write_unlock_bh(&clusterip_lock); return 0; } @@ -198,10 +195,10 @@ { int i; - WRITE_LOCK(&clusterip_lock); + write_lock_bh(&clusterip_lock); if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { - WRITE_UNLOCK(&clusterip_lock); + write_unlock_bh(&clusterip_lock); return 1; } @@ -210,12 +207,12 @@ int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); memmove(&c->local_nodes[i], 
&c->local_nodes[i+1], size); c->num_local_nodes--; - WRITE_UNLOCK(&clusterip_lock); + write_unlock_bh(&clusterip_lock); return 0; } } - WRITE_UNLOCK(&clusterip_lock); + write_unlock_bh(&clusterip_lock); return 1; } @@ -285,21 +282,21 @@ { int i; - READ_LOCK(&clusterip_lock); + read_lock_bh(&clusterip_lock); if (config->num_local_nodes == 0) { - READ_UNLOCK(&clusterip_lock); + read_unlock_bh(&clusterip_lock); return 0; } for (i = 0; i < config->num_local_nodes; i++) { if (config->local_nodes[i] == hash) { - READ_UNLOCK(&clusterip_lock); + read_unlock_bh(&clusterip_lock); return 1; } } - READ_UNLOCK(&clusterip_lock); + read_unlock_bh(&clusterip_lock); return 0; } @@ -577,7 +574,7 @@ struct clusterip_config *c = pde->data; unsigned int *nodeidx; - READ_LOCK(&clusterip_lock); + read_lock_bh(&clusterip_lock); if (*pos >= c->num_local_nodes) return NULL; @@ -607,7 +604,7 @@ { kfree(v); - READ_UNLOCK(&clusterip_lock); + read_unlock_bh(&clusterip_lock); } static int clusterip_seq_show(struct seq_file *s, void *v) @@ -727,6 +724,8 @@ } #endif /* CONFIG_PROC_FS */ + register_check_lock(&clusterip_configs, sizeof(clusterip_configs), + __check_spinlock, &clusterip_lock); printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); @@ -735,6 +734,7 @@ cleanup: printk(KERN_NOTICE "ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); + unregister_check_lock(&clusterip_configs); #ifdef CONFIG_PROC_FS remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); #endif Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_nat_standalone.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ip_nat_standalone.c 2005-01-18 18:52:52.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_nat_standalone.c 2005-01-18 22:21:21.000000000 +1100 @@ -29,11 +29,6 @@ #include #include #include -#include - -#define ASSERT_READ_LOCK(x) 
MUST_BE_READ_LOCKED(&ip_nat_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) - #include #include #include @@ -41,7 +36,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk @@ -321,7 +315,6 @@ cleanup_rule_init: ip_nat_rule_cleanup(); cleanup_nothing: - MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock); return ret; } Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ipt_helper.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ipt_helper.c 2005-01-18 18:52:43.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ipt_helper.c 2005-01-19 11:27:40.000000000 +1100 @@ -53,7 +53,7 @@ return ret; } - READ_LOCK(&ip_conntrack_lock); + read_lock_bh(&ip_conntrack_lock); if (!ct->master->helper) { DEBUGP("ipt_helper: master ct %p has no helper\n", exp->expectant); @@ -69,7 +69,7 @@ ret ^= !strncmp(ct->master->helper->name, info->name, strlen(ct->master->helper->name)); out_unlock: - READ_UNLOCK(&ip_conntrack_lock); + read_unlock_bh(&ip_conntrack_lock); return ret; } Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ipt_MASQUERADE.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ipt_MASQUERADE.c 2005-01-13 12:11:13.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ipt_MASQUERADE.c 2005-01-19 11:29:49.000000000 +1100 @@ -33,7 +33,7 @@ #endif /* Lock protects masq region inside conntrack */ -static DECLARE_RWLOCK(masq_lock); +static rwlock_t masq_lock; /* FIXME: Multiple targets. --RR */ static int @@ -103,9 +103,9 @@ return NF_DROP; } - WRITE_LOCK(&masq_lock); + write_lock_bh(&masq_lock); ct->nat.masq_index = out->ifindex; - WRITE_UNLOCK(&masq_lock); + write_unlock_bh(&masq_lock); /* Transfer from original range. 
*/ newrange = ((struct ip_nat_range) @@ -122,9 +122,9 @@ { int ret; - READ_LOCK(&masq_lock); + read_lock_bh(&masq_lock); ret = (i->nat.masq_index == (int)(long)ifindex); - READ_UNLOCK(&masq_lock); + read_unlock_bh(&masq_lock); return ret; } Index: linux-2.6.11-rc1-bk5-Netfilter/include/linux/netfilter_ipv4/ip_conntrack_core.h =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h 2005-01-18 18:52:43.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/include/linux/netfilter_ipv4/ip_conntrack_core.h 2005-01-19 10:34:39.000000000 +1100 @@ -1,7 +1,7 @@ #ifndef _IP_CONNTRACK_CORE_H #define _IP_CONNTRACK_CORE_H #include -#include +#include /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use @@ -47,6 +47,6 @@ extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; -DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); +extern rwlock_t ip_conntrack_lock; #endif /* _IP_CONNTRACK_CORE_H */ Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ipt_hashlimit.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ipt_hashlimit.c 2005-01-18 18:52:50.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ipt_hashlimit.c 2005-01-19 11:27:50.000000000 +1100 @@ -33,18 +33,9 @@ #include #include #include - -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) -#include -#include - #include #include -/* FIXME: this is just for IP_NF_ASSERRT */ -#include - #define MS2JIFFIES(x) ((x*HZ)/1000) MODULE_LICENSE("GPL"); @@ -97,11 +88,11 @@ struct list_head hash[0]; /* hashtable itself */ }; -static DECLARE_RWLOCK(hashlimit_lock); /* protects htables list */ +static rwlock_t hashlimit_lock = RW_LOCK_UNLOCKED; /* protects htables list */ static LIST_HEAD(hashlimit_htables); static 
kmem_cache_t *hashlimit_cachep; -static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) +static inline int dst_eq(const struct dsthash_ent *ent, struct dsthash_dst *b) { return (ent->dst.dst_ip == b->dst_ip && ent->dst.dst_port == b->dst_port @@ -119,10 +110,12 @@ static inline struct dsthash_ent * __dsthash_find(const struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst) { - struct dsthash_ent *ent; - u_int32_t hash = hash_dst(ht, dst); - ent = LIST_FIND(&ht->hash[hash], dst_cmp, struct dsthash_ent *, dst); - return ent; + struct dsthash_ent *i; + + list_for_each_entry(i, &ht->hash[hash_dst(ht, dst)], list) + if (dst_eq(i, dst)) + return i; + return NULL; } /* allocate dsthash_ent, initialize dst, put in htable and lock it */ @@ -229,9 +222,14 @@ hinfo->timer.function = htable_gc; add_timer(&hinfo->timer); - WRITE_LOCK(&hashlimit_lock); + /* Lock protects entire hash table. */ + register_check_lock(hinfo->hash, + hinfo->cfg.size * sizeof(hinfo->hash[0]), + __check_spinlock, &hinfo->lock); + + write_lock_bh(&hashlimit_lock); list_add(&hinfo->list, &hashlimit_htables); - WRITE_UNLOCK(&hashlimit_lock); + write_unlock_bh(&hashlimit_lock); return 0; } @@ -252,7 +250,7 @@ { int i; - IP_NF_ASSERT(ht->cfg.size && ht->cfg.max); + BUG_ON(!ht->cfg.size || !ht->cfg.max); /* lock hash table and iterate over it */ spin_lock_bh(&ht->lock); @@ -288,6 +286,7 @@ remove_proc_entry(hinfo->pde->name, hashlimit_procdir); htable_selective_cleanup(hinfo, select_all); + unregister_check_lock(hinfo->hash); vfree(hinfo); } @@ -295,15 +294,15 @@ { struct ipt_hashlimit_htable *hinfo; - READ_LOCK(&hashlimit_lock); + read_lock_bh(&hashlimit_lock); list_for_each_entry(hinfo, &hashlimit_htables, list) { if (!strcmp(name, hinfo->pde->name)) { atomic_inc(&hinfo->use); - READ_UNLOCK(&hashlimit_lock); + read_unlock_bh(&hashlimit_lock); return hinfo; } } - READ_UNLOCK(&hashlimit_lock); + read_unlock_bh(&hashlimit_lock); return NULL; } @@ -311,9 +310,9 @@ static void 
htable_put(struct ipt_hashlimit_htable *hinfo) { if (atomic_dec_and_test(&hinfo->use)) { - WRITE_LOCK(&hashlimit_lock); + write_lock_bh(&hashlimit_lock); list_del(&hinfo->list); - WRITE_UNLOCK(&hashlimit_lock); + write_unlock_bh(&hashlimit_lock); htable_destroy(hinfo); } } @@ -621,12 +620,12 @@ struct proc_dir_entry *pde = s->private; struct ipt_hashlimit_htable *htable = pde->data; unsigned int *bucket = (unsigned int *)v; + struct dsthash_ent *i; - if (LIST_FIND_W(&htable->hash[*bucket], dl_seq_real_show, - struct dsthash_ent *, s)) { - /* buffer was filled and unable to print that tuple */ - return 1; - } + list_for_each_entry(i, &htable->hash[*bucket], list) + if (dl_seq_real_show(i, s)) + /* buffer was filled and unable to print that tuple */ + return 1; return 0; } @@ -684,9 +683,12 @@ goto cleanup_free_slab; } + register_check_lock(&hashlimit_htables, sizeof(hashlimit_htables), + __check_rwlock, &hashlimit_lock); return ret; cleanup: + unregister_check_lock(&hashlimit_htables); remove_proc_entry("ipt_hashlimit", proc_net); cleanup_free_slab: kmem_cache_destroy(hashlimit_cachep); Index: linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_conntrack_core.c =================================================================== --- linux-2.6.11-rc1-bk5-Netfilter.orig/net/ipv4/netfilter/ip_conntrack_core.c 2005-01-18 18:52:50.000000000 +1100 +++ linux-2.6.11-rc1-bk5-Netfilter/net/ipv4/netfilter/ip_conntrack_core.c 2005-01-19 10:45:09.000000000 +1100 @@ -38,16 +38,10 @@ #include #include -/* This rwlock protects the main hash table, protocol/helper/expected - registrations, conntrack timers*/ -#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) - #include #include #include #include -#include #define IP_CONNTRACK_VERSION "2.1" @@ -57,7 +51,7 @@ #define DEBUGP(format, args...) 
#endif -DECLARE_RWLOCK(ip_conntrack_lock); +rwlock_t ip_conntrack_lock = RW_LOCK_UNLOCKED; /* ip_conntrack_standalone needs this */ atomic_t ip_conntrack_count = ATOMIC_INIT(0); @@ -147,7 +141,6 @@ static void unlink_expect(struct ip_conntrack_expect *exp) { - MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); list_del(&exp->list); /* Logically in destroy_expect, but we hold the lock here. */ exp->master->expecting--; @@ -157,9 +150,9 @@ { struct ip_conntrack_expect *exp = (void *)ul_expect; - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); unlink_expect(exp); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); destroy_expect(exp); } @@ -206,15 +199,10 @@ static void clean_from_lists(struct ip_conntrack *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); - MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ remove_expectations(ct); @@ -240,7 +228,7 @@ if (ip_conntrack_destroyed) ip_conntrack_destroyed(ct); - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, @@ -254,7 +242,7 @@ } CONNTRACK_STAT_INC(delete); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); if (ct->master) ip_conntrack_put(ct->master); @@ -268,12 +256,12 @@ { struct ip_conntrack *ct = (void *)ul_conntrack; - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); /* Inside lock so preempt is disabled on module 
removal path. * Otherwise we can get spurious warnings. */ CONNTRACK_STAT_INC(delete_list); clean_from_lists(ct); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); ip_conntrack_put(ct); } @@ -282,7 +270,6 @@ const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) { - MUST_BE_READ_LOCKED(&ip_conntrack_lock); return tuplehash_to_ctrack(i) != ignored_conntrack && ip_ct_tuple_equal(tuple, &i->tuple); } @@ -294,7 +281,6 @@ struct ip_conntrack_tuple_hash *h; unsigned int hash = hash_conntrack(tuple); - MUST_BE_READ_LOCKED(&ip_conntrack_lock); list_for_each_entry(h, &ip_conntrack_hash[hash], list) { if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { CONNTRACK_STAT_INC(found); @@ -313,15 +299,27 @@ { struct ip_conntrack_tuple_hash *h; - READ_LOCK(&ip_conntrack_lock); + read_lock_bh(&ip_conntrack_lock); h = __ip_conntrack_find(tuple, ignored_conntrack); if (h) atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); - READ_UNLOCK(&ip_conntrack_lock); + read_unlock_bh(&ip_conntrack_lock); return h; } +static inline int already_in_list(struct list_head *head, + const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_tuple_hash *i; + + list_for_each_entry(i, head, list) + if (ip_ct_tuple_equal(tuple, &i->tuple)) + return 1; + + return 0; +} + /* Confirm a connection given skb; places it in hash table */ int __ip_conntrack_confirm(struct sk_buff **pskb) @@ -352,26 +350,22 @@ IP_NF_ASSERT(!is_confirmed(ct)); DEBUGP("Confirming conntrack %p\n", ct); - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. 
*/ - if (!LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&ip_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { + if (!already_in_list(&ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) + && !already_in_list(&ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].tuple)) { /* Remove from unconfirmed list */ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY]); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &ip_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &ip_conntrack_hash[repl_hash]); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -380,12 +374,12 @@ atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); CONNTRACK_STAT_INC(insert); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); return NF_ACCEPT; } CONNTRACK_STAT_INC(insert_failed); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); return NF_DROP; } @@ -398,58 +392,48 @@ { struct ip_conntrack_tuple_hash *h; - READ_LOCK(&ip_conntrack_lock); + read_lock_bh(&ip_conntrack_lock); h = __ip_conntrack_find(tuple, ignored_conntrack); - READ_UNLOCK(&ip_conntrack_lock); + read_unlock_bh(&ip_conntrack_lock); return h != NULL; } -/* There's a small race here where we may free a just-assured - connection. Too bad: we're in trouble anyway. 
*/ -static inline int unreplied(const struct ip_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct = NULL; - int dropped = 0; + struct ip_conntrack *ct; - READ_LOCK(&ip_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); - if (h) { + read_lock_bh(&ip_conntrack_lock); + list_for_each_entry_reverse(h, chain, list) { ct = tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + if (!test_bit(IPS_ASSURED_BIT, &ct->status) + && del_timer(&ct->timeout)) + goto delete; } - READ_UNLOCK(&ip_conntrack_lock); - - if (!ct) - return dropped; + read_unlock_bh(&ip_conntrack_lock); + return 0; - if (del_timer(&ct->timeout)) { - death_by_timeout((unsigned long)ct); - dropped = 1; - CONNTRACK_STAT_INC(early_drop); - } +delete: + atomic_inc(&ct->ct_general.use); + read_unlock_bh(&ip_conntrack_lock); + death_by_timeout((unsigned long)ct); + CONNTRACK_STAT_INC(early_drop); ip_conntrack_put(ct); - return dropped; + return 1; } -static inline int helper_cmp(const struct ip_conntrack_helper *i, - const struct ip_conntrack_tuple *rtuple) +static struct ip_conntrack_helper * +ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) { - return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} + struct ip_conntrack_helper *i; -static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) -{ - return LIST_FIND(&helpers, helper_cmp, - struct ip_conntrack_helper *, - tuple); + list_for_each_entry(i, &helpers, list) { + if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) + return i; + } + return NULL; } /* Allocate a new conntrack: we return -ENOMEM if classification @@ -508,7 +492,7 @@ conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; - 
WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); exp = find_expectation(tuple); if (exp) { @@ -532,7 +516,7 @@ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); atomic_inc(&ip_conntrack_count); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); if (exp) { if (exp->expectfn) @@ -723,17 +707,17 @@ { struct ip_conntrack_expect *i; - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); /* choose the the oldest expectation to evict */ list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { if (expect_matches(i, exp) && del_timer(&i->timeout)) { unlink_expect(i); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); destroy_expect(i); return; } } - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); } struct ip_conntrack_expect *ip_conntrack_expect_alloc(void) @@ -808,7 +792,7 @@ DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); list_for_each_entry(i, &ip_conntrack_expect_list, list) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. 
*/ @@ -832,7 +816,7 @@ ip_conntrack_expect_insert(expect); ret = 0; out: - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); return ret; } @@ -841,7 +825,7 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack, const struct ip_conntrack_tuple *newreply) { - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); /* Should be unconfirmed, so not in hash table yet */ IP_NF_ASSERT(!is_confirmed(conntrack)); @@ -851,25 +835,26 @@ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (!conntrack->master && conntrack->expecting == 0) conntrack->helper = ip_ct_find_helper(newreply); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); } int ip_conntrack_helper_register(struct ip_conntrack_helper *me) { BUG_ON(me->timeout == 0); - WRITE_LOCK(&ip_conntrack_lock); - list_prepend(&helpers, me); - WRITE_UNLOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); + list_add(&me->list, &helpers); + write_unlock_bh(&ip_conntrack_lock); return 0; } -static inline int unhelp(struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_helper *me) +static void unhelp(struct list_head *head, struct ip_conntrack_helper *me) { - if (tuplehash_to_ctrack(i)->helper == me) - tuplehash_to_ctrack(i)->helper = NULL; - return 0; + struct ip_conntrack_tuple_hash *i; + + list_for_each_entry(i, &unconfirmed, list) + if (tuplehash_to_ctrack(i)->helper == me) + tuplehash_to_ctrack(i)->helper = NULL; } void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) @@ -878,8 +863,8 @@ struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ - WRITE_LOCK(&ip_conntrack_lock); - LIST_DELETE(&helpers, me); + write_lock_bh(&ip_conntrack_lock); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { @@ -888,12 +873,11 @@ destroy_expect(exp); } } - /* Get rid of expecteds, set helpers to NULL. 
*/ - LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); + /* Set helpers to NULL on connections. */ + unhelp(&unconfirmed, me); for (i = 0; i < ip_conntrack_htable_size; i++) - LIST_FIND_W(&ip_conntrack_hash[i], unhelp, - struct ip_conntrack_tuple_hash *, me); - WRITE_UNLOCK(&ip_conntrack_lock); + unhelp(&ip_conntrack_hash[i], me); + write_unlock_bh(&ip_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ synchronize_net(); @@ -925,14 +909,14 @@ ct->timeout.expires = extra_jiffies; ct_add_counters(ct, ctinfo, skb); } else { - WRITE_LOCK(&ip_conntrack_lock); + write_lock_bh(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); } ct_add_counters(ct, ctinfo, skb); - WRITE_UNLOCK(&ip_conntrack_lock); + write_unlock_bh(&ip_conntrack_lock); } } @@ -1014,22 +998,23 @@ get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), void *data, unsigned int *bucket) { - struct ip_conntrack_tuple_hash *h = NULL; - - WRITE_LOCK(&ip_conntrack_lock); + struct ip_conntrack_tuple_hash *h; + + write_lock_bh(&ip_conntrack_lock); for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - break; - } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); - WRITE_UNLOCK(&ip_conntrack_lock); - + list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) + if (iter(tuplehash_to_ctrack(h), data)) + goto found; + } + list_for_each_entry(h, &unconfirmed, list) + if (iter(tuplehash_to_ctrack(h), data)) + goto found; + write_unlock_bh(&ip_conntrack_lock); + return NULL; + +found: + atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); + write_unlock_bh(&ip_conntrack_lock); return h; } @@ -1220,17 
+1205,17 @@
 	}
 
 	/* Don't NEED lock here, but good form anyway. */
-	WRITE_LOCK(&ip_conntrack_lock);
+	write_lock_bh(&ip_conntrack_lock);
 	for (i = 0; i < MAX_IP_CT_PROTO; i++)
 		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
 	/* Sew in builtin protocols. */
 	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
 	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
 	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
-	WRITE_UNLOCK(&ip_conntrack_lock);
+	write_unlock_bh(&ip_conntrack_lock);
 
 	for (i = 0; i < ip_conntrack_htable_size; i++)
 		INIT_LIST_HEAD(&ip_conntrack_hash[i]);
 
 	/* For use by ipt_REJECT */
 	ip_ct_attach = ip_conntrack_attach;