diff -urN linux-2.2.10-ac5/include/net/sock.h linux-2.2.10-ac6/include/net/sock.h --- linux-2.2.10-ac5/include/net/sock.h Sat Jul 17 01:17:51 1999 +++ linux-2.2.10-ac6/include/net/sock.h Sat Jul 17 01:18:28 1999 @@ -584,9 +584,7 @@ /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); - void (*rehash)(struct sock *sk); - unsigned short (*good_socknum)(void); - int (*verify_bind)(struct sock *sk, unsigned short snum); + int (*get_port)(struct sock *sk, unsigned short snum); unsigned short max_header; unsigned long retransmits; @@ -622,22 +620,26 @@ /* Some things in the kernel just want to get at a protocols * entire socket list commensurate, thus... */ +static __inline__ void __add_to_prot_sklist(struct sock *sk) +{ + struct proto *p = sk->prot; + + sk->sklist_prev = (struct sock *) p; + sk->sklist_next = p->sklist_next; + p->sklist_next->sklist_prev = sk; + p->sklist_next = sk; + + /* Charge the protocol. */ + sk->prot->inuse += 1; + if(sk->prot->highestinuse < sk->prot->inuse) + sk->prot->highestinuse = sk->prot->inuse; +} + static __inline__ void add_to_prot_sklist(struct sock *sk) { SOCKHASH_LOCK(); - if(!sk->sklist_next) { - struct proto *p = sk->prot; - - sk->sklist_prev = (struct sock *) p; - sk->sklist_next = p->sklist_next; - p->sklist_next->sklist_prev = sk; - p->sklist_next = sk; - - /* Charge the protocol. 
*/ - sk->prot->inuse += 1; - if(sk->prot->highestinuse < sk->prot->inuse) - sk->prot->highestinuse = sk->prot->inuse; - } + if(!sk->sklist_next) + __add_to_prot_sklist(sk); SOCKHASH_UNLOCK(); } diff -urN linux-2.2.10-ac5/include/net/tcp.h linux-2.2.10-ac6/include/net/tcp.h --- linux-2.2.10-ac5/include/net/tcp.h Fri Jul 16 22:52:17 1999 +++ linux-2.2.10-ac6/include/net/tcp.h Sat Jul 17 01:18:28 1999 @@ -75,11 +75,7 @@ */ struct tcp_bind_bucket { unsigned short port; - unsigned short flags; -#define TCPB_FLAG_LOCKED 0x0001 -#define TCPB_FLAG_FASTREUSE 0x0002 -#define TCPB_FLAG_GOODSOCKNUM 0x0004 - + unsigned short fastreuse; struct tcp_bind_bucket *next; struct sock *owners; struct tcp_bind_bucket **pprev; @@ -116,32 +112,6 @@ return (lport & (TCP_BHTABLE_SIZE - 1)); } -static __inline__ void tcp_sk_bindify(struct sock *sk) -{ - struct tcp_bind_bucket *tb; - unsigned short snum = sk->num; - - for(tb = tcp_bound_hash[tcp_bhashfn(snum)]; tb->port != snum; tb = tb->next) - ; - /* Update bucket flags. */ - if(tb->owners == NULL) { - /* We're the first. */ - if(sk->reuse && sk->state != TCP_LISTEN) - tb->flags = TCPB_FLAG_FASTREUSE; - else - tb->flags = 0; - } else { - if((tb->flags & TCPB_FLAG_FASTREUSE) && - ((sk->reuse == 0) || (sk->state == TCP_LISTEN))) - tb->flags &= ~TCPB_FLAG_FASTREUSE; - } - if((sk->bind_next = tb->owners) != NULL) - tb->owners->bind_pprev = &sk->bind_next; - tb->owners = sk; - sk->bind_pprev = &tb->owners; - sk->prev = (struct sock *) tb; -} - /* This is a TIME_WAIT bucket. It works around the memory consumption * problems of sockets in such a state on heavily loaded servers, but * without violating the protocol specification. 
@@ -478,7 +448,9 @@ extern struct proto tcp_prot; extern struct tcp_mib tcp_statistics; -extern unsigned short tcp_good_socknum(void); +extern void tcp_put_port(struct sock *sk); +extern void __tcp_put_port(struct sock *sk); +extern void tcp_inherit_port(struct sock *sk, struct sock *child); extern void tcp_v4_err(struct sk_buff *skb, unsigned char *, int); @@ -631,8 +603,7 @@ #define TCP_SLT_SYNACK 0 #define TCP_SLT_KEEPALIVE 1 #define TCP_SLT_TWKILL 2 -#define TCP_SLT_BUCKETGC 3 -#define TCP_SLT_MAX 4 +#define TCP_SLT_MAX 3 extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX]; @@ -1068,17 +1039,6 @@ struct tcp_sl_timer *slt = &tcp_slt_array[timer]; atomic_dec(&slt->count); -} - -/* This needs to use a slow timer, so it is here. */ -static __inline__ void tcp_sk_unbindify(struct sock *sk) -{ - struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *) sk->prev; - if(sk->bind_next) - sk->bind_next->bind_pprev = sk->bind_pprev; - *sk->bind_pprev = sk->bind_next; - if(tb->owners == NULL) - tcp_inc_slow_timer(TCP_SLT_BUCKETGC); } extern const char timer_bug_msg[]; diff -urN linux-2.2.10-ac5/include/net/udp.h linux-2.2.10-ac6/include/net/udp.h --- linux-2.2.10-ac5/include/net/udp.h Mon Jan 18 02:27:28 1999 +++ linux-2.2.10-ac6/include/net/udp.h Sat Jul 17 01:18:28 1999 @@ -32,10 +32,20 @@ */ extern struct sock *udp_hash[UDP_HTABLE_SIZE]; -extern unsigned short udp_good_socknum(void); - #define UDP_NO_CHECK 0 +extern int udp_port_rover; + +static inline int udp_lport_inuse(u16 num) +{ + struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)]; + + for(; sk != NULL; sk = sk->next) { + if(sk->num == num) + return 1; + } + return 0; +} extern struct proto udp_prot; diff -urN linux-2.2.10-ac5/net/ipv4/af_inet.c linux-2.2.10-ac6/net/ipv4/af_inet.c --- linux-2.2.10-ac5/net/ipv4/af_inet.c Tue Jul 13 00:33:23 1999 +++ linux-2.2.10-ac6/net/ipv4/af_inet.c Sat Jul 17 01:18:30 1999 @@ -5,7 +5,7 @@ * * PF_INET protocol family socket handler. 
* - * Version: $Id: af_inet.c,v 1.87.2.1 1999/05/29 04:32:01 davem Exp $ + * Version: $Id: af_inet.c,v 1.87.2.2 1999/06/20 20:14:39 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -262,8 +262,7 @@ { /* We may need to bind the socket. */ if (sk->num == 0) { - sk->num = sk->prot->good_socknum(); - if (sk->num == 0) + if (sk->prot->get_port(sk, 0) != 0) return(-EAGAIN); sk->sport = htons(sk->num); sk->prot->hash(sk); @@ -279,28 +278,38 @@ int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; + unsigned char old_state; if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) return(-EINVAL); - if (inet_autobind(sk) != 0) - return -EAGAIN; - - /* We might as well re use these. */ if ((unsigned) backlog == 0) /* BSDism */ backlog = 1; if ((unsigned) backlog > SOMAXCONN) backlog = SOMAXCONN; sk->max_ack_backlog = backlog; - if (sk->state != TCP_LISTEN) { - sk->ack_backlog = 0; + + /* Really, if the socket is already in listen state + * we can only allow the backlog to be adjusted. + */ + old_state = sk->state; + if (old_state != TCP_LISTEN) { sk->state = TCP_LISTEN; + sk->ack_backlog = 0; + if (sk->num == 0) { + if (sk->prot->get_port(sk, 0) != 0) { + sk->state = old_state; + return -EAGAIN; + } + sk->sport = htons(sk->num); + add_to_prot_sklist(sk); + } + dst_release(xchg(&sk->dst_cache, NULL)); - sk->prot->rehash(sk); - add_to_prot_sklist(sk); + sk->prot->hash(sk); + sk->socket->flags |= SO_ACCEPTCON; } - sk->socket->flags |= SO_ACCEPTCON; - return(0); + return 0; } /* @@ -528,20 +537,17 @@ if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END)) return -EADDRINUSE; #endif - if (snum == 0) - snum = sk->prot->good_socknum(); - if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return(-EACCES); /* Make sure we are allowed to bind here. 
*/ - if(sk->prot->verify_bind(sk, snum)) + if (sk->prot->get_port(sk, snum) != 0) return -EADDRINUSE; - sk->num = snum; - sk->sport = htons(snum); + sk->sport = htons(sk->num); sk->daddr = 0; sk->dport = 0; - sk->prot->rehash(sk); + sk->prot->hash(sk); add_to_prot_sklist(sk); dst_release(sk->dst_cache); sk->dst_cache=NULL; @@ -611,11 +617,13 @@ if (flags & O_NONBLOCK) return -EALREADY; } else { + if (sk->prot->connect == NULL) + return(-EOPNOTSUPP); + /* We may need to bind the socket. */ if (inet_autobind(sk) != 0) return(-EAGAIN); - if (sk->prot->connect == NULL) - return(-EOPNOTSUPP); + err = sk->prot->connect(sk, uaddr, addr_len); /* Note: there is a theoretical race here when an wake up occurred before inet_wait_for_connect is entered. In 2.3 @@ -789,7 +797,7 @@ return sock_error(sk); /* We may need to bind the socket. */ - if(inet_autobind(sk) != 0) + if (inet_autobind(sk) != 0) return -EAGAIN; return sk->prot->sendmsg(sk, msg, size); diff -urN linux-2.2.10-ac5/net/ipv4/arp.c linux-2.2.10-ac6/net/ipv4/arp.c --- linux-2.2.10-ac5/net/ipv4/arp.c Wed Mar 24 01:53:52 1999 +++ linux-2.2.10-ac6/net/ipv4/arp.c Sat Jul 17 01:18:30 1999 @@ -1,6 +1,6 @@ /* linux/net/inet/arp.c * - * Version: $Id: arp.c,v 1.77 1999/03/21 05:22:30 davem Exp $ + * Version: $Id: arp.c,v 1.77.2.1 1999/06/28 10:39:23 davem Exp $ * * Copyright (C) 1994 by Florian La Roche * @@ -383,9 +383,9 @@ end_bh_atomic(); return 0; } + neigh_release(n); } else kfree_skb(skb); - neigh_release(n); end_bh_atomic(); return 1; } diff -urN linux-2.2.10-ac5/net/ipv4/raw.c linux-2.2.10-ac6/net/ipv4/raw.c --- linux-2.2.10-ac5/net/ipv4/raw.c Mon Jan 18 02:27:53 1999 +++ linux-2.2.10-ac6/net/ipv4/raw.c Sat Jul 17 01:18:30 1999 @@ -5,7 +5,7 @@ * * RAW - implementation of IP "raw" sockets. * - * Version: $Id: raw.c,v 1.39 1998/11/08 11:17:04 davem Exp $ + * Version: $Id: raw.c,v 1.39.2.1 1999/06/20 20:14:50 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -70,57 +70,25 @@ static void raw_v4_hash(struct sock *sk) { - struct sock **skp; - int num = sk->num; + struct sock **skp = &raw_v4_htable[sk->num & (RAWV4_HTABLE_SIZE - 1)]; - num &= (RAWV4_HTABLE_SIZE - 1); - skp = &raw_v4_htable[num]; SOCKHASH_LOCK(); - sk->next = *skp; + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; *skp = sk; - sk->hashent = num; + sk->pprev = skp; SOCKHASH_UNLOCK(); } static void raw_v4_unhash(struct sock *sk) { - struct sock **skp; - int num = sk->num; - - num &= (RAWV4_HTABLE_SIZE - 1); - skp = &raw_v4_htable[num]; - - SOCKHASH_LOCK(); - while(*skp != NULL) { - if(*skp == sk) { - *skp = sk->next; - break; - } - skp = &((*skp)->next); - } - SOCKHASH_UNLOCK(); -} - -static void raw_v4_rehash(struct sock *sk) -{ - struct sock **skp; - int num = sk->num; - int oldnum = sk->hashent; - - num &= (RAWV4_HTABLE_SIZE - 1); - skp = &raw_v4_htable[oldnum]; - SOCKHASH_LOCK(); - while(*skp != NULL) { - if(*skp == sk) { - *skp = sk->next; - break; - } - skp = &((*skp)->next); + if (sk->pprev) { + if (sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; } - sk->next = raw_v4_htable[num]; - raw_v4_htable[num] = sk; - sk->hashent = num; SOCKHASH_UNLOCK(); } @@ -596,9 +564,7 @@ raw_rcv_skb, /* backlog_rcv */ raw_v4_hash, /* hash */ raw_v4_unhash, /* unhash */ - raw_v4_rehash, /* rehash */ - NULL, /* good_socknum */ - NULL, /* verify_bind */ + NULL, /* get_port */ 128, /* max_header */ 0, /* retransmits */ "RAW", /* name */ diff -urN linux-2.2.10-ac5/net/ipv4/tcp_input.c linux-2.2.10-ac6/net/ipv4/tcp_input.c --- linux-2.2.10-ac5/net/ipv4/tcp_input.c Tue Jul 13 00:33:23 1999 +++ linux-2.2.10-ac6/net/ipv4/tcp_input.c Sat Jul 17 01:18:31 1999 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.164.2.3 1999/06/02 04:15:06 davem Exp $ + * Version: $Id: tcp_input.c,v 1.164.2.5 1999/06/30 09:27:05 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -916,13 +916,20 @@ void tcp_timewait_kill(struct tcp_tw_bucket *tw) { - /* Unlink from various places. */ + struct tcp_bind_bucket *tb = tw->tb; + + /* Disassociate with bind bucket. */ if(tw->bind_next) tw->bind_next->bind_pprev = tw->bind_pprev; *(tw->bind_pprev) = tw->bind_next; - if(tw->tb->owners == NULL) - tcp_inc_slow_timer(TCP_SLT_BUCKETGC); + if (tb->owners == NULL) { + if (tb->next) + tb->next->pprev = tb->pprev; + *(tb->pprev) = tb->next; + kmem_cache_free(tcp_bucket_cachep, tb); + } + /* Unlink from established hashes. */ if(tw->next) tw->next->pprev = tw->pprev; *tw->pprev = tw->next; @@ -1023,6 +1030,7 @@ sk->bind_next->bind_pprev = &tw->bind_next; tw->bind_pprev = sk->bind_pprev; *sk->bind_pprev = (struct sock *)tw; + sk->prev = NULL; /* Step 3: Same for the protocol sklist. */ (tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw; diff -urN linux-2.2.10-ac5/net/ipv4/tcp_ipv4.c linux-2.2.10-ac6/net/ipv4/tcp_ipv4.c --- linux-2.2.10-ac5/net/ipv4/tcp_ipv4.c Tue Jul 13 00:33:23 1999 +++ linux-2.2.10-ac6/net/ipv4/tcp_ipv4.c Sat Jul 17 01:18:31 1999 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.175.2.2 1999/06/02 04:06:15 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.175.2.5 1999/06/30 09:27:00 davem Exp $ * * IPv4 specific functions * @@ -130,27 +130,9 @@ return tcp_hashfn(laddr, lport, faddr, fport); } -/* Invariant, sk->num is non-zero. */ -void tcp_bucket_unlock(struct sock *sk) -{ - struct tcp_bind_bucket *tb; - unsigned short snum = sk->num; - - SOCKHASH_LOCK(); - for(tb = tcp_bound_hash[tcp_bhashfn(snum)]; tb; tb = tb->next) { - if(tb->port == snum) { - if(tb->owners == NULL && - (tb->flags & TCPB_FLAG_LOCKED)) { - tb->flags &= ~(TCPB_FLAG_LOCKED | - TCPB_FLAG_FASTREUSE); - tcp_inc_slow_timer(TCP_SLT_BUCKETGC); - } - break; - } - } - SOCKHASH_UNLOCK(); -} - +/* Allocate and initialize a new TCP local port bind bucket. 
+ * Always runs inside the socket hashing lock. + */ struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum) { struct tcp_bind_bucket *tb; @@ -160,7 +142,7 @@ struct tcp_bind_bucket **head = &tcp_bound_hash[tcp_bhashfn(snum)]; tb->port = snum; - tb->flags = TCPB_FLAG_LOCKED; + tb->fastreuse = 0; tb->owners = NULL; if((tb->next = *head) != NULL) tb->next->pprev = &tb->next; @@ -172,139 +154,184 @@ #ifdef CONFIG_IP_TRANSPARENT_PROXY /* Ensure that the bound bucket for the port exists. - * Return 0 on success. + * Return 0 and bump bucket reference count on success. + * + * Must run in a BH atomic section. */ -static __inline__ int tcp_bucket_check(unsigned short snum) +static __inline__ int __tcp_bucket_check(unsigned short snum) { - struct tcp_bind_bucket *tb = tcp_bound_hash[tcp_bhashfn(snum)]; + struct tcp_bind_bucket *tb; + + tb = tcp_bound_hash[tcp_bhashfn(snum)]; for( ; (tb && (tb->port != snum)); tb = tb->next) ; - if(tb == NULL && tcp_bucket_create(snum) == NULL) - return 1; - else - return 0; + if (tb == NULL) { + if ((tb = tcp_bucket_create(snum)) == NULL) { + /* No SOCKHASH_LOCK() is taken in this function, so there is nothing to unlock. */ + return 1; + } + } + + return 0; } #endif -static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum) +static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) +{ + struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev; + + if ((child->bind_next = tb->owners) != NULL) + tb->owners->bind_pprev = &child->bind_next; + tb->owners = child; + child->bind_pprev = &tb->owners; + child->prev = (struct sock *) tb; +} + +__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child) +{ + SOCKHASH_LOCK(); + __tcp_inherit_port(sk, child); + SOCKHASH_UNLOCK(); +} + +/* Obtain a reference to a local port for the given sock, + * if snum is zero it means select any available local port. 
+ */ +static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { struct tcp_bind_bucket *tb; - int result = 0; SOCKHASH_LOCK(); - for(tb = tcp_bound_hash[tcp_bhashfn(snum)]; - (tb && (tb->port != snum)); - tb = tb->next) - ; - if(tb && tb->owners) { - /* Fast path for reuse ports, see include/net/tcp.h for a very - * detailed description of why this works, and why it is worth - * the effort at all. -DaveM - */ - if((tb->flags & TCPB_FLAG_FASTREUSE) && - (sk->reuse != 0)) { - goto go_like_smoke; + if (snum == 0) { + int rover = tcp_port_rover; + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + + do { rover++; + if ((rover < low) || (rover > high)) + rover = low; + tb = tcp_bound_hash[tcp_bhashfn(rover)]; + for ( ; tb; tb = tb->next) + if (tb->port == rover) + goto next; + break; + next: + } while (--remaining > 0); + tcp_port_rover = rover; + + /* Exhausted local port range during search? */ + if (remaining <= 0) + goto fail; + + /* OK, here is the one we will use. */ + snum = rover; + tb = NULL; + } else { + for (tb = tcp_bound_hash[tcp_bhashfn(snum)]; + tb != NULL; + tb = tb->next) + if (tb->port == snum) + break; + } + if (tb != NULL && tb->owners != NULL) { + if (tb->fastreuse != 0 && sk->reuse != 0) { + goto success; } else { - struct sock *sk2; + struct sock *sk2 = tb->owners; int sk_reuse = sk->reuse; - /* We must walk the whole port owner list in this case. 
-DaveM */ - for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) { + for( ; sk2 != NULL; sk2 = sk2->bind_next) { if (sk->bound_dev_if == sk2->bound_dev_if) { - if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) { - if(!sk2->rcv_saddr || - !sk->rcv_saddr || - (sk2->rcv_saddr == sk->rcv_saddr)) + if (!sk_reuse || + !sk2->reuse || + sk2->state == TCP_LISTEN) { + if (!sk2->rcv_saddr || + !sk->rcv_saddr || + (sk2->rcv_saddr == sk->rcv_saddr)) break; } } } - if(sk2 != NULL) - result = 1; - } - } - if(result == 0) { - if(tb == NULL) { - if((tb = tcp_bucket_create(snum)) == NULL) - result = 1; - else if (sk->reuse && sk->state != TCP_LISTEN) - tb->flags |= TCPB_FLAG_FASTREUSE; - } else { - /* It could be pending garbage collection, this - * kills the race and prevents it from disappearing - * out from under us by the time we use it. -DaveM - */ - if(tb->owners == NULL) { - if (!(tb->flags & TCPB_FLAG_LOCKED)) { - tb->flags = (TCPB_FLAG_LOCKED | - ((sk->reuse && - sk->state != TCP_LISTEN) ? - TCPB_FLAG_FASTREUSE : 0)); - tcp_dec_slow_timer(TCP_SLT_BUCKETGC); - } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) { - /* Someone is in between the bind - * and the actual connect or listen. - * See if it was a legitimate reuse - * and we are as well, else punt. - */ - if (sk->reuse == 0 || - !(tb->flags & TCPB_FLAG_FASTREUSE)) - result = 1; - } else - tb->flags &= ~TCPB_FLAG_GOODSOCKNUM; - } + /* If we found a conflict, fail. 
*/ + if (sk2 != NULL) + goto fail; } } -go_like_smoke: + if (tb == NULL && + (tb = tcp_bucket_create(snum)) == NULL) + goto fail; + if (tb->owners == NULL) { + if (sk->reuse && sk->state != TCP_LISTEN) + tb->fastreuse = 1; + else + tb->fastreuse = 0; + } else if (tb->fastreuse && + ((sk->reuse == 0) || (sk->state == TCP_LISTEN))) + tb->fastreuse = 0; +success: + sk->num = snum; + if ((sk->bind_next = tb->owners) != NULL) + tb->owners->bind_pprev = &sk->bind_next; + tb->owners = sk; + sk->bind_pprev = &tb->owners; + sk->prev = (struct sock *) tb; + SOCKHASH_UNLOCK(); - return result; + return 0; + +fail: + SOCKHASH_UNLOCK(); + return 1; } -unsigned short tcp_good_socknum(void) +/* Get rid of any references to a local port held by the + * given sock. + */ +__inline__ void __tcp_put_port(struct sock *sk) { struct tcp_bind_bucket *tb; - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover; + tb = (struct tcp_bind_bucket *) sk->prev; + if (sk->bind_next) + sk->bind_next->bind_pprev = sk->bind_pprev; + *(sk->bind_pprev) = sk->bind_next; + sk->prev = NULL; + if (tb->owners == NULL) { + if (tb->next) + tb->next->pprev = tb->pprev; + *(tb->pprev) = tb->next; + kmem_cache_free(tcp_bucket_cachep, tb); + } +} + +void tcp_put_port(struct sock *sk) +{ SOCKHASH_LOCK(); - rover = tcp_port_rover; - do { - rover += 1; - if((rover < low) || (rover > high)) - rover = low; - tb = tcp_bound_hash[tcp_bhashfn(rover)]; - for( ; tb; tb = tb->next) { - if(tb->port == rover) - goto next; - } - break; - next: - } while(--remaining > 0); - tcp_port_rover = rover; - tb = NULL; - if((remaining <= 0) || ((tb = tcp_bucket_create(rover)) == NULL)) - rover = 0; - if (tb != NULL) - tb->flags |= TCPB_FLAG_GOODSOCKNUM; + __tcp_put_port(sk); SOCKHASH_UNLOCK(); +} + +static __inline__ void __tcp_v4_hash(struct sock *sk) +{ + struct sock **skp; - return rover; + if(sk->state == TCP_LISTEN) + skp = 
&tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + else + skp = &tcp_established_hash[(sk->hashent = tcp_sk_hashfn(sk))]; + + if((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; } static void tcp_v4_hash(struct sock *sk) { if (sk->state != TCP_CLOSE) { - struct sock **skp; - SOCKHASH_LOCK(); - skp = &tcp_established_hash[(sk->hashent = tcp_sk_hashfn(sk))]; - if((sk->next = *skp) != NULL) - (*skp)->pprev = &sk->next; - *skp = sk; - sk->pprev = skp; - tcp_sk_bindify(sk); + __tcp_v4_hash(sk); SOCKHASH_UNLOCK(); } } @@ -318,38 +345,7 @@ *sk->pprev = sk->next; sk->pprev = NULL; tcp_reg_zap(sk); - tcp_sk_unbindify(sk); - } - SOCKHASH_UNLOCK(); -} - -static void tcp_v4_rehash(struct sock *sk) -{ - unsigned char state; - - SOCKHASH_LOCK(); - state = sk->state; - if(sk->pprev != NULL) { - if(sk->next) - sk->next->pprev = sk->pprev; - *sk->pprev = sk->next; - sk->pprev = NULL; - tcp_reg_zap(sk); - } - if(state != TCP_CLOSE) { - struct sock **skp; - - if(state == TCP_LISTEN) - skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; - else - skp = &tcp_established_hash[(sk->hashent = tcp_sk_hashfn(sk))]; - - if((sk->next = *skp) != NULL) - (*skp)->pprev = &sk->next; - *skp = sk; - sk->pprev = skp; - if(state == TCP_LISTEN) - tcp_sk_bindify(sk); + __tcp_put_port(sk); } SOCKHASH_UNLOCK(); } @@ -1468,7 +1464,7 @@ * later will require to destroy just created newsk in the case of fail. * 1998/04/22 Andrey V. Savochkin */ - if (tcp_bucket_check(ntohs(skb->h.th->dest))) + if (__tcp_bucket_check(ntohs(skb->h.th->dest))) goto exit; #endif @@ -1503,8 +1499,13 @@ if (newsk->sndbuf < (3 * newtp->pmtu_cookie)) newsk->sndbuf = min ((3 * newtp->pmtu_cookie), sysctl_wmem_max); - tcp_v4_hash(newsk); - add_to_prot_sklist(newsk); + /* We run in BH processing itself or within a BH atomic + * sequence (backlog) so no locking is needed. 
+ */ + __tcp_v4_hash(newsk); + __tcp_inherit_port(sk, newsk); + __add_to_prot_sklist(newsk); + sk->data_ready(sk, 0); /* Deliver SIGIO */ return newsk; @@ -1727,6 +1728,25 @@ goto discard_it; } +static void __tcp_v4_rehash(struct sock *sk) +{ + struct sock **skp = &tcp_established_hash[(sk->hashent = tcp_sk_hashfn(sk))]; + + SOCKHASH_LOCK(); + if(sk->pprev) { + if(sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + tcp_reg_zap(sk); + } + if((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + SOCKHASH_UNLOCK(); +} + int tcp_v4_rebuild_header(struct sock *sk) { struct rtable *rt = (struct rtable *)sk->dst_cache; @@ -1800,7 +1820,12 @@ sk->saddr = new_saddr; sk->rcv_saddr = new_saddr; - tcp_v4_rehash(sk); + + /* XXX The only one ugly spot where we need to + * XXX really change the sockets identity after + * XXX it has entered the hashes. -DaveM + */ + __tcp_v4_rehash(sk); } return 0; @@ -1895,13 +1920,11 @@ while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL) kfree_skb(skb); - /* Clean up a locked TCP bind bucket, this only happens if a + /* Clean up a referenced TCP bind bucket, this only happens if a * port is allocated for a socket, but it never fully connects. - * In which case we will find num to be non-zero and daddr to - * be zero. 
*/ - if(sk->daddr == 0 && sk->num != 0) - tcp_bucket_unlock(sk); + if(sk->prev != NULL) + tcp_put_port(sk); return 0; } @@ -1928,9 +1951,7 @@ tcp_v4_do_rcv, /* backlog_rcv */ tcp_v4_hash, /* hash */ tcp_v4_unhash, /* unhash */ - tcp_v4_rehash, /* rehash */ - tcp_good_socknum, /* good_socknum */ - tcp_v4_verify_bind, /* verify_bind */ + tcp_v4_get_port, /* get_port */ 128, /* max_header */ 0, /* retransmits */ "TCP", /* name */ diff -urN linux-2.2.10-ac5/net/ipv4/tcp_timer.c linux-2.2.10-ac6/net/ipv4/tcp_timer.c --- linux-2.2.10-ac5/net/ipv4/tcp_timer.c Tue Jul 13 00:33:23 1999 +++ linux-2.2.10-ac6/net/ipv4/tcp_timer.c Sat Jul 17 01:18:31 1999 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_timer.c,v 1.62.2.2 1999/06/02 04:06:21 davem Exp $ + * Version: $Id: tcp_timer.c,v 1.62.2.3 1999/06/20 20:14:30 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -31,7 +31,6 @@ static void tcp_sltimer_handler(unsigned long); static void tcp_syn_recv_timer(unsigned long); static void tcp_keepalive(unsigned long data); -static void tcp_bucketgc(unsigned long); static void tcp_twkill(unsigned long); struct timer_list tcp_slow_timer = { @@ -44,8 +43,7 @@ struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = { {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK */ {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive}, /* KEEPALIVE */ - {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill}, /* TWKILL */ - {ATOMIC_INIT(0), TCP_BUCKETGC_PERIOD, 0, tcp_bucketgc} /* BUCKETGC */ + {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill} /* TWKILL */ }; const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; @@ -246,40 +244,6 @@ } } return res; -} - -/* Garbage collect TCP bind buckets. 
*/ -static void tcp_bucketgc(unsigned long data) -{ - int i, reaped = 0;; - - for(i = 0; i < TCP_BHTABLE_SIZE; i++) { - struct tcp_bind_bucket *tb = tcp_bound_hash[i]; - - while(tb) { - struct tcp_bind_bucket *next = tb->next; - - if((tb->owners == NULL) && - !(tb->flags & TCPB_FLAG_LOCKED)) { - reaped++; - - /* Unlink bucket. */ - if(tb->next) - tb->next->pprev = tb->pprev; - *tb->pprev = tb->next; - - /* Finally, free it up. */ - kmem_cache_free(tcp_bucket_cachep, tb); - } - tb = next; - } - } - if(reaped != 0) { - struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data; - - /* Eat timer references. */ - atomic_sub(reaped, &slt->count); - } } /* Kill off TIME_WAIT sockets once their lifetime has expired. */ diff -urN linux-2.2.10-ac5/net/ipv4/udp.c linux-2.2.10-ac6/net/ipv4/udp.c --- linux-2.2.10-ac5/net/ipv4/udp.c Sat Jul 17 01:17:56 1999 +++ linux-2.2.10-ac6/net/ipv4/udp.c Sat Jul 17 01:18:31 1999 @@ -5,7 +5,7 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.66 1999/05/08 20:00:25 davem Exp $ + * Version: $Id: udp.c,v 1.66.2.1 1999/06/20 20:14:48 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -123,108 +123,75 @@ struct sock *udp_hash[UDP_HTABLE_SIZE]; -static int udp_v4_verify_bind(struct sock *sk, unsigned short snum) -{ - struct sock *sk2; - int retval = 0, sk_reuse = sk->reuse; +/* Shared by v4/v6 udp. */ +int udp_port_rover = 0; +static int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ SOCKHASH_LOCK(); - for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) { - if((sk2->num == snum) && (sk2 != sk)) { - unsigned char state = sk2->state; - int sk2_reuse = sk2->reuse; - - /* Two sockets can be bound to the same port if they're - * bound to different interfaces. 
- */ - - if(sk2->bound_dev_if != sk->bound_dev_if) - continue; + if (snum == 0) { + int best_size_so_far, best, result, i; - if(!sk2->rcv_saddr || !sk->rcv_saddr) { - if((!sk2_reuse) || - (!sk_reuse) || - (state == TCP_LISTEN)) { - retval = 1; - break; - } - } else if(sk2->rcv_saddr == sk->rcv_saddr) { - if((!sk_reuse) || - (!sk2_reuse) || - (state == TCP_LISTEN)) { - retval = 1; - break; - } + if (udp_port_rover > sysctl_local_port_range[1] || + udp_port_rover < sysctl_local_port_range[0]) + udp_port_rover = sysctl_local_port_range[0]; + best_size_so_far = 32767; + best = result = udp_port_rover; + for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { + struct sock *sk; + int size; + + sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (!sk) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + goto gotit; } + size = 0; + do { + if (++size >= best_size_so_far) + goto next; + } while ((sk = sk->next) != NULL); + best_size_so_far = size; + best = result; + next: } - } - SOCKHASH_UNLOCK(); - return retval; -} - -static inline int udp_lport_inuse(u16 num) -{ - struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)]; + result = best; + for(;; result += UDP_HTABLE_SIZE) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + if (!udp_lport_inuse(result)) + break; + } +gotit: + udp_port_rover = snum = result; + } else { + struct sock *sk2; - for(; sk != NULL; sk = sk->next) { - if(sk->num == num) - return 1; + for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + sk2 != NULL; + sk2 = sk2->next) { + if (sk2->num == snum && + sk2 != sk && + sk2->bound_dev_if == sk->bound_dev_if && + (!sk2->rcv_saddr || + !sk->rcv_saddr || + sk2->rcv_saddr == sk->rcv_saddr) && + (!sk2->reuse || !sk->reuse)) + goto fail; + } } + sk->num = snum; + SOCKHASH_UNLOCK(); return 0; -} - -/* Shared by 
v4/v6 tcp. */ -unsigned short udp_good_socknum(void) -{ - int result; - static int start = 0; - int i, best, best_size_so_far; - - SOCKHASH_LOCK(); - if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0]) - start = sysctl_local_port_range[0]; - - best_size_so_far = 32767; /* "big" num */ - best = result = start; - - for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct sock *sk; - int size; - - sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - - if(!sk) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - goto out; - } - /* Is this one better than our best so far? */ - size = 0; - do { - if(++size >= best_size_so_far) - goto next; - } while((sk = sk->next) != NULL); - best_size_so_far = size; - best = result; - next: - } - - result = best; - - for(;; result += UDP_HTABLE_SIZE) { - /* Get into range (but preserve hash bin)... */ - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - if (!udp_lport_inuse(result)) - break; - } -out: - start = result; +fail: SOCKHASH_UNLOCK(); - return result; + return 1; } /* Last hit UDP socket cache, this is ipv4 specific so make it static. 
*/ @@ -234,62 +201,27 @@ static void udp_v4_hash(struct sock *sk) { - struct sock **skp; - int num = sk->num; - - num &= (UDP_HTABLE_SIZE - 1); - skp = &udp_hash[num]; + struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)]; SOCKHASH_LOCK(); - sk->next = *skp; + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; *skp = sk; - sk->hashent = num; + sk->pprev = skp; SOCKHASH_UNLOCK(); } static void udp_v4_unhash(struct sock *sk) { - struct sock **skp; - int num = sk->num; - - num &= (UDP_HTABLE_SIZE - 1); - skp = &udp_hash[num]; - SOCKHASH_LOCK(); - while(*skp != NULL) { - if(*skp == sk) { - *skp = sk->next; - break; - } - skp = &((*skp)->next); - } - if(uh_cache_sk == sk) - uh_cache_sk = NULL; - SOCKHASH_UNLOCK(); -} - -static void udp_v4_rehash(struct sock *sk) -{ - struct sock **skp; - int num = sk->num; - int oldnum = sk->hashent; - - num &= (UDP_HTABLE_SIZE - 1); - skp = &udp_hash[oldnum]; - - SOCKHASH_LOCK(); - while(*skp != NULL) { - if(*skp == sk) { - *skp = sk->next; - break; - } - skp = &((*skp)->next); + if (sk->pprev) { + if (sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; + if(uh_cache_sk == sk) + uh_cache_sk = NULL; } - sk->next = udp_hash[num]; - udp_hash[num] = sk; - sk->hashent = num; - if(uh_cache_sk == sk) - uh_cache_sk = NULL; SOCKHASH_UNLOCK(); } @@ -810,7 +742,6 @@ { unsigned long amount; - if (sk->state == TCP_LISTEN) return(-EINVAL); amount = sock_wspace(sk); return put_user(amount, (int *)arg); } @@ -820,8 +751,6 @@ struct sk_buff *skb; unsigned long amount; - if (sk->state == TCP_LISTEN) - return(-EINVAL); amount = 0; /* N.B. Is this interrupt safe?? -> Yes. Interrupts do not remove skbs. 
--ANK (980725) @@ -1251,9 +1180,7 @@ udp_queue_rcv_skb, /* backlog_rcv */ udp_v4_hash, /* hash */ udp_v4_unhash, /* unhash */ - udp_v4_rehash, /* rehash */ - udp_good_socknum, /* good_socknum */ - udp_v4_verify_bind, /* verify_bind */ + udp_v4_get_port, /* get_port */ 128, /* max_header */ 0, /* retransmits */ "UDP", /* name */ diff -urN linux-2.2.10-ac5/net/ipv6/af_inet6.c linux-2.2.10-ac6/net/ipv6/af_inet6.c --- linux-2.2.10-ac5/net/ipv6/af_inet6.c Tue Jul 13 00:33:05 1999 +++ linux-2.2.10-ac6/net/ipv6/af_inet6.c Sat Jul 17 01:18:31 1999 @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.43 1999/04/22 10:07:39 davem Exp $ + * $Id: af_inet6.c,v 1.43.2.1 1999/06/20 20:15:06 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -236,20 +236,17 @@ sizeof(struct in6_addr)); snum = ntohs(addr->sin6_port); - if (snum == 0) - snum = sk->prot->good_socknum(); - if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return(-EACCES); /* Make sure we are allowed to bind here. 
*/ - if(sk->prot->verify_bind(sk, snum)) + if(sk->prot->get_port(sk, snum) != 0) return -EADDRINUSE; - sk->num = snum; sk->sport = ntohs(sk->num); sk->dport = 0; sk->daddr = 0; - sk->prot->rehash(sk); + sk->prot->hash(sk); add_to_prot_sklist(sk); return(0); diff -urN linux-2.2.10-ac5/net/ipv6/raw.c linux-2.2.10-ac6/net/ipv6/raw.c --- linux-2.2.10-ac5/net/ipv6/raw.c Tue Jul 13 00:33:05 1999 +++ linux-2.2.10-ac6/net/ipv6/raw.c Sat Jul 17 01:18:31 1999 @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.24 1999/04/22 10:07:45 davem Exp $ + * $Id: raw.c,v 1.24.2.1 1999/06/20 20:14:58 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -45,57 +45,25 @@ static void raw_v6_hash(struct sock *sk) { - struct sock **skp; - int num = sk->num; + struct sock **skp = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)]; - num &= (RAWV6_HTABLE_SIZE - 1); - skp = &raw_v6_htable[num]; SOCKHASH_LOCK(); - sk->next = *skp; + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; *skp = sk; - sk->hashent = num; + sk->pprev = skp; SOCKHASH_UNLOCK(); } static void raw_v6_unhash(struct sock *sk) { - struct sock **skp; - int num = sk->num; - - num &= (RAWV6_HTABLE_SIZE - 1); - skp = &raw_v6_htable[num]; - - SOCKHASH_LOCK(); - while(*skp != NULL) { - if(*skp == sk) { - *skp = sk->next; - break; - } - skp = &((*skp)->next); - } - SOCKHASH_UNLOCK(); -} - -static void raw_v6_rehash(struct sock *sk) -{ - struct sock **skp; - int num = sk->num; - int oldnum = sk->hashent; - - num &= (RAWV6_HTABLE_SIZE - 1); - skp = &raw_v6_htable[oldnum]; - SOCKHASH_LOCK(); - while(*skp != NULL) { - if(*skp == sk) { - *skp = sk->next; - break; - } - skp = &((*skp)->next); + if (sk->pprev) { + if (sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; } - sk->next = raw_v6_htable[num]; - raw_v6_htable[num] = sk; - sk->hashent = num; SOCKHASH_UNLOCK(); } @@ -667,9 +635,7 
@@ rawv6_rcv_skb, /* backlog_rcv */ raw_v6_hash, /* hash */ raw_v6_unhash, /* unhash */ - raw_v6_rehash, /* rehash */ - NULL, /* good_socknum */ - NULL, /* verify_bind */ + NULL, /* get_port */ 128, /* max_header */ 0, /* retransmits */ "RAW", /* name */ diff -urN linux-2.2.10-ac5/net/ipv6/tcp_ipv6.c linux-2.2.10-ac6/net/ipv6/tcp_ipv6.c --- linux-2.2.10-ac5/net/ipv6/tcp_ipv6.c Tue Jul 13 00:33:24 1999 +++ linux-2.2.10-ac6/net/ipv6/tcp_ipv6.c Sat Jul 17 01:18:31 1999 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: tcp_ipv6.c,v 1.104.2.2 1999/06/02 04:06:27 davem Exp $ + * $Id: tcp_ipv6.c,v 1.104.2.4 1999/06/30 09:27:12 davem Exp $ * * Based on: * linux/net/ipv4/tcp.c @@ -84,101 +84,120 @@ * But it doesn't matter, the recalculation is in the rarest path * this function ever takes. */ -static int tcp_v6_verify_bind(struct sock *sk, unsigned short snum) +static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { struct tcp_bind_bucket *tb; - int result = 0; SOCKHASH_LOCK(); - for(tb = tcp_bound_hash[tcp_bhashfn(snum)]; - (tb && (tb->port != snum)); - tb = tb->next) - ; - if(tb && tb->owners) { - /* Fast path for reuse ports, see include/net/tcp.h for a very - * detailed description of why this works, and why it is worth - * the effort at all. -DaveM - */ - if((tb->flags & TCPB_FLAG_FASTREUSE) && - (sk->reuse != 0)) { - goto go_like_smoke; + if (snum == 0) { + int rover = tcp_port_rover; + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + + do { rover++; + if ((rover < low) || (rover > high)) + rover = low; + tb = tcp_bound_hash[tcp_bhashfn(rover)]; + for ( ; tb; tb = tb->next) + if (tb->port == rover) + goto next; + break; + next: + } while (--remaining > 0); + tcp_port_rover = rover; + + /* Exhausted local port range during search? */ + if (remaining <= 0) + goto fail; + + /* OK, here is the one we will use. 
*/ + snum = rover; + tb = NULL; + } else { + for (tb = tcp_bound_hash[tcp_bhashfn(snum)]; + tb != NULL; + tb = tb->next) + if (tb->port == snum) + break; + } + if (tb != NULL && tb->owners != NULL) { + if (tb->fastreuse != 0 && sk->reuse != 0) { + goto success; } else { - struct sock *sk2; + struct sock *sk2 = tb->owners; int sk_reuse = sk->reuse; int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr); - /* We must walk the whole port owner list in this case. -DaveM */ - for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) { - if(sk->bound_dev_if == sk2->bound_dev_if) { - if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) { - if(addr_type == IPV6_ADDR_ANY || - !sk2->rcv_saddr || - !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, - &sk2->net_pinfo.af_inet6.rcv_saddr)) + for( ; sk2 != NULL; sk2 = sk2->bind_next) { + if (sk->bound_dev_if == sk2->bound_dev_if) { + if (!sk_reuse || + !sk2->reuse || + sk2->state == TCP_LISTEN) { + if (!sk2->rcv_saddr || + addr_type == IPV6_ADDR_ANY || /* our own address type is ANY */ + !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, + &sk2->net_pinfo.af_inet6.rcv_saddr)) break; } } } - if(sk2 != NULL) - result = 1; + /* If we found a conflict, fail. */ + if (sk2 != NULL) + goto fail; } } - if(result == 0) { - if(tb == NULL) { - if((tb = tcp_bucket_create(snum)) == NULL) - result = 1; - else if (sk->reuse && sk->state != TCP_LISTEN) - tb->flags |= TCPB_FLAG_FASTREUSE; - } else { - /* It could be pending garbage collection, this - * kills the race and prevents it from disappearing - * out from under us by the time we use it. -DaveM - */ - if(tb->owners == NULL) { - if (!(tb->flags & TCPB_FLAG_LOCKED)) { - tb->flags = (TCPB_FLAG_LOCKED | - ((sk->reuse && - sk->state != TCP_LISTEN) ? - TCPB_FLAG_FASTREUSE : 0)); - tcp_dec_slow_timer(TCP_SLT_BUCKETGC); - } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) { - /* Someone is in between the bind - * and the actual connect or listen. - * See if it was a legitimate reuse - * and we are as well, else punt. 
- */ - if (sk->reuse == 0 || - !(tb->flags & TCPB_FLAG_FASTREUSE)) - result = 1; - } else - tb->flags &= ~TCPB_FLAG_GOODSOCKNUM; - } - } - } -go_like_smoke: + if (tb == NULL && + (tb = tcp_bucket_create(snum)) == NULL) + goto fail; + if (tb->owners == NULL) { + if (sk->reuse && sk->state != TCP_LISTEN) + tb->fastreuse = 1; + else + tb->fastreuse = 0; + } else if (tb->fastreuse && + ((sk->reuse == 0) || (sk->state == TCP_LISTEN))) + tb->fastreuse = 0; + +success: + sk->num = snum; + if ((sk->bind_next = tb->owners) != NULL) + tb->owners->bind_pprev = &sk->bind_next; + tb->owners = sk; + sk->bind_pprev = &tb->owners; + sk->prev = (struct sock *) tb; + SOCKHASH_UNLOCK(); - return result; + return 0; + +fail: + SOCKHASH_UNLOCK(); + return 1; } static void tcp_v6_hash(struct sock *sk) { - /* Well, I know that it is ugly... - All this ->prot, ->af_specific etc. need LARGE cleanup --ANK - */ - if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) { - tcp_prot.hash(sk); - return; - } if(sk->state != TCP_CLOSE) { struct sock **skp; + /* Well, I know that it is ugly... + * All this ->prot, ->af_specific etc. 
need LARGE cleanup --ANK + */ + if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) { + tcp_prot.hash(sk); + return; + } + + if(sk->state == TCP_LISTEN) + skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + else + skp = &tcp_established_hash[(sk->hashent = tcp_v6_sk_hashfn(sk))]; + SOCKHASH_LOCK(); - skp = &tcp_established_hash[(sk->hashent = tcp_v6_sk_hashfn(sk))]; if((sk->next = *skp) != NULL) (*skp)->pprev = &sk->next; *skp = sk; sk->pprev = skp; - tcp_sk_bindify(sk); SOCKHASH_UNLOCK(); } } @@ -191,39 +210,8 @@ sk->next->pprev = sk->pprev; *sk->pprev = sk->next; sk->pprev = NULL; - tcp_sk_unbindify(sk); tcp_reg_zap(sk); - } - SOCKHASH_UNLOCK(); -} - -static void tcp_v6_rehash(struct sock *sk) -{ - unsigned char state; - - SOCKHASH_LOCK(); - state = sk->state; - if(sk->pprev != NULL) { - if(sk->next) - sk->next->pprev = sk->pprev; - *sk->pprev = sk->next; - sk->pprev = NULL; - tcp_reg_zap(sk); - } - if(state != TCP_CLOSE) { - struct sock **skp; - - if(state == TCP_LISTEN) - skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; - else - skp = &tcp_established_hash[(sk->hashent = tcp_v6_sk_hashfn(sk))]; - - if((sk->next = *skp) != NULL) - (*skp)->pprev = &sk->next; - *skp = sk; - sk->pprev = skp; - if(state == TCP_LISTEN) - tcp_sk_bindify(sk); + __tcp_put_port(sk); } SOCKHASH_UNLOCK(); } @@ -1048,8 +1036,8 @@ newsk->rcv_saddr= LOOPBACK4_IPV6; newsk->prot->hash(newsk); + tcp_inherit_port(sk, newsk); add_to_prot_sklist(newsk); - sk->data_ready(sk, 0); /* Deliver SIGIO */ return newsk; @@ -1633,11 +1621,9 @@ /* Clean up a locked TCP bind bucket, this only happens if a * port is allocated for a socket, but it never fully connects. - * In which case we will find num to be non-zero and daddr to - * be zero. 
*/ - if(ipv6_addr_any(&(sk->net_pinfo.af_inet6.daddr)) && sk->num != 0) - tcp_bucket_unlock(sk); + if(sk->prev != NULL) + tcp_put_port(sk); return inet6_destroy_sock(sk); } @@ -1664,9 +1650,7 @@ tcp_v6_do_rcv, /* backlog_rcv */ tcp_v6_hash, /* hash */ tcp_v6_unhash, /* unhash */ - tcp_v6_rehash, /* rehash */ - tcp_good_socknum, /* good_socknum */ - tcp_v6_verify_bind, /* verify_bind */ + tcp_v6_get_port, /* get_port */ 128, /* max_header */ 0, /* retransmits */ "TCPv6", /* name */ diff -urN linux-2.2.10-ac5/net/ipv6/udp.c linux-2.2.10-ac6/net/ipv6/udp.c --- linux-2.2.10-ac5/net/ipv6/udp.c Tue Jul 13 00:33:10 1999 +++ linux-2.2.10-ac6/net/ipv6/udp.c Sat Jul 17 01:18:31 1999 @@ -7,7 +7,7 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.40 1999/05/08 20:00:32 davem Exp $ + * $Id: udp.c,v 1.40.2.1 1999/06/20 20:14:55 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -49,104 +49,101 @@ /* Grrr, addr_type already calculated by caller, but I don't want * to add some silly "cookie" argument to this method just for that. */ -static int udp_v6_verify_bind(struct sock *sk, unsigned short snum) +static int udp_v6_get_port(struct sock *sk, unsigned short snum) { - struct sock *sk2; - int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr); - int retval = 0, sk_reuse = sk->reuse; - SOCKHASH_LOCK(); - for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) { - if((sk2->num == snum) && (sk2 != sk)) { - unsigned char state = sk2->state; - int sk2_reuse = sk2->reuse; - - /* Two sockets can be bound to the same port if they're - * bound to different interfaces. 
- */ + if (snum == 0) { + int best_size_so_far, best, result, i; - if(sk2->bound_dev_if != sk->bound_dev_if) - continue; - - if(addr_type == IPV6_ADDR_ANY || (!sk2->rcv_saddr)) { - if((!sk2_reuse) || - (!sk_reuse) || - (state == TCP_LISTEN)) { - retval = 1; - break; - } - } else if(!ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, - &sk2->net_pinfo.af_inet6.rcv_saddr)) { - if((!sk_reuse) || - (!sk2_reuse) || - (state == TCP_LISTEN)) { - retval = 1; - break; - } + if (udp_port_rover > sysctl_local_port_range[1] || + udp_port_rover < sysctl_local_port_range[0]) + udp_port_rover = sysctl_local_port_range[0]; + best_size_so_far = 32767; + best = result = udp_port_rover; + for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { + struct sock *sk; + int size; + + sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (!sk) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + goto gotit; } + size = 0; + do { + if (++size >= best_size_so_far) + goto next; + } while ((sk = sk->next) != NULL); + best_size_so_far = size; + best = result; + next: + } + result = best; + for(;; result += UDP_HTABLE_SIZE) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + if (!udp_lport_inuse(result)) + break; + } +gotit: + udp_port_rover = snum = result; + } else { + struct sock *sk2; + int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr); + + for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + sk2 != NULL; + sk2 = sk2->next) { + if (sk2->num == snum && + sk2 != sk && + sk2->bound_dev_if == sk->bound_dev_if && + (!sk2->rcv_saddr || + addr_type == IPV6_ADDR_ANY || + !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, + &sk2->net_pinfo.af_inet6.rcv_saddr)) && + (!sk2->reuse || !sk->reuse)) + goto fail; } } + + sk->num = snum; + SOCKHASH_UNLOCK(); + return 0; + +fail: SOCKHASH_UNLOCK(); - 
return retval; + return 1; } static void udp_v6_hash(struct sock *sk) { - struct sock **skp; - int num = sk->num; - - num &= (UDP_HTABLE_SIZE - 1); - skp = &udp_hash[num]; + struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)]; SOCKHASH_LOCK(); - sk->next = *skp; + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; *skp = sk; - sk->hashent = num; + sk->pprev = skp; SOCKHASH_UNLOCK(); } static void udp_v6_unhash(struct sock *sk) { - struct sock **skp; - int num = sk->num; - - num &= (UDP_HTABLE_SIZE - 1); - skp = &udp_hash[num]; - SOCKHASH_LOCK(); - while(*skp != NULL) { - if(*skp == sk) { - *skp = sk->next; - break; - } - skp = &((*skp)->next); + if (sk->pprev) { + if (sk->next) + sk->next->pprev = sk->pprev; + *sk->pprev = sk->next; + sk->pprev = NULL; } SOCKHASH_UNLOCK(); } -static void udp_v6_rehash(struct sock *sk) -{ - struct sock **skp; - int num = sk->num; - int oldnum = sk->hashent; - - num &= (UDP_HTABLE_SIZE - 1); - skp = &udp_hash[oldnum]; - - SOCKHASH_LOCK(); - while(*skp != NULL) { - if(*skp == sk) { - *skp = sk->next; - break; - } - skp = &((*skp)->next); - } - sk->next = udp_hash[num]; - udp_hash[num] = sk; - sk->hashent = num; - SOCKHASH_UNLOCK(); -} - static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, struct in6_addr *daddr, u16 dport, int dif) { @@ -893,7 +890,6 @@ "UDPv6" /* name */ }; - struct proto udpv6_prot = { (struct sock *)&udpv6_prot, /* sklist_next */ (struct sock *)&udpv6_prot, /* sklist_prev */ @@ -916,9 +912,7 @@ udpv6_queue_rcv_skb, /* backlog_rcv */ udp_v6_hash, /* hash */ udp_v6_unhash, /* unhash */ - udp_v6_rehash, /* rehash */ - udp_good_socknum, /* good_socknum */ - udp_v6_verify_bind, /* verify_bind */ + udp_v6_get_port, /* get_port */ 128, /* max_header */ 0, /* retransmits */ "UDP", /* name */ diff -urN linux-2.2.10-ac5/net/netsyms.c linux-2.2.10-ac6/net/netsyms.c --- linux-2.2.10-ac5/net/netsyms.c Tue Jul 13 00:33:05 1999 +++ linux-2.2.10-ac6/net/netsyms.c Sat Jul 17 01:18:31 1999 @@ 
-60,6 +60,9 @@ #include extern int tcp_tw_death_row_slot; +extern int sysctl_local_port_range[2]; +extern int tcp_port_rover; +extern int udp_port_rover; #endif #endif @@ -278,11 +281,9 @@ EXPORT_SYMBOL(inet_recvmsg); /* Socket demultiplexing. */ -EXPORT_SYMBOL(tcp_good_socknum); EXPORT_SYMBOL(tcp_established_hash); EXPORT_SYMBOL(tcp_listening_hash); EXPORT_SYMBOL(tcp_bound_hash); -EXPORT_SYMBOL(udp_good_socknum); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(destroy_sock); @@ -323,7 +324,9 @@ EXPORT_SYMBOL(tcp_v4_conn_request); EXPORT_SYMBOL(tcp_create_openreq_child); EXPORT_SYMBOL(tcp_bucket_create); -EXPORT_SYMBOL(tcp_bucket_unlock); +EXPORT_SYMBOL(__tcp_put_port); +EXPORT_SYMBOL(tcp_put_port); +EXPORT_SYMBOL(tcp_inherit_port); EXPORT_SYMBOL(tcp_v4_syn_recv_sock); EXPORT_SYMBOL(tcp_v4_do_rcv); EXPORT_SYMBOL(tcp_v4_connect); @@ -339,6 +342,9 @@ EXPORT_SYMBOL(tcp_connect); EXPORT_SYMBOL(tcp_make_synack); EXPORT_SYMBOL(tcp_tw_death_row_slot); +EXPORT_SYMBOL(sysctl_local_port_range); +EXPORT_SYMBOL(tcp_port_rover); +EXPORT_SYMBOL(udp_port_rover); EXPORT_SYMBOL(tcp_sync_mss); EXPORT_SYMBOL(net_statistics);