# This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet 1.1194.1.17 -> 1.1236 # drivers/net/8139too.c 1.42 -> 1.44 # drivers/net/3c59x.c 1.19 -> 1.20 # drivers/net/pci-skeleton.c 1.18 -> 1.19 # include/linux/skbuff.h 1.15 -> 1.16 # net/ipv6/ndisc.c 1.20 -> 1.21 # drivers/net/Config.in 1.60.1.1 -> 1.62 # drivers/net/amd8111e.c 1.2 -> 1.5 # drivers/net/e100/e100_main.c 1.36 -> 1.48 # drivers/net/e100/e100_test.c 1.7 -> 1.8 # drivers/net/wireless/airo.c 1.24 -> 1.25 # drivers/net/e1000/e1000_ethtool.c 1.15 -> 1.18 # drivers/net/tlan.c 1.9 -> 1.11 # include/linux/if_arcnet.h 1.1 -> 1.2 # drivers/net/sk98lin/skge.c 1.12 -> 1.13 # drivers/net/e1000/e1000_hw.h 1.8 -> 1.11 # Documentation/networking/ifenslave.c 1.3 -> 1.9 # net/core/skbuff.c 1.9 -> 1.10 # include/linux/if_vlan.h 1.3 -> 1.4 # drivers/net/r8169.c 1.6.1.1 -> 1.10 # drivers/net/amd8111e.h 1.1 -> 1.2 # include/net/irda/irlan_common.h 1.1 -> 1.2 # drivers/net/e100/e100_phy.c 1.6 -> 1.7 # drivers/net/typhoon.c 1.1 -> 1.2 # include/linux/ethtool.h 1.12 -> 1.14 # net/irda/irlan/irlan_eth.c 1.3 -> 1.4 # Documentation/networking/bonding.txt 1.6 -> 1.11 # include/linux/if_bonding.h 1.8 -> 1.15 # drivers/net/pcnet32.c 1.31 -> 1.33 # drivers/net/e1000/e1000_osdep.h 1.8 -> 1.9 # drivers/net/Makefile 1.33 -> 1.35 # drivers/net/tlan.h 1.3 -> 1.4 # drivers/net/dl2k.h 1.11 -> 1.12 # drivers/net/e1000/e1000.h 1.14 -> 1.18 # drivers/net/bonding.c 1.16 -> 1.34 drivers/net/bonding/bond_main.c (moved) # net/ipv6/addrconf.c 1.25 -> 1.26 # drivers/net/e1000/e1000_main.c 1.32 -> 1.39 # drivers/net/tg3.c 1.63 -> 1.64 # drivers/net/sis900.c 1.28 -> 1.30 # drivers/net/e100/e100.h 1.15 -> 1.18 # drivers/net/via-rhine.c 1.37 -> 1.38 # drivers/net/arcnet/rfc1201.c 1.4 -> 1.5 # drivers/net/8139cp.c 1.27 -> 1.29 # net/core/dev.c 1.34.1.1 -> 1.36 # 
drivers/net/tulip/tulip_core.c 1.39 -> 1.40 # drivers/net/sundance.c 1.35 -> 1.39 # drivers/net/eepro100.c 1.51 -> 1.52 # drivers/net/arcnet/arcnet.c 1.8 -> 1.9 # drivers/net/e1000/e1000_hw.c 1.11 -> 1.13 # drivers/net/eepro.c 1.13 -> 1.14 # drivers/net/tokenring/olympic.c 1.14.1.1 -> 1.16 # drivers/net/cs89x0.c 1.9 -> 1.10 # Documentation/Configure.help 1.179.1.3 -> 1.182 # include/net/if_inet6.h 1.2 -> 1.3 # drivers/net/ns83820.c 1.18 -> 1.19 # drivers/net/e1000/Makefile 1.4 -> 1.5 # (new) -> 1.3 drivers/net/bonding/Makefile # (new) -> 1.2 drivers/net/bonding/bond_alb.c # (new) -> 1.5 drivers/net/bonding/bonding.h # (new) -> 1.3 drivers/net/bonding/bond_3ad.h # (new) -> 1.2 drivers/net/bonding/bond_alb.h # (new) -> 1.6 drivers/net/bonding/bond_3ad.c # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 03/06/19 jgarzik@redhat.com 1.1229 # Merge redhat.com:/garz/repo/marcelo-2.4 # into redhat.com:/garz/repo/net-drivers-2.4 # -------------------------------------------- # 03/06/19 akpm@digeo.com 1.1230 # [PATCH] Additional 3c980 device support # # From: "J.A. Magallon" # # Adds support for a couple of 3c980 variants which are in pci.ids, but not in # the driver. # -------------------------------------------- # 03/06/19 ak@muc.de 1.1231 # [PATCH] Remove copied inet_aton code in bond_main.c # # According to a report the my_inet_aton code in bond_main.c is copied # from 4.4BSD, but it doesn't carry a BSD copyright license. In addition # it is somewhat redundant with the standard in_aton. Convert it # to use the linux function. # # Error handling is a bit worse than before, but not much. # # Patch for 2.5 bonding. The 2.4 version has the same problem, but afaik # it is scheduled to be replaced by the 2.5 codebase anyways. 
# # -Andi # -------------------------------------------- # 03/06/19 linux-kernel@vger.kernel.org 1.1232 # [PATCH] new eepro100 PDI ID # # [PATCH] new eepro100 PDI ID # # From: Tom Alsberg # # Add support for a new eepro100 PCI ID. # -------------------------------------------- # 03/06/19 zwane@linuxpower.ca 1.1233 # [PATCH] Remove warning due to comparison in drivers/net/pcnet32.c # # drivers/net/pcnet32.c: In function `pcnet32_init_ring': # drivers/net/pcnet32.c:1006: warning: comparison between pointer and integer # -------------------------------------------- # 03/06/19 sam@mars.ravnborg.org 1.1234 # [netdrvr sis900] make function headers readable by kernel-doc tool # -------------------------------------------- # 03/06/19 jgarzik@redhat.com 1.1235 # [netdrvr sis900] minor fixes from 2.5 # # spelling, C99 initializers, jiffy wrap, set_bit # -------------------------------------------- # 03/06/19 bernie@develer.com 1.1236 # [PATCH] PATCH: fix bug in drivers/net/cs89x0.c:set_mac_address() # # Hello Andrew, Jeff and Alan, # # the following patch fixes a bug in the CS89xx net device which # would set new MAC address through SIOCSIFHWADDR _only_ when # net_debug is set, which is obviously not what it was meant to do. # The original code bogusly interpreted the addr argument as a buffer # containing the MAC address instead of a struct sockaddr. # # Applies as-is to 2.4.20 and with offset to 2.5.69. Please forward # it to Linus and Marcelo. This bug has been found and fixed by # Stefano Fedrigo . # -------------------------------------------- # diff -Nru a/Documentation/Configure.help b/Documentation/Configure.help --- a/Documentation/Configure.help Thu Jun 19 23:46:53 2003 +++ b/Documentation/Configure.help Thu Jun 19 23:46:53 2003 @@ -11861,15 +11861,6 @@ module, say M here and read as well as . -New Tulip bus configuration -CONFIG_TULIP_MWI - This configures your Tulip card specifically for the card and - system cache line size type you are using. 
- - This is experimental code, not yet tested on many boards. - - If unsure, say N. - Use PCI shared memory for NIC registers CONFIG_TULIP_MMIO Use PCI shared memory for the NIC registers, rather than going through diff -Nru a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt --- a/Documentation/networking/bonding.txt Thu Jun 19 23:46:52 2003 +++ b/Documentation/networking/bonding.txt Thu Jun 19 23:46:52 2003 @@ -43,10 +43,10 @@ For the latest version of the bonding driver, use kernel 2.4.12 or above (otherwise you will need to apply a patch). -Configure kernel with `make menuconfig/xconfig/config', and select -"Bonding driver support" in the "Network device support" section. It is -recommended to configure the driver as module since it is currently the only way -to pass parameters to the driver and configure more than one bonding device. +Configure kernel with `make menuconfig/xconfig/config', and select "Bonding +driver support" in the "Network device support" section. It is recommended +to configure the driver as module since it is currently the only way to +pass parameters to the driver and configure more than one bonding device. Build and install the new kernel and modules. @@ -108,17 +108,17 @@ SLAVE=yes BOOTPROTO=none -Use DEVICE=eth1 in the ifcfg-eth1 config file. If you configure a second bonding -interface (bond1), use MASTER=bond1 in the config file to make the network -interface be a slave of bond1. +Use DEVICE=eth1 in the ifcfg-eth1 config file. If you configure a second +bonding interface (bond1), use MASTER=bond1 in the config file to make the +network interface be a slave of bond1. Restart the networking subsystem or just bring up the bonding device if your administration tools allow it. Otherwise, reboot. On Red Hat distros you can issue `ifup bond0' or `/etc/rc.d/init.d/network restart'. 
-If the administration tools of your distribution do not support master/slave -notation in configuring network interfaces, you will need to manually configure -the bonding device with the following commands: +If the administration tools of your distribution do not support +master/slave notation in configuring network interfaces, you will need to +manually configure the bonding device with the following commands: # /sbin/ifconfig bond0 192.168.1.1 netmask 255.255.255.0 \ broadcast 192.168.1.255 up @@ -166,8 +166,9 @@ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1 This problem is avoided by loading the bonding driver before any network -drivers participating in a bond. Below is an example of loading the bonding -driver first, the IP address 192.168.1.1 is correctly associated with ifDescr.2. +drivers participating in a bond. Below is an example of loading the bonding +driver first, the IP address 192.168.1.1 is correctly associated with +ifDescr.2. interfaces.ifTable.ifEntry.ifDescr.1 = lo interfaces.ifTable.ifEntry.ifDescr.2 = bond0 @@ -200,6 +201,44 @@ parameters be specified, otherwise serious network degradation will occur during link failures. +arp_interval + + Specifies the ARP monitoring frequency in milli-seconds. + If ARP monitoring is used in a load-balancing mode (mode 0 or 2), the + switch should be configured in a mode that evenly distributes packets + across all links - such as round-robin. If the switch is configured to + distribute the packets in an XOR fashion, all replies from the ARP + targets will be received on the same link which could cause the other + team members to fail. ARP monitoring should not be used in conjunction + with miimon. A value of 0 disables ARP monitoring. The default value + is 0. + +arp_ip_target + + Specifies the ip addresses to use when arp_interval is > 0. These + are the targets of the ARP request sent to determine the health of + the link to the targets. Specify these values in ddd.ddd.ddd.ddd + format. 
Multiple ip adresses must be seperated by a comma. At least + one ip address needs to be given for ARP monitoring to work. The + maximum number of targets that can be specified is set at 16. + +downdelay + + Specifies the delay time in milli-seconds to disable a link after a + link failure has been detected. This should be a multiple of miimon + value, otherwise the value will be rounded. The default value is 0. + +lacp_rate + + Option specifying the rate in which we'll ask our link partner to + transmit LACPDU packets in 802.3ad mode. Possible values are: + + slow or 0 + Request partner to transmit LACPDUs every 30 seconds (default) + + fast or 1 + Request partner to transmit LACPDUs every 1 second + max_bonds Specifies the number of bonding devices to create for this @@ -207,18 +246,27 @@ the bonding driver is not already loaded, then bond0, bond1 and bond2 will be created. The default value is 1. +miimon + + Specifies the frequency in milli-seconds that MII link monitoring + will occur. A value of zero disables MII link monitoring. A value + of 100 is a good starting point. See High Availability section for + additional information. The default value is 0. + mode - Specifies one of four bonding policies. The default is -round-robin (balance-rr). Possible values are (you can use either the -text or numeric option): + Specifies one of the bonding policies. The default is + round-robin (balance-rr). Possible values are (you can use + either the text or numeric option): balance-rr or 0 + Round-robin policy: Transmit in a sequential order from the first available slave through the last. This mode provides load balancing and fault tolerance. active-backup or 1 + Active-backup policy: Only one slave in the bond is active. A different slave becomes active if, and only if, the active slave fails. The bond's MAC address is @@ -226,7 +274,8 @@ to avoid confusing the switch. This mode provides fault tolerance. 
- balance-xor or 2 + balance-xor or 2 + XOR policy: Transmit based on [(source MAC address XOR'd with destination MAC address) modula slave count]. This selects the same slave for each @@ -234,16 +283,125 @@ balancing and fault tolerance. broadcast or 3 + Broadcast policy: transmits everything on all slave interfaces. This mode provides fault tolerance. -miimon - - Specifies the frequency in milli-seconds that MII link monitoring will - occur. A value of zero disables MII link monitoring. A value of - 100 is a good starting point. See High Availability section for - additional information. The default value is 0. + 802.3ad or 4 + + IEEE 802.3ad Dynamic link aggregation. Creates aggregation + groups that share the same speed and duplex settings. + Transmits and receives on all slaves in the active + aggregator. + + Pre-requisites: + + 1. Ethtool support in the base drivers for retrieving the + speed and duplex of each slave. + + 2. A switch that supports IEEE 802.3ad Dynamic link + aggregation. + + balance-tlb or 5 + + Adaptive transmit load balancing: channel bonding that does + not require any special switch support. The outgoing + traffic is distributed according to the current load + (computed relative to the speed) on each slave. Incoming + traffic is received by the current slave. If the receiving + slave fails, another slave takes over the MAC address of + the failed receiving slave. + + Prerequisite: + + Ethtool support in the base drivers for retrieving the + speed of each slave. + + balance-alb or 6 + + Adaptive load balancing: includes balance-tlb + receive + load balancing (rlb) for IPV4 traffic and does not require + any special switch support. The receive load balancing is + achieved by ARP negotiation. 
The bonding driver intercepts + the ARP Replies sent by the server on their way out and + overwrites the src hw address with the unique hw address of + one of the slaves in the bond such that different clients + use different hw addresses for the server. + + Receive traffic from connections created by the server is + also balanced. When the server sends an ARP Request the + bonding driver copies and saves the client's IP information + from the ARP. When the ARP Reply arrives from the client, + its hw address is retrieved and the bonding driver + initiates an ARP reply to this client assigning it to one + of the slaves in the bond. A problematic outcome of using + ARP negotiation for balancing is that each time that an ARP + request is broadcasted it uses the hw address of the + bond. Hence, clients learn the hw address of the bond and + the balancing of receive traffic collapses to the current + salve. This is handled by sending updates (ARP Replies) to + all the clients with their assigned hw address such that + the traffic is redistributed. Receive traffic is also + redistributed when a new slave is added to the bond and + when an inactive slave is re-activated. The receive load is + distributed sequentially (round robin) among the group of + highest speed slaves in the bond. + + When a link is reconnected or a new slave joins the bond + the receive traffic is redistributed among all active + slaves in the bond by intiating ARP Replies with the + selected mac address to each of the clients. The updelay + modeprobe parameter must be set to a value equal or greater + than the switch's forwarding delay so that the ARP Replies + sent to the clients will not be blocked by the switch. + + Prerequisites: + + 1. Ethtool support in the base drivers for retrieving the + speed of each slave. + + 2. Base driver support for setting the hw address of a + device also when it is open. 
This is required so that there + will always be one slave in the team using the bond hw + address (the current_slave) while having a unique hw + address for each slave in the bond. If the current_slave + fails it's hw address is swapped with the new current_slave + that was chosen. + +multicast + + Option specifying the mode of operation for multicast support. + Possible values are: + + disabled or 0 + Disabled (no multicast support) + + active or 1 + Enabled on active slave only, useful in active-backup mode + + all or 2 + Enabled on all slaves, this is the default + +primary + + A string (eth0, eth2, etc) to equate to a primary device. If this + value is entered, and the device is on-line, it will be used first + as the output media. Only when this device is off-line, will + alternate devices be used. Otherwise, once a failover is detected + and a new default output is chosen, it will remain the output media + until it too fails. This is useful when one slave was preferred + over another, i.e. when one slave is 1000Mbps and another is + 100Mbps. If the 1000Mbps slave fails and is later restored, it may + be preferred the faster slave gracefully become the active slave - + without deliberately failing the 100Mbps slave. Specifying a + primary is only valid in active-backup mode. +updelay + + Specifies the delay time in milli-seconds to enable a link after a + link up status has been detected. This should be a multiple of miimon + value, otherwise the value will be rounded. The default value is 0. + use_carrier Specifies whether or not miimon should use MII or ETHTOOL @@ -265,89 +423,37 @@ 0 will use the deprecated MII / ETHTOOL ioctls. The default value is 1. -downdelay - - Specifies the delay time in milli-seconds to disable a link after a - link failure has been detected. This should be a multiple of miimon - value, otherwise the value will be rounded. The default value is 0. 
- -updelay - - Specifies the delay time in milli-seconds to enable a link after a - link up status has been detected. This should be a multiple of miimon - value, otherwise the value will be rounded. The default value is 0. - -arp_interval - - Specifies the ARP monitoring frequency in milli-seconds. - If ARP monitoring is used in a load-balancing mode (mode 0 or 2), the - switch should be configured in a mode that evenly distributes packets - across all links - such as round-robin. If the switch is configured to - distribute the packets in an XOR fashion, all replies from the ARP - targets will be received on the same link which could cause the other - team members to fail. ARP monitoring should not be used in conjunction - with miimon. A value of 0 disables ARP monitoring. The default value - is 0. - -arp_ip_target - - Specifies the ip addresses to use when arp_interval is > 0. These are - the targets of the ARP request sent to determine the health of the link - to the targets. Specify these values in ddd.ddd.ddd.ddd format. - Multiple ip adresses must be seperated by a comma. At least one ip - address needs to be given for ARP monitoring to work. The maximum number - of targets that can be specified is set at 16. - -primary - - A string (eth0, eth2, etc) to equate to a primary device. If this - value is entered, and the device is on-line, it will be used first as - the output media. Only when this device is off-line, will alternate - devices be used. Otherwise, once a failover is detected and a new - default output is chosen, it will remain the output media until it too - fails. This is useful when one slave was preferred over another, i.e. - when one slave is 1000Mbps and another is 100Mbps. If the 1000Mbps - slave fails and is later restored, it may be preferred the faster slave - gracefully become the active slave - without deliberately failing the - 100Mbps slave. Specifying a primary is only valid in active-backup mode. 
- -multicast - - Option specifying the mode of operation for multicast support. - Possible values are: - - disabled or 0 - Disabled (no multicast support) - - active or 1 - Enabled on active slave only, useful in active-backup mode - - all or 2 - Enabled on all slaves, this is the default - Configuring Multiple Bonds ========================== -If several bonding interfaces are required, the driver must be loaded -multiple times. For example, to configure two bonding interfaces with link -monitoring performed every 100 milli-seconds, the /etc/conf.modules should +If several bonding interfaces are required, either specify the max_bonds +parameter (described above), or load the driver multiple times. Using +the max_bonds parameter is less complicated, but has the limitation that +all bonding instances created will have the same options. Loading the +driver multiple times allows each instance of the driver to have differing +options. + +For example, to configure two bonding interfaces, one with mii link +monitoring performed every 100 milliseconds, and one with ARP link +monitoring performed every 200 milliseconds, the /etc/conf.modules should resemble the following: alias bond0 bonding alias bond1 bonding options bond0 miimon=100 -options bond1 -o bonding1 miimon=100 +options bond1 -o bonding1 arp_interval=200 arp_ip_target=10.0.0.1 Configuring Multiple ARP Targets ================================ -While ARP monitoring can be done with just one target, it can be usefull +While ARP monitoring can be done with just one target, it can be useful in a High Availability setup to have several targets to monitor. In the case of just one target, the target itself may go down or have a problem making it unresponsive to ARP requests. Having an additional target (or -several) would increase the reliability of the ARP monitoring. +several) increases the reliability of the ARP monitoring. 
+ Multiple ARP targets must be seperated by commas as follows: # example options for ARP monitoring with three targets @@ -410,9 +516,10 @@ Switch Configuration ==================== -While the switch does not need to be configured when the active-backup -policy is used (mode=1), it does need to be configured for the round-robin, -XOR, and broadcast policies (mode=0, mode=2, and mode=3). +While the switch does not need to be configured when the active-backup, +balance-tlb or balance-alb policies (mode=1,5,6) are used, it does need to +be configured for the round-robin, XOR, broadcast, or 802.3ad policies +(mode=0,2,3,4). Verifying Bond Configuration @@ -420,7 +527,7 @@ 1) Bonding information files ---------------------------- -The bonding driver information files reside in the /proc/net/bond* directories. +The bonding driver information files reside in the /proc/net/bond* directories. Sample contents of /proc/net/bond0/info after the driver is loaded with parameters of mode=0 and miimon=1000 is shown below. @@ -445,7 +552,8 @@ The network configuration can be verified using the ifconfig command. In the example below, the bond0 interface is the master (MASTER) while eth0 and eth1 are slaves (SLAVE). Notice all slaves of bond0 have the same MAC address -(HWaddr) as bond0. +(HWaddr) as bond0 for all modes except TLB and ALB that require a unique MAC +address for each slave. [root]# /sbin/ifconfig bond0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4 @@ -488,8 +596,7 @@ 3. How many bonding devices can I have? - One for each module you load. See section on Module Parameters for how - to accomplish this. + There is no limit. 4. How many slaves can a bonding device have? @@ -508,10 +615,11 @@ For ethernet cards not supporting MII status, the arp_interval and arp_ip_target parameters must be specified for bonding to work correctly. 
If packets have not been sent or received during the - specified arp_interval durration, an ARP request is sent to the targets - to generate send and receive traffic. If after this interval, either - the successful send and/or receive count has not incremented, the next - slave in the sequence will become the active slave. + specified arp_interval durration, an ARP request is sent to the + targets to generate send and receive traffic. If after this + interval, either the successful send and/or receive count has not + incremented, the next slave in the sequence will become the active + slave. If neither mii_monitor and arp_interval is configured, the bonding driver will not handle this situation very well. The driver will @@ -522,15 +630,16 @@ 6. Can bonding be used for High Availability? - Yes, if you use MII monitoring and ALL your cards support MII link - status reporting. See section on High Availability for more information. + Yes, if you use MII monitoring and ALL your cards support MII link + status reporting. See section on High Availability for more + information. 7. Which switches/systems does it work with? In round-robin and XOR mode, it works with systems that support trunking: - * Cisco 5500 series (look for EtherChannel support). + * Many Cisco switches and routers (look for EtherChannel support). * SunTrunking software. * Alteon AceDirector switches / WebOS (use Trunks). * BayStack Switches (trunks must be explicitly configured). Stackable @@ -538,7 +647,17 @@ units. * Linux bonding, of course ! - In active-backup mode, it should work with any Layer-II switche. + In 802.3ad mode, it works with with systems that support IEEE 802.3ad + Dynamic Link Aggregation: + + * Extreme networks Summit 7i (look for link-aggregation). + * Many Cisco switches and routers (look for LACP support; this may + require an upgrade to your IOS software; LACP support was added + by Cisco in late 2002). 
+ * Foundry Big Iron 4000 + + In active-backup, balance-tlb and balance-alb modes, it should work + with any Layer-II switch. 8. Where does a bonding device get its MAC address from? @@ -591,6 +710,20 @@ Broadcast policy transmits everything on all slave interfaces. + 802.3ad, based on XOR but distributes traffic among all interfaces + in the active aggregator. + + Transmit load balancing (balance-tlb) balances the traffic + according to the current load on each slave. The balancing is + clients based and the least loaded slave is selected for each new + client. The load of each slave is calculated relative to its speed + and enables load balancing in mixed speed teams. + + Adaptive load balancing (balance-alb) uses the Transmit load + balancing for the transmit load. The receive load is balanced only + among the group of highest speed active slaves in the bond. The + load is distributed with round-robin i.e. next available slave in + the high speed group of active slaves. High Availability ================= @@ -826,10 +959,6 @@ Use the arp_interval/arp_ip_target parameters to count incoming/outgoing frames. - - A Transmit Load Balancing policy is not currently available. This mode - allows every slave in the bond to transmit while only one receives. If - the "receiving" slave fails, another slave takes over the MAC address of - the failed receiving slave. Resources and Links diff -Nru a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c --- a/Documentation/networking/ifenslave.c Thu Jun 19 23:46:52 2003 +++ b/Documentation/networking/ifenslave.c Thu Jun 19 23:46:52 2003 @@ -51,10 +51,51 @@ * multiple interfaces are specified on a single ifenslave command * (ifenslave bond0 eth0 eth1). * + * - 2003/03/18 - Tsippy Mendelson and + * Shmulik Hen + * - Moved setting the slave's mac address and openning it, from + * the application to the driver. This enables support of modes + * that need to use the unique mac address of each slave. 
+ * The driver also takes care of closing the slave and restoring its + * original mac address upon release. + * In addition, block possibility of enslaving before the master is up. + * This prevents putting the system in an undefined state. + * + * - 2003/05/01 - Amir Noam + * - Added ABI version control to restore compatibility between + * new/old ifenslave and new/old bonding. + * - Prevent adding an adapter that is already a slave. + * Fixes the problem of stalling the transmission and leaving + * the slave in a down state. + * + * - 2003/05/01 - Shmulik Hen + * - Prevent enslaving if the bond device is down. + * Fixes the problem of leaving the system in unstable state and + * halting when trying to remove the module. + * - Close socket on all abnormal exists. + * - Add versioning scheme that follows that of the bonding driver. + * current version is 1.0.0 as a base line. + * + * - 2003/05/22 - Jay Vosburgh + * - ifenslave -c was broken; it's now fixed + * - Fixed problem with routes vanishing from master during enslave + * processing. + * + * - 2003/05/27 - Amir Noam + * - Fix backward compatibility issue: + * For drivers not using ABI versions, slave was set down while + * it should be left up before enslaving. + * - For opt_c: slave should not be set to the master's setting + * while it is runnig. It was already set during enslave. To + * simplify things, it is now handeled separately. */ +#define APP_VERSION "1.0.11" +#define APP_RELDATE "May 29, 2003" +#define APP_NAME "ifenslave" + static char *version = -"ifenslave.c:v0.07 9/9/97 Donald Becker (becker@cesdis.gsfc.nasa.gov).\n" +APP_NAME ".c:v" APP_VERSION " (" APP_RELDATE ") " "\nDonald Becker (becker@cesdis.gsfc.nasa.gov).\n" "detach support added on 2000/10/02 by Willy Tarreau (willy at meta-x.org).\n" "2.4 kernel support added on 2001/02/16 by Chad N. 
Tindel (ctindel at ieee dot org.\n"; @@ -103,6 +144,12 @@ #include #include +typedef unsigned long long u64; /* hack, so we may include kernel's ethtool.h */ +typedef __uint32_t u32; /* ditto */ +typedef __uint16_t u16; /* ditto */ +typedef __uint8_t u8; /* ditto */ +#include + struct option longopts[] = { /* { name has_arg *flag val } */ {"all-interfaces", 0, 0, 'a'}, /* Show all interfaces. */ @@ -130,18 +177,19 @@ int skfd = -1; /* AF_INET socket for ioctl() calls. */ static void if_print(char *ifname); +static int get_abi_ver(char *master_ifname); int main(int argc, char **argv) { struct ifreq ifr2, if_hwaddr, if_ipaddr, if_metric, if_mtu, if_dstaddr; struct ifreq if_netmask, if_brdaddr, if_flags; - int goterr = 0; + int rv, goterr = 0; int c, errflag = 0; sa_family_t master_family; char **spp, *master_ifname, *slave_ifname; int hwaddr_notset; - int master_up; + int abi_ver = 0; while ((c = getopt_long(argc, argv, "acdfrvV?h", longopts, 0)) != EOF) switch (c) { @@ -207,6 +255,7 @@ char **tempp = spp; if ((master_ifname == NULL)||(slave_ifname == NULL)||(*tempp++ != NULL)) { fprintf(stderr, usage_msg); + (void) close(skfd); return 2; } } @@ -218,6 +267,13 @@ exit(0); } + /* exchange abi version with bonding driver */ + abi_ver = get_abi_ver(master_ifname); + if (abi_ver < 0) { + (void) close(skfd); + exit(1); + } + /* Get the vitals from the master interface. 
*/ { struct ifreq *ifra[7] = { &if_ipaddr, &if_mtu, &if_dstaddr, @@ -242,6 +298,13 @@ } } + /* check if master is up; if not then fail any operation */ + if (!(if_flags.ifr_flags & IFF_UP)) { + fprintf(stderr, "Illegal operation; the specified master interface '%s' is not up.\n", master_ifname); + (void) close(skfd); + exit (1); + } + hwaddr_notset = 1; /* assume master's address not set yet */ for (i = 0; hwaddr_notset && (i < 6); i++) { hwaddr_notset &= ((unsigned char *)if_hwaddr.ifr_hwaddr.sa_data)[i] == 0; @@ -254,7 +317,7 @@ " with ethernet-like network interfaces.\n" " Use the '-f' option to force the operation.\n", master_ifname); - + (void) close(skfd); exit (1); } master_family = if_hwaddr.ifr_hwaddr.sa_family; @@ -278,39 +341,50 @@ fprintf(stderr, "SIOCBONDRELEASE: cannot detach %s from %s. errno=%s.\n", slave_ifname, master_ifname, strerror(errno)); } - else { /* we'll set the interface down to avoid any conflicts due to - same IP/MAC */ + else if (abi_ver < 1) { + /* The driver is using an old ABI, so we'll set the interface + * down to avoid any conflicts due to same IP/MAC + */ strncpy(ifr2.ifr_name, slave_ifname, IFNAMSIZ); if (ioctl(skfd, SIOCGIFFLAGS, &ifr2) < 0) { int saved_errno = errno; fprintf(stderr, "SIOCGIFFLAGS on %s failed: %s\n", slave_ifname, - strerror(saved_errno)); + strerror(saved_errno)); } else { ifr2.ifr_flags &= ~(IFF_UP | IFF_RUNNING); if (ioctl(skfd, SIOCSIFFLAGS, &ifr2) < 0) { int saved_errno = errno; fprintf(stderr, "Shutting down interface %s failed: %s\n", - slave_ifname, strerror(saved_errno)); + slave_ifname, strerror(saved_errno)); } } } - } - else { /* attach a slave interface to the master */ - /* two possibilities : - - if hwaddr_notset, do nothing. The bond will assign the - hwaddr from it's first slave. 
- - if !hwaddr_notset, assign the master's hwaddr to each slave - */ + } else if (opt_c) { + strncpy(if_flags.ifr_name, master_ifname, IFNAMSIZ); + strncpy(if_flags.ifr_slave, slave_ifname, IFNAMSIZ); + if ((ioctl(skfd, SIOCBONDCHANGEACTIVE, &if_flags) < 0) && + (ioctl(skfd, BOND_CHANGE_ACTIVE_OLD, &if_flags) < 0)) { + fprintf(stderr, "SIOCBONDCHANGEACTIVE: %s.\n", strerror(errno)); + } + } else { /* attach a slave interface to the master */ strncpy(ifr2.ifr_name, slave_ifname, IFNAMSIZ); if (ioctl(skfd, SIOCGIFFLAGS, &ifr2) < 0) { int saved_errno = errno; fprintf(stderr, "SIOCGIFFLAGS on %s failed: %s\n", slave_ifname, strerror(saved_errno)); + (void) close(skfd); return 1; } + if ((ifr2.ifr_flags & IFF_SLAVE) && !opt_r) { + fprintf(stderr, "%s is already a slave\n", slave_ifname); + (void) close(skfd); + return 1; + } + + /* if hwaddr_notset, assign the slave hw address to the master */ if (hwaddr_notset) { /* assign the slave hw address to the * master since it currently does not @@ -322,43 +396,18 @@ * TODO: put this and the "else" portion in * a function. 
*/ - goterr = 0; - master_up = 0; - if (if_flags.ifr_flags & IFF_UP) { - if_flags.ifr_flags &= ~IFF_UP; - if (ioctl(skfd, SIOCSIFFLAGS, - &if_flags) < 0) { - goterr = 1; - fprintf(stderr, - "Shutting down " - "interface %s failed: " - "%s\n", - master_ifname, - strerror(errno)); - } else { - /* we took the master down, - * so we must bring it up - */ - master_up = 1; - } - } - - if (!goterr) { - /* get the slaves MAC address */ - strncpy(if_hwaddr.ifr_name, - slave_ifname, IFNAMSIZ); - if (ioctl(skfd, SIOCGIFHWADDR, - &if_hwaddr) < 0) { - fprintf(stderr, - "Could not get MAC " - "address of %s: %s\n", - slave_ifname, - strerror(errno)); - strncpy(if_hwaddr.ifr_name, - master_ifname, - IFNAMSIZ); - goterr=1; - } + /* get the slaves MAC address */ + strncpy(if_hwaddr.ifr_name, slave_ifname, + IFNAMSIZ); + rv = ioctl(skfd, SIOCGIFHWADDR, &if_hwaddr); + if (-1 == rv) { + fprintf(stderr, "Could not get MAC " + "address of %s: %s\n", + slave_ifname, + strerror(errno)); + strncpy(if_hwaddr.ifr_name, + master_ifname, IFNAMSIZ); + goterr = 1; } if (!goterr) { @@ -376,45 +425,35 @@ hwaddr_notset = 0; } } + } else if (abi_ver < 1) { /* if (hwaddr_notset) */ - if (master_up) { - if_flags.ifr_flags |= IFF_UP; - if (ioctl(skfd, SIOCSIFFLAGS, - &if_flags) < 0) { - fprintf(stderr, - "Bringing up interface " - "%s failed: %s\n", - master_ifname, - strerror(errno)); - } - } - - } else { - /* we'll assign master's hwaddr to this slave */ + /* The driver is using an old ABI, so we'll set the interface + * down and assign the master's hwaddr to it + */ if (ifr2.ifr_flags & IFF_UP) { ifr2.ifr_flags &= ~IFF_UP; if (ioctl(skfd, SIOCSIFFLAGS, &ifr2) < 0) { int saved_errno = errno; fprintf(stderr, "Shutting down interface %s failed: %s\n", - slave_ifname, strerror(saved_errno)); + slave_ifname, strerror(saved_errno)); } } - + strncpy(if_hwaddr.ifr_name, slave_ifname, IFNAMSIZ); if (ioctl(skfd, SIOCSIFHWADDR, &if_hwaddr) < 0) { int saved_errno = errno; fprintf(stderr, "SIOCSIFHWADDR on %s 
failed: %s\n", if_hwaddr.ifr_name, - strerror(saved_errno)); + strerror(saved_errno)); if (saved_errno == EBUSY) fprintf(stderr, " The slave device %s is busy: it must be" - " idle before running this command.\n", slave_ifname); + " idle before running this command.\n", slave_ifname); else if (saved_errno == EOPNOTSUPP) fprintf(stderr, " The slave device you specified does not support" - " setting the MAC address.\n Your kernel likely does not" - " support slave devices.\n"); + " setting the MAC address.\n Your kernel likely does not" + " support slave devices.\n"); else if (saved_errno == EINVAL) fprintf(stderr, " The slave device's address type does not match" - " the master's address type.\n"); + " the master's address type.\n"); } else { if (verbose) { unsigned char *hwaddr = if_hwaddr.ifr_hwaddr.sa_data; @@ -424,10 +463,11 @@ } } } - + if (*spp && !strcmp(*spp, "metric")) { if (*++spp == NULL) { fprintf(stderr, usage_msg); + (void) close(skfd); exit(2); } if_metric.ifr_metric = atoi(*spp); @@ -500,33 +540,44 @@ } } - ifr2.ifr_flags |= IFF_UP; /* the interface will need to be up to be bonded */ - if ((ifr2.ifr_flags &= ~(IFF_SLAVE | IFF_MASTER)) == 0 - || strncpy(ifr2.ifr_name, slave_ifname, IFNAMSIZ) <= 0 - || ioctl(skfd, SIOCSIFFLAGS, &ifr2) < 0) { - fprintf(stderr, - "Something broke setting the slave (%s) flags: %s.\n", - slave_ifname, strerror(errno)); + if (abi_ver < 1) { + + /* The driver is using an old ABI, so we'll set the interface + * up before enslaving it + */ + ifr2.ifr_flags |= IFF_UP; + if ((ifr2.ifr_flags &= ~(IFF_SLAVE | IFF_MASTER)) == 0 + || strncpy(ifr2.ifr_name, slave_ifname, IFNAMSIZ) <= 0 + || ioctl(skfd, SIOCSIFFLAGS, &ifr2) < 0) { + fprintf(stderr, + "Something broke setting the slave (%s) flags: %s.\n", + slave_ifname, strerror(errno)); + } else { + if (verbose) + printf("Set the slave's (%s) flags %4.4x.\n", + slave_ifname, if_flags.ifr_flags); + } } else { - if (verbose) - printf("Set the slave's (%s) flags %4.4x.\n", slave_ifname, 
if_flags.ifr_flags); + /* the bonding module takes care of setting the slave's mac address + * and opening its interface + */ + if (ifr2.ifr_flags & IFF_UP) { /* the interface will need to be down */ + ifr2.ifr_flags &= ~IFF_UP; + if (ioctl(skfd, SIOCSIFFLAGS, &ifr2) < 0) { + int saved_errno = errno; + fprintf(stderr, "Shutting down interface %s failed: %s\n", + slave_ifname, strerror(saved_errno)); + } + } } - + /* Do the real thing */ - if ( ! opt_r) { + if (!opt_r) { strncpy(if_flags.ifr_name, master_ifname, IFNAMSIZ); strncpy(if_flags.ifr_slave, slave_ifname, IFNAMSIZ); - if (!opt_c) { - if ((ioctl(skfd, SIOCBONDENSLAVE, &if_flags) < 0) && - (ioctl(skfd, BOND_ENSLAVE_OLD, &if_flags) < 0)) { - fprintf(stderr, "SIOCBONDENSLAVE: %s.\n", strerror(errno)); - } - } - else { - if ((ioctl(skfd, SIOCBONDCHANGEACTIVE, &if_flags) < 0) && - (ioctl(skfd, BOND_CHANGE_ACTIVE_OLD, &if_flags) < 0)) { - fprintf(stderr, "SIOCBONDCHANGEACTIVE: %s.\n", strerror(errno)); - } + if ((ioctl(skfd, SIOCBONDENSLAVE, &if_flags) < 0) && + (ioctl(skfd, BOND_ENSLAVE_OLD, &if_flags) < 0)) { + fprintf(stderr, "SIOCBONDENSLAVE: %s.\n", strerror(errno)); } } } @@ -638,6 +689,37 @@ fprintf(stderr, "%s: unknown interface.\n", ifname); } } + +static int get_abi_ver(char *master_ifname) +{ + struct ifreq ifr; + struct ethtool_drvinfo info; + int abi_ver = 0; + + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, master_ifname, IFNAMSIZ); + ifr.ifr_data = (caddr_t)&info; + + info.cmd = ETHTOOL_GDRVINFO; + strncpy(info.driver, "ifenslave", 32); + snprintf(info.fw_version, 32, "%d", BOND_ABI_VERSION); + if (ioctl(skfd, SIOCETHTOOL, &ifr) >= 0) { + char *endptr; + + abi_ver = strtoul(info.fw_version, &endptr, 0); + if (*endptr) { + fprintf(stderr, "Error: got invalid string as an ABI " + "version from the bonding module\n"); + return -1; + } + } + + if (verbose) { + printf("ABI ver is %d\n", abi_ver); + } + return abi_ver; +} + /* diff -Nru a/drivers/net/3c59x.c b/drivers/net/3c59x.c --- 
a/drivers/net/3c59x.c Thu Jun 19 23:46:51 2003 +++ b/drivers/net/3c59x.c Thu Jun 19 23:46:51 2003 @@ -442,6 +442,8 @@ CH_3CCFEM656_1, CH_3C450, CH_3C920, + CH_3C982A, + CH_3C982B, }; @@ -497,8 +499,8 @@ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, {"3c980 Cyclone", PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, - {"3c982 Dual Port Server Cyclone", - PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, + {"3c980C Python-T", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, {"3cSOHO100-TX Hurricane", PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, @@ -535,6 +537,11 @@ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, {"3c920 Tornado", PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, + {"3c982 Hydra Dual Port A", + PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_HWCKSM|HAS_NWAY, 128, }, + {"3c982 Hydra Dual Port B", + PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_HWCKSM|HAS_NWAY, 128, }, + {0,}, /* 0 terminated list. */ }; @@ -579,6 +586,8 @@ { 0x10B7, 0x6564, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656_1 }, { 0x10B7, 0x4500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C450 }, { 0x10B7, 0x9201, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C920 }, + { 0x10B7, 0x1201, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C982A }, + { 0x10B7, 0x1202, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C982B }, {0,} /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, vortex_pci_tbl); diff -Nru a/drivers/net/8139cp.c b/drivers/net/8139cp.c --- a/drivers/net/8139cp.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/8139cp.c Thu Jun 19 23:46:52 2003 @@ -1771,7 +1771,6 @@ long pciaddr; unsigned int addr_len, i; u8 pci_rev, cache_size; - u16 pci_command; unsigned int board_type = (unsigned int) ent->driver_data; #ifndef MODULE @@ -1837,7 +1836,7 @@ } /* Configure DMA attributes. 
*/ - if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff)) { + if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL)) { cp->pci_using_dac = 1; } else { rc = pci_set_dma_mask(pdev, (u64) 0xffffffff); @@ -1929,12 +1928,8 @@ } /* enable busmastering and memory-write-invalidate */ - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); - if (!(pci_command & PCI_COMMAND_INVALIDATE)) { - pci_command |= PCI_COMMAND_INVALIDATE; - pci_write_config_word(pdev, PCI_COMMAND, pci_command); - } pci_set_master(pdev); + pci_set_mwi(pdev); if (cp->wol_enabled) cp_set_d3_state (cp); diff -Nru a/drivers/net/8139too.c b/drivers/net/8139too.c --- a/drivers/net/8139too.c Thu Jun 19 23:46:51 2003 +++ b/drivers/net/8139too.c Thu Jun 19 23:46:51 2003 @@ -978,7 +978,7 @@ dev->irq = pdev->irq; - /* dev->priv/tp zeroed and aligned in init_etherdev */ + /* dev->priv/tp zeroed and aligned in alloc_etherdev */ tp = dev->priv; /* note: tp->chipset set in rtl8139_init_board */ @@ -1687,7 +1687,7 @@ entry = tp->cur_tx % NUM_TX_DESC; if (likely(len < TX_BUF_SIZE)) { - if(len < ETH_ZLEN) + if (len < ETH_ZLEN) memset(tp->tx_buf[entry], 0, ETH_ZLEN); skb_copy_and_csum_dev(skb, tp->tx_buf[entry]); dev_kfree_skb(skb); @@ -2146,7 +2146,7 @@ spin_unlock_irqrestore (&tp->lock, flags); - synchronize_irq (); + synchronize_irq (); /* racy, but that's ok here */ free_irq (dev->irq, dev); rtl8139_tx_clear (tp); diff -Nru a/drivers/net/Config.in b/drivers/net/Config.in --- a/drivers/net/Config.in Thu Jun 19 23:46:51 2003 +++ b/drivers/net/Config.in Thu Jun 19 23:46:51 2003 @@ -257,6 +257,9 @@ fi dep_tristate 'D-Link DL2000-based Gigabit Ethernet support' CONFIG_DL2K $CONFIG_PCI dep_tristate 'Intel(R) PRO/1000 Gigabit Ethernet support' CONFIG_E1000 $CONFIG_PCI +if [ "$CONFIG_E1000" != "n" ]; then + bool ' Use Rx Polling (NAPI)' CONFIG_E1000_NAPI +fi dep_tristate 'MyriCOM Gigabit Ethernet support' CONFIG_MYRI_SBUS $CONFIG_SBUS dep_tristate 'National Semiconductor DP83820 support' CONFIG_NS83820 $CONFIG_PCI 
dep_tristate 'Packet Engines Hamachi GNIC-II support' CONFIG_HAMACHI $CONFIG_PCI diff -Nru a/drivers/net/Makefile b/drivers/net/Makefile --- a/drivers/net/Makefile Thu Jun 19 23:46:52 2003 +++ b/drivers/net/Makefile Thu Jun 19 23:46:52 2003 @@ -29,6 +29,10 @@ obj-y += e1000/e1000.o endif +ifeq ($(CONFIG_BONDING),y) + obj-y += bonding/bonding.o +endif + ifeq ($(CONFIG_ISDN_PPP),y) obj-$(CONFIG_ISDN) += slhc.o endif @@ -46,6 +50,7 @@ subdir-$(CONFIG_SKFP) += skfp subdir-$(CONFIG_E100) += e100 subdir-$(CONFIG_E1000) += e1000 +subdir-$(CONFIG_BONDING) += bonding # # link order important here @@ -158,7 +163,6 @@ obj-$(CONFIG_STRIP) += strip.o obj-$(CONFIG_DUMMY) += dummy.o -obj-$(CONFIG_BONDING) += bonding.o obj-$(CONFIG_DE600) += de600.o obj-$(CONFIG_DE620) += de620.o obj-$(CONFIG_AT1500) += lance.o @@ -229,7 +233,7 @@ obj-$(CONFIG_ETHER00) +=ether00.o obj-$(CONFIG_DL2K) += dl2k.o obj-$(CONFIG_R8169) += r8169.o -obj-$(CONFIG_AMD8111_ETH) += amd8111e.o +obj-$(CONFIG_AMD8111_ETH) += amd8111e.o mii.o # non-drivers/net drivers who want mii lib obj-$(CONFIG_PCMCIA_SMC91C92) += mii.o diff -Nru a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c --- a/drivers/net/amd8111e.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/amd8111e.c Thu Jun 19 23:46:52 2003 @@ -1,6 +1,6 @@ /* Advanced Micro Devices Inc. AMD8111E Linux Network Driver - * Copyright (C) 2002 Advanced Micro Devices + * Copyright (C) 2003 Advanced Micro Devices * * * Copyright 2001,2002 Jeff Garzik [ 8139cp.c,tg3.c ] @@ -41,6 +41,20 @@ Kernel Mode Revision History: + 3.0.0 + Initial Revision. + 3.0.1 + 1. Dynamic interrupt coalescing. + 2. Removed prev_stats. + 3. MII support. + 4. Dynamic IPG support + 3.0.2 05/29/2003 + 1. Bug fix: Fixed failure to send jumbo packets larger than 4k. + 2. Bug fix: Fixed VLAN support failure. + 3. Bug fix: Fixed receive interrupt coalescing bug. + 4. Dynamic IPG support is disabled by default. + 3.0.3 06/05/2003 + 1. Bug fix: Fixed failure to close the interface if SMP is enabled. 
*/ @@ -77,13 +91,16 @@ #include "amd8111e.h" #define MODULE_NAME "amd8111e" -#define MODULE_VERSION "3.0.0" +#define MODULE_VERSION "3.0.3" MODULE_AUTHOR("Advanced Micro Devices, Inc."); -MODULE_DESCRIPTION ("AMD8111 based 10/100 Ethernet Controller. Driver Version 3.0.0"); +MODULE_DESCRIPTION ("AMD8111 based 10/100 Ethernet Controller. Driver Version 3.0.3"); MODULE_LICENSE("GPL"); - MODULE_PARM(speed_duplex, "1-" __MODULE_STRING (MAX_UNITS) "i"); MODULE_PARM_DESC(speed_duplex, "Set device speed and duplex modes, 0: Auto Negotitate, 1: 10Mbps Half Duplex, 2: 10Mbps Full Duplex, 3: 100Mbps Half Duplex, 4: 100Mbps Full Duplex"); +MODULE_PARM(coalesce, "1-" __MODULE_STRING(MAX_UNITS) "i"); +MODULE_PARM_DESC(coalesce, "Enable or Disable interrupt coalescing, 1: Enable, 0: Disable"); +MODULE_PARM(dynamic_ipg, "1-" __MODULE_STRING(MAX_UNITS) "i"); +MODULE_PARM_DESC(dynamic_ipg, "Enable or Disable dynamic IPG, 1: Enable, 0: Disable"); static struct pci_device_id amd8111e_pci_tbl[] __devinitdata = { @@ -92,6 +109,88 @@ { 0, } }; +/* +This function will read the PHY registers. +*/ +static int amd8111e_read_phy(struct amd8111e_priv* lp, int phy_id, int reg, u32* val) +{ + void * mmio = lp->mmio; + unsigned int reg_val; + unsigned int repeat= REPEAT_CNT; + + reg_val = readl(mmio + PHY_ACCESS); + while (reg_val & PHY_CMD_ACTIVE) + reg_val = readl( mmio + PHY_ACCESS ); + + writel( PHY_RD_CMD | ((phy_id & 0x1f) << 21) | + ((reg & 0x1f) << 16), mmio +PHY_ACCESS); + do{ + reg_val = readl(mmio + PHY_ACCESS); + udelay(30); /* It takes 30 us to read/write data */ + } while (--repeat && (reg_val & PHY_CMD_ACTIVE)); + if(reg_val & PHY_RD_ERR) + goto err_phy_read; + + *val = reg_val & 0xffff; + return 0; +err_phy_read: + *val = 0; + return -EINVAL; + +} + +/* +This function will write into PHY registers. 
+*/ +static int amd8111e_write_phy(struct amd8111e_priv* lp,int phy_id, int reg, u32 val) +{ + unsigned int repeat = REPEAT_CNT + void * mmio = lp->mmio; + unsigned int reg_val; + + reg_val = readl(mmio + PHY_ACCESS); + while (reg_val & PHY_CMD_ACTIVE) + reg_val = readl( mmio + PHY_ACCESS ); + + writel( PHY_WR_CMD | ((phy_id & 0x1f) << 21) | + ((reg & 0x1f) << 16)|val, mmio + PHY_ACCESS); + + do{ + reg_val = readl(mmio + PHY_ACCESS); + udelay(30); /* It takes 30 us to read/write the data */ + } while (--repeat && (reg_val & PHY_CMD_ACTIVE)); + + if(reg_val & PHY_RD_ERR) + goto err_phy_write; + + return 0; + +err_phy_write: + return -EINVAL; + +} +/* +This is the mii register read function provided to the mii interface. +*/ +static int amd8111e_mdio_read(struct net_device * dev, int phy_id, int reg_num) +{ + struct amd8111e_priv* lp = dev->priv; + unsigned int reg_val; + + amd8111e_read_phy(lp,phy_id,reg_num,®_val); + return reg_val; + +} + +/* +This is the mii register write function provided to the mii interface. +*/ +static void amd8111e_mdio_write(struct net_device * dev, int phy_id, int reg_num, int val) +{ + struct amd8111e_priv* lp = dev->priv; + + amd8111e_write_phy(lp, phy_id, reg_num, val); +} /* This function will set PHY speed. During initialization sets the original speed to 100 full. 
@@ -99,26 +198,39 @@ static void amd8111e_set_ext_phy(struct net_device *dev) { struct amd8111e_priv *lp = (struct amd8111e_priv *)dev->priv; - unsigned long reg_val = 0; - void * mmio = lp->mmio; - struct amd8111e_link_config *link_config = &lp->link_config; + u32 bmcr,advert,tmp; - if(!lp->opened){ - /* Initializing SPEED_100 and DUPLEX_FULL as original values */ - link_config->orig_speed = SPEED_100; - link_config->orig_duplex = DUPLEX_FULL; - link_config->orig_phy_option = XPHYSP |XPHYFD; - } - reg_val = lp->ext_phy_option; - - /* Disable port manager */ - writel((u32) EN_PMGR, mmio + CMD3 ); + /* Determine mii register values to set the speed */ + advert = amd8111e_mdio_read(dev, PHY_ID, MII_ADVERTISE); + tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); + switch (lp->ext_phy_option){ + + default: + case SPEED_AUTONEG: /* advertise all values */ + tmp |= ( ADVERTISE_10HALF|ADVERTISE_10FULL| + ADVERTISE_100HALF|ADVERTISE_100FULL) ; + break; + case SPEED10_HALF: + tmp |= ADVERTISE_10HALF; + break; + case SPEED10_FULL: + tmp |= ADVERTISE_10FULL; + break; + case SPEED100_HALF: + tmp |= ADVERTISE_100HALF; + break; + case SPEED100_FULL: + tmp |= ADVERTISE_100FULL; + break; + } + + if(advert != tmp) + amd8111e_mdio_write(dev, PHY_ID, MII_ADVERTISE, tmp); + /* Restart auto negotiation */ + bmcr = amd8111e_mdio_read(dev, PHY_ID, MII_BMCR); + bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); + amd8111e_mdio_write(dev, PHY_ID, MII_BMCR, bmcr); - /* Reset PHY */ - writel((u32)XPHYRST | lp->ext_phy_option, mmio + CTRL2); - - /* Enable port manager */ - writel((u32)VAL1 | EN_PMGR, mmio + CMD3 ); } /* @@ -156,7 +268,7 @@ } /* - This will set the receive buffer length corresponding to the mtu size of network interface. +This will set the receive buffer length corresponding to the mtu size of networkinterface. 
*/ static inline void amd8111e_set_rx_buff_len(struct net_device* dev) { @@ -226,13 +338,13 @@ lp->rx_ring[i].buff_phy_addr = cpu_to_le32(lp->rx_dma_addr[i]); lp->rx_ring[i].buff_count = cpu_to_le16(lp->rx_buff_len); - lp->rx_ring[i].rx_dr_offset10 = cpu_to_le16(OWN_BIT); + lp->rx_ring[i].rx_flags = cpu_to_le16(OWN_BIT); } /* Initializing transmit descriptors */ for (i = 0; i < NUM_TX_RING_DR; i++) { lp->tx_ring[i].buff_phy_addr = 0; - lp->tx_ring[i].tx_dr_offset2 = 0; + lp->tx_ring[i].tx_flags = 0; lp->tx_ring[i].buff_count = 0; } @@ -253,6 +365,65 @@ err_no_mem: return -ENOMEM; } +/* This function will set the interrupt coalescing according to the input arguments */ +static int amd8111e_set_coalesce(struct net_device * dev, enum coal_mode cmod) +{ + unsigned int timeout; + unsigned int event_count; + + struct amd8111e_priv *lp = dev->priv; + void* mmio = lp->mmio; + struct amd8111e_coalesce_conf * coal_conf = &lp->coal_conf; + + + switch(cmod) + { + case RX_INTR_COAL : + timeout = coal_conf->rx_timeout; + event_count = coal_conf->rx_event_count; + if( timeout > MAX_TIMEOUT || + event_count > MAX_EVENT_COUNT ) + return -EINVAL; + + timeout = timeout * DELAY_TIMER_CONV; + writel(VAL0|STINTEN, mmio+INTEN0); + writel((u32)DLY_INT_A_R0|( event_count<< 16 )|timeout, + mmio+DLY_INT_A); + break; + + case TX_INTR_COAL : + timeout = coal_conf->tx_timeout; + event_count = coal_conf->tx_event_count; + if( timeout > MAX_TIMEOUT || + event_count > MAX_EVENT_COUNT ) + return -EINVAL; + + + timeout = timeout * DELAY_TIMER_CONV; + writel(VAL0|STINTEN,mmio+INTEN0); + writel((u32)DLY_INT_B_T0|( event_count<< 16 )|timeout, + mmio+DLY_INT_B); + break; + + case DISABLE_COAL: + writel(0,mmio+STVAL); + writel(STINTEN, mmio+INTEN0); + writel(0, mmio +DLY_INT_B); + writel(0, mmio+DLY_INT_A); + break; + case ENABLE_COAL: + /* Start the timer */ + writel((u32)SOFT_TIMER_FREQ, mmio+STVAL); /* 0.5 sec */ + writel(VAL0|STINTEN, mmio+INTEN0); + break; + default: + break; + + } + return 0; + +} 
+ /* This function initializes the device registers and starts the device. */ @@ -267,13 +438,17 @@ if(amd8111e_init_ring(dev)) return -ENOMEM; + + /* enable the port manager and set auto negotiation always */ + writel((u32) VAL1|EN_PMGR, mmio + CMD3 ); + writel((u32)XPHYANE|XPHYRST , mmio + CTRL2); amd8111e_set_ext_phy(dev); /* set control registers */ reg_val = readl(mmio + CTRL1); - - writel( reg_val| XMTSP_128 | CACHE_ALIGN | B1_MASK, mmio + CTRL1 ); + reg_val &= ~XMTSP_MASK; + writel( reg_val| XMTSP_128 | CACHE_ALIGN, mmio + CTRL1 ); /* enable interrupt */ writel( APINT5EN | APINT4EN | APINT3EN | APINT2EN | APINT1EN | @@ -288,15 +463,21 @@ writew((u32)NUM_TX_RING_DR, mmio + XMT_RING_LEN0); writew((u16)NUM_RX_RING_DR, mmio + RCV_RING_LEN0); + + /* set default IPG to 96 */ + writew((u32)DEFAULT_IPG,mmio+IPG); + writew((u32)(DEFAULT_IPG-IFS1_DELTA), mmio + IFS1); if(lp->options & OPTION_JUMBO_ENABLE){ writel((u32)VAL2|JUMBO, mmio + CMD3); /* Reset REX_UFLO */ writel( REX_UFLO, mmio + CMD2); /* Should not set REX_UFLO for jumbo frames */ - writel( VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2); - }else + writel( VAL0 | APAD_XMT|REX_RTRY , mmio + CMD2); + }else{ writel( VAL0 | APAD_XMT | REX_RTRY|REX_UFLO, mmio + CMD2); + writel((u32)JUMBO, mmio + CMD3); + } #if AMD8111E_VLAN_TAG_USED writel((u32) VAL2|VSIZE|VL_TAG_DEL, mmio + CMD3); @@ -306,11 +487,20 @@ /* Setting the MAC address to the device */ for(i = 0; i < ETH_ADDR_LEN; i++) writeb( dev->dev_addr[i], mmio + PADR + i ); + + /* Enable interrupt coalesce */ + if(lp->options & OPTION_INTR_COAL_ENABLE){ + printk(KERN_INFO "%s: Interrupt Coalescing Enabled.\n", + dev->name); + amd8111e_set_coalesce(dev,ENABLE_COAL); + } /* set RUN bit to start the chip */ writel(VAL2 | RDMD0, mmio + CMD0); writel(VAL0 | INTREN | RUN, mmio + CMD0); + /* To avoid PCI posting bug */ + readl(mmio+CMD0); return 0; } /* @@ -383,7 +573,7 @@ writew(MIB_CLEAR, mmio + MIB_ADDR); /* Clear LARF */ - AMD8111E_WRITE_REG64(mmio, LADRF,logic_filter); + 
amd8111e_writeq(*(u64*)logic_filter,mmio+LADRF); /* SRAM_SIZE register */ reg_val = readl(mmio + SRAM_SIZE); @@ -393,8 +583,11 @@ #if AMD8111E_VLAN_TAG_USED writel(VAL2|VSIZE|VL_TAG_DEL, mmio + CMD3 ); #endif - /* CMD2 register */ - reg_val = readl(mmio + CMD2); + /* Set default value to CTRL1 Register */ + writel(CTRL1_DEFAULT, mmio + CTRL1); + + /* To avoid PCI posting bug */ + readl(mmio + CMD2); } @@ -412,6 +605,9 @@ /* Clear INT0 */ intr0 = readl(lp->mmio + INT0); writel(intr0, lp->mmio + INT0); + + /* To avoid PCI posting bug */ + readl(lp->mmio + INT0); } @@ -421,6 +617,9 @@ static void amd8111e_stop_chip(struct amd8111e_priv* lp) { writel(RUN, lp->mmio + CMD0); + + /* To avoid PCI posting bug */ + readl(lp->mmio + CMD0); } /* @@ -467,11 +666,10 @@ struct amd8111e_priv* lp = dev->priv; int tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK; int status; - /* Complete all the transmit packet */ while (lp->tx_complete_idx != lp->tx_idx){ tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK; - status = le16_to_cpu(lp->tx_ring[tx_index].tx_dr_offset2); + status = le16_to_cpu(lp->tx_ring[tx_index].tx_flags); if(status & OWN_BIT) break; /* It still hasn't been Txed */ @@ -487,11 +685,15 @@ lp->tx_skbuff[tx_index] = 0; lp->tx_dma_addr[tx_index] = 0; } - lp->tx_complete_idx++; + lp->tx_complete_idx++; + /*COAL update tx coalescing parameters */ + lp->coal_conf.tx_packets++; + lp->coal_conf.tx_bytes += lp->tx_ring[tx_index].buff_count; if (netif_queue_stopped(dev) && lp->tx_complete_idx > lp->tx_idx - NUM_TX_BUFFERS +2){ /* The ring is no longer full, clear tbusy. */ + /* lp->tx_full = 0; */ netif_wake_queue (dev); } } @@ -516,33 +718,31 @@ /* If we own the next entry, it's a new packet. Send it up. 
*/ while(++num_rx_pkt <= max_rx_pkt){ - if(lp->rx_ring[rx_index].rx_dr_offset10 & OWN_BIT) + if(lp->rx_ring[rx_index].rx_flags & OWN_BIT) return 0; /* check if err summary bit is set */ - if(le16_to_cpu(lp->rx_ring[rx_index].rx_dr_offset10) & ERR_BIT){ + if(le16_to_cpu(lp->rx_ring[rx_index].rx_flags) & ERR_BIT){ /* * There is a tricky error noted by John Murphy, * to Russ Nelson: Even with full-sized * buffers it's possible for a jabber packet to use two * buffers, with only the last correctly noting the error. */ /* reseting flags */ - lp->rx_ring[rx_index].rx_dr_offset10 &= - cpu_to_le16(RESET_RX_FLAGS); + lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; goto err_next_pkt; } /* check for STP and ENP */ - status = le16_to_cpu(lp->rx_ring[rx_index].rx_dr_offset10); + status = le16_to_cpu(lp->rx_ring[rx_index].rx_flags); if(!((status & STP_BIT) && (status & ENP_BIT))){ /* reseting flags */ - lp->rx_ring[rx_index].rx_dr_offset10 &= - cpu_to_le16(RESET_RX_FLAGS); + lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; goto err_next_pkt; } pkt_len = le16_to_cpu(lp->rx_ring[rx_index].msg_count) - 4; #if AMD8111E_VLAN_TAG_USED - vtag = le16_to_cpu(lp->rx_ring[rx_index].rx_dr_offset10) & TT_MASK; + vtag = le16_to_cpu(lp->rx_ring[rx_index].rx_flags) & TT_MASK; /*MAC will strip vlan tag*/ if(lp->vlgrp != NULL && vtag !=0) min_pkt_len =MIN_PKT_LEN - 4; @@ -551,16 +751,14 @@ min_pkt_len =MIN_PKT_LEN; if (pkt_len < min_pkt_len) { - lp->rx_ring[rx_index].rx_dr_offset10 &= - cpu_to_le16(RESET_RX_FLAGS); + lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; lp->stats.rx_errors++; goto err_next_pkt; } if(!(new_skb = dev_alloc_skb(lp->rx_buff_len))){ /* if allocation fail, ignore that pkt and go to next one */ - lp->rx_ring[rx_index].rx_dr_offset10 &= - cpu_to_le16(RESET_RX_FLAGS); + lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; lp->stats.rx_errors++; goto err_next_pkt; } @@ -580,22 +778,26 @@ #if AMD8111E_VLAN_TAG_USED - vtag = lp->rx_ring[rx_index].rx_dr_offset10 & TT_MASK; + vtag 
= lp->rx_ring[rx_index].rx_flags & TT_MASK; if(lp->vlgrp != NULL && (vtag == TT_VLAN_TAGGED)){ amd8111e_vlan_rx(lp, skb, lp->rx_ring[rx_index].tag_ctrl_info); } else #endif - dev->last_rx = jiffies; netif_rx (skb); + /*COAL update rx coalescing parameters*/ + lp->coal_conf.rx_packets++; + lp->coal_conf.rx_bytes += pkt_len; + + dev->last_rx = jiffies; err_next_pkt: lp->rx_ring[rx_index].buff_phy_addr = cpu_to_le32(lp->rx_dma_addr[rx_index]); lp->rx_ring[rx_index].buff_count = cpu_to_le16(lp->rx_buff_len-2); - lp->rx_ring[rx_index].rx_dr_offset10 |= cpu_to_le16(OWN_BIT); + lp->rx_ring[rx_index].rx_flags |= cpu_to_le16(OWN_BIT); rx_index = (++lp->rx_idx) & RX_RING_DR_MOD_MASK; } @@ -603,8 +805,8 @@ } /* -This function will store the original speed to restore later, if autoneg is turned on. This speed will be set later when the autoneg is turned off. If the link status indicates that link is down, that will be indicated to the kernel */ - +This function will indicate the link status to the kernel. 
+*/ static int amd8111e_link_change(struct net_device* dev) { struct amd8111e_priv *lp = dev->priv; @@ -614,21 +816,11 @@ status0 = readl(lp->mmio + STAT0); if(status0 & LINK_STATS){ - if(status0 & AUTONEG_COMPLETE){ - /* keeping the original speeds */ - if((lp->link_config.speed != SPEED_INVALID)&& - (lp->link_config.duplex != DUPLEX_INVALID)){ - lp->link_config.orig_speed = lp->link_config.speed; - lp->link_config.orig_duplex = lp->link_config.duplex; - lp->link_config.orig_phy_option = lp->ext_phy_option; - } - - lp->link_config.speed = SPEED_INVALID; - lp->link_config.duplex = DUPLEX_INVALID; + if(status0 & AUTONEG_COMPLETE) lp->link_config.autoneg = AUTONEG_ENABLE; - netif_carrier_on(dev); - return 0; - } + else + lp->link_config.autoneg = AUTONEG_DISABLE; + if(status0 & FULL_DPLX) lp->link_config.duplex = DUPLEX_FULL; else @@ -638,13 +830,17 @@ lp->link_config.speed = SPEED_10; else if(speed == PHY_SPEED_100) lp->link_config.speed = SPEED_100; - lp->link_config.autoneg = AUTONEG_DISABLE; + + printk(KERN_INFO "%s: Link is Up. Speed is %s Mbps %s Duplex\n", dev->name, + (lp->link_config.speed == SPEED_100) ? "100": "10", + (lp->link_config.duplex == DUPLEX_FULL)? "Full": "Half"); netif_carrier_on(dev); } else{ lp->link_config.speed = SPEED_INVALID; lp->link_config.duplex = DUPLEX_INVALID; lp->link_config.autoneg = AUTONEG_INVALID; + printk(KERN_INFO "%s: Link is Down.\n",dev->name); netif_carrier_off(dev); } @@ -671,114 +867,236 @@ } /* -This function retuurns the reads the mib registers and returns the hardware statistics. It adds the previous statistics with new values.*/ +This function reads the mib registers and returns the hardware statistics. It updates previous internal driver statistics with new values. 
+*/ static struct net_device_stats *amd8111e_get_stats(struct net_device * dev) { struct amd8111e_priv *lp = dev->priv; void * mmio = lp->mmio; unsigned long flags; - struct net_device_stats *prev_stats = &lp->prev_stats; + /* struct net_device_stats *prev_stats = &lp->prev_stats; */ struct net_device_stats* new_stats = &lp->stats; if(!lp->opened) - return prev_stats; + return &lp->stats; spin_lock_irqsave (&lp->lock, flags); /* stats.rx_packets */ - new_stats->rx_packets = prev_stats->rx_packets+ - amd8111e_read_mib(mmio, rcv_broadcast_pkts)+ - amd8111e_read_mib(mmio, rcv_multicast_pkts)+ - amd8111e_read_mib(mmio, rcv_unicast_pkts); + new_stats->rx_packets = amd8111e_read_mib(mmio, rcv_broadcast_pkts)+ + amd8111e_read_mib(mmio, rcv_multicast_pkts)+ + amd8111e_read_mib(mmio, rcv_unicast_pkts); /* stats.tx_packets */ - new_stats->tx_packets = prev_stats->tx_packets+ - amd8111e_read_mib(mmio, xmt_packets); + new_stats->tx_packets = amd8111e_read_mib(mmio, xmt_packets); /*stats.rx_bytes */ - new_stats->rx_bytes = prev_stats->rx_bytes+ - amd8111e_read_mib(mmio, rcv_octets); + new_stats->rx_bytes = amd8111e_read_mib(mmio, rcv_octets); /* stats.tx_bytes */ - new_stats->tx_bytes = prev_stats->tx_bytes+ - amd8111e_read_mib(mmio, xmt_octets); + new_stats->tx_bytes = amd8111e_read_mib(mmio, xmt_octets); /* stats.rx_errors */ - new_stats->rx_errors = prev_stats->rx_errors+ - amd8111e_read_mib(mmio, rcv_undersize_pkts)+ - amd8111e_read_mib(mmio, rcv_fragments)+ - amd8111e_read_mib(mmio, rcv_jabbers)+ - amd8111e_read_mib(mmio, rcv_alignment_errors)+ - amd8111e_read_mib(mmio, rcv_fcs_errors)+ - amd8111e_read_mib(mmio, rcv_miss_pkts); + new_stats->rx_errors = amd8111e_read_mib(mmio, rcv_undersize_pkts)+ + amd8111e_read_mib(mmio, rcv_fragments)+ + amd8111e_read_mib(mmio, rcv_jabbers)+ + amd8111e_read_mib(mmio, rcv_alignment_errors)+ + amd8111e_read_mib(mmio, rcv_fcs_errors)+ + amd8111e_read_mib(mmio, rcv_miss_pkts); /* stats.tx_errors */ - new_stats->tx_errors = 
prev_stats->tx_errors+ - amd8111e_read_mib(mmio, xmt_underrun_pkts); + new_stats->tx_errors = amd8111e_read_mib(mmio, xmt_underrun_pkts); /* stats.rx_dropped*/ - new_stats->rx_dropped = prev_stats->rx_dropped+ - amd8111e_read_mib(mmio, rcv_miss_pkts); + new_stats->rx_dropped = amd8111e_read_mib(mmio, rcv_miss_pkts); /* stats.tx_dropped*/ - new_stats->tx_dropped = prev_stats->tx_dropped+ - amd8111e_read_mib(mmio, xmt_underrun_pkts); + new_stats->tx_dropped = amd8111e_read_mib(mmio, xmt_underrun_pkts); /* stats.multicast*/ - new_stats->multicast = prev_stats->multicast+ - amd8111e_read_mib(mmio, rcv_multicast_pkts); + new_stats->multicast = amd8111e_read_mib(mmio, rcv_multicast_pkts); /* stats.collisions*/ - new_stats->collisions = prev_stats->collisions+ - amd8111e_read_mib(mmio, xmt_collisions); + new_stats->collisions = amd8111e_read_mib(mmio, xmt_collisions); /* stats.rx_length_errors*/ - new_stats->rx_length_errors = prev_stats->rx_length_errors+ + new_stats->rx_length_errors = amd8111e_read_mib(mmio, rcv_undersize_pkts)+ amd8111e_read_mib(mmio, rcv_oversize_pkts); /* stats.rx_over_errors*/ - new_stats->rx_over_errors = prev_stats->rx_over_errors+ - amd8111e_read_mib(mmio, rcv_miss_pkts); + new_stats->rx_over_errors = amd8111e_read_mib(mmio, rcv_miss_pkts); /* stats.rx_crc_errors*/ - new_stats->rx_crc_errors = prev_stats->rx_crc_errors+ - amd8111e_read_mib(mmio, rcv_fcs_errors); + new_stats->rx_crc_errors = amd8111e_read_mib(mmio, rcv_fcs_errors); /* stats.rx_frame_errors*/ - new_stats->rx_frame_errors = prev_stats->rx_frame_errors+ + new_stats->rx_frame_errors = amd8111e_read_mib(mmio, rcv_alignment_errors); /* stats.rx_fifo_errors */ - new_stats->rx_fifo_errors = prev_stats->rx_fifo_errors+ - amd8111e_read_mib(mmio, rcv_miss_pkts); + new_stats->rx_fifo_errors = amd8111e_read_mib(mmio, rcv_miss_pkts); /* stats.rx_missed_errors */ - new_stats->rx_missed_errors = prev_stats->rx_missed_errors+ - amd8111e_read_mib(mmio, rcv_miss_pkts); + new_stats->rx_missed_errors 
= amd8111e_read_mib(mmio, rcv_miss_pkts); /* stats.tx_aborted_errors*/ - new_stats->tx_aborted_errors = prev_stats->tx_aborted_errors+ + new_stats->tx_aborted_errors = amd8111e_read_mib(mmio, xmt_excessive_collision); /* stats.tx_carrier_errors*/ - new_stats->tx_carrier_errors = prev_stats->tx_carrier_errors+ + new_stats->tx_carrier_errors = amd8111e_read_mib(mmio, xmt_loss_carrier); /* stats.tx_fifo_errors*/ - new_stats->tx_fifo_errors = prev_stats->tx_fifo_errors+ - amd8111e_read_mib(mmio, xmt_underrun_pkts); + new_stats->tx_fifo_errors = amd8111e_read_mib(mmio, xmt_underrun_pkts); /* stats.tx_window_errors*/ - new_stats->tx_window_errors = prev_stats->tx_window_errors+ + new_stats->tx_window_errors = amd8111e_read_mib(mmio, xmt_late_collision); + /* Reset the mibs for collecting new statistics */ + /* writew(MIB_CLEAR, mmio + MIB_ADDR);*/ + spin_unlock_irqrestore (&lp->lock, flags); return new_stats; } +/* This function recalculate the interupt coalescing mode on every interrupt +according to the datarate and the packet rate. 
+*/ +static int amd8111e_calc_coalesce(struct net_device *dev) +{ + struct amd8111e_priv *lp = dev->priv; + struct amd8111e_coalesce_conf * coal_conf = &lp->coal_conf; + int tx_pkt_rate; + int rx_pkt_rate; + int tx_data_rate; + int rx_data_rate; + int rx_pkt_size; + int tx_pkt_size; + + tx_pkt_rate = coal_conf->tx_packets - coal_conf->tx_prev_packets; + coal_conf->tx_prev_packets = coal_conf->tx_packets; + + tx_data_rate = coal_conf->tx_bytes - coal_conf->tx_prev_bytes; + coal_conf->tx_prev_bytes = coal_conf->tx_bytes; + + rx_pkt_rate = coal_conf->rx_packets - coal_conf->rx_prev_packets; + coal_conf->rx_prev_packets = coal_conf->rx_packets; + + rx_data_rate = coal_conf->rx_bytes - coal_conf->rx_prev_bytes; + coal_conf->rx_prev_bytes = coal_conf->rx_bytes; + + if(rx_pkt_rate < 800){ + if(coal_conf->rx_coal_type != NO_COALESCE){ + + coal_conf->rx_timeout = 0x0; + coal_conf->rx_event_count = 0; + amd8111e_set_coalesce(dev,RX_INTR_COAL); + coal_conf->rx_coal_type = NO_COALESCE; + } + } + else{ + + rx_pkt_size = rx_data_rate/rx_pkt_rate; + if (rx_pkt_size < 128){ + if(coal_conf->rx_coal_type != NO_COALESCE){ + + coal_conf->rx_timeout = 0; + coal_conf->rx_event_count = 0; + amd8111e_set_coalesce(dev,RX_INTR_COAL); + coal_conf->rx_coal_type = NO_COALESCE; + } + } + else if ( (rx_pkt_size >= 128) && (rx_pkt_size < 512) ){ + + if(coal_conf->rx_coal_type != LOW_COALESCE){ + coal_conf->rx_timeout = 1; + coal_conf->rx_event_count = 4; + amd8111e_set_coalesce(dev,RX_INTR_COAL); + coal_conf->rx_coal_type = LOW_COALESCE; + } + } + else if ((rx_pkt_size >= 512) && (rx_pkt_size < 1024)){ + + if(coal_conf->rx_coal_type != MEDIUM_COALESCE){ + coal_conf->rx_timeout = 1; + coal_conf->rx_event_count = 4; + amd8111e_set_coalesce(dev,RX_INTR_COAL); + coal_conf->rx_coal_type = MEDIUM_COALESCE; + } + + } + else if(rx_pkt_size >= 1024){ + if(coal_conf->rx_coal_type != HIGH_COALESCE){ + coal_conf->rx_timeout = 2; + coal_conf->rx_event_count = 3; + amd8111e_set_coalesce(dev,RX_INTR_COAL); + 
coal_conf->rx_coal_type = HIGH_COALESCE; + } + } + } + /* NOW FOR TX INTR COALESC */ + if(tx_pkt_rate < 800){ + if(coal_conf->tx_coal_type != NO_COALESCE){ + + coal_conf->tx_timeout = 0x0; + coal_conf->tx_event_count = 0; + amd8111e_set_coalesce(dev,TX_INTR_COAL); + coal_conf->tx_coal_type = NO_COALESCE; + } + } + else{ + + tx_pkt_size = tx_data_rate/tx_pkt_rate; + if (tx_pkt_size < 128){ + + if(coal_conf->tx_coal_type != NO_COALESCE){ + + coal_conf->tx_timeout = 0; + coal_conf->tx_event_count = 0; + amd8111e_set_coalesce(dev,TX_INTR_COAL); + coal_conf->tx_coal_type = NO_COALESCE; + } + + } + else if ( (tx_pkt_size >= 128) && (tx_pkt_size < 512) ){ + + if(coal_conf->tx_coal_type != LOW_COALESCE){ + coal_conf->tx_timeout = 1; + coal_conf->tx_event_count = 2; + amd8111e_set_coalesce(dev,TX_INTR_COAL); + coal_conf->tx_coal_type = LOW_COALESCE; + + } + } + else if ((tx_pkt_size >= 512) && (tx_pkt_size < 1024)){ + + if(coal_conf->tx_coal_type != MEDIUM_COALESCE){ + coal_conf->tx_timeout = 2; + coal_conf->tx_event_count = 5; + amd8111e_set_coalesce(dev,TX_INTR_COAL); + coal_conf->tx_coal_type = MEDIUM_COALESCE; + } + + } + else if(tx_pkt_size >= 1024){ + if (tx_pkt_size >= 1024){ + if(coal_conf->tx_coal_type != HIGH_COALESCE){ + coal_conf->tx_timeout = 4; + coal_conf->tx_event_count = 8; + amd8111e_set_coalesce(dev,TX_INTR_COAL); + coal_conf->tx_coal_type = HIGH_COALESCE; + } + } + } + } + return 0; + +} /* -This is device interrupt function. It handles transmit, receive and link change interrupts. +This is device interrupt function. It handles transmit, receive,link change and hardware timer interrupts. */ static void amd8111e_interrupt(int irq, void *dev_id, struct pt_regs *regs) { @@ -791,7 +1109,7 @@ if(dev == NULL) return; - spin_lock (&lp->lock); + if (regs) spin_lock (&lp->lock); /* disabling interrupt */ writel(INTREN, mmio + CMD0); @@ -819,15 +1137,20 @@ /* Check if Link Change Interrupt has occurred. 
*/ if (intr0 & LCINT) amd8111e_link_change(dev); - + + /* Check if Hardware Timer Interrupt has occurred. */ + if (intr0 & STINT) + amd8111e_calc_coalesce(dev); + err_no_interrupt: writel( VAL0 | INTREN,mmio + CMD0); - spin_unlock(&lp->lock); - return; - + + if (regs) spin_unlock(&lp->lock); + } + /* -This function closes the network interface and copies the new set of statistics into the previous statistics structure so that most recent statistics will be available after the interface is down. +This function closes the network interface and updates the statistics so that most recent statistics will be available after the interface is down. */ static int amd8111e_close(struct net_device * dev) { @@ -842,10 +1165,15 @@ netif_carrier_off(lp->amd8111e_net_dev); - spin_unlock_irq(&lp->lock); + /* Delete ipg timer */ + if(lp->options & OPTION_DYN_IPG_ENABLE) + del_timer_sync(&lp->ipg_data.ipg_timer); + spin_unlock_irq(&lp->lock); free_irq(dev->irq, dev); - memcpy(&lp->prev_stats,amd8111e_get_stats(dev), sizeof(lp->prev_stats)); + + /* Update the statistics before closing */ + amd8111e_get_stats(dev); lp->opened = 0; return 0; } @@ -867,7 +1195,12 @@ spin_unlock_irq(&lp->lock); return -ENOMEM; } - + /* Start ipg timer */ + if(lp->options & OPTION_DYN_IPG_ENABLE){ + add_timer(&lp->ipg_data.ipg_timer); + printk(KERN_INFO "%s: Dynamic IPG Enabled.\n",dev->name); + } + lp->opened = 1; spin_unlock_irq(&lp->lock); @@ -905,11 +1238,10 @@ lp->tx_ring[tx_index].buff_count = cpu_to_le16(skb->len); lp->tx_skbuff[tx_index] = skb; - lp->tx_ring[tx_index].tx_dr_offset2 = 0; + lp->tx_ring[tx_index].tx_flags = 0; #if AMD8111E_VLAN_TAG_USED if((lp->vlgrp != NULL) && vlan_tx_tag_present(skb)){ - lp->tx_ring[tx_index].tag_ctrl_cmd |= cpu_to_le32(TCC_VLAN_INSERT); lp->tx_ring[tx_index].tag_ctrl_info = @@ -923,7 +1255,7 @@ (u32) cpu_to_le32(lp->tx_dma_addr[tx_index]); /* Set FCS and LTINT bits */ - lp->tx_ring[tx_index].tx_dr_offset2 |= + lp->tx_ring[tx_index].tx_flags |= cpu_to_le16(OWN_BIT 
| STP_BIT | ENP_BIT|ADD_FCS_BIT|LTINT_BIT); lp->tx_idx++; @@ -946,16 +1278,54 @@ static char* amd8111e_read_regs(struct amd8111e_priv* lp) { void * mmio = lp->mmio; - unsigned char * reg_buff; + u32 * reg_buff; - int i; - reg_buff = kmalloc( AMD8111E_REG_DUMP_LEN,GFP_KERNEL); if(NULL == reg_buff) return NULL; - for (i=0; i < AMD8111E_REG_DUMP_LEN; i+=4) - reg_buff[i]= readl(mmio + i); - return reg_buff; + + /* Read only necessary registers */ + reg_buff[0] = readl(mmio + XMT_RING_BASE_ADDR0); + reg_buff[1] = readl(mmio + XMT_RING_LEN0); + reg_buff[2] = readl(mmio + RCV_RING_BASE_ADDR0); + reg_buff[3] = readl(mmio + RCV_RING_LEN0); + reg_buff[4] = readl(mmio + CMD0); + reg_buff[5] = readl(mmio + CMD2); + reg_buff[6] = readl(mmio + CMD3); + reg_buff[7] = readl(mmio + CMD7); + reg_buff[8] = readl(mmio + INT0); + reg_buff[9] = readl(mmio + INTEN0); + reg_buff[10] = readl(mmio + LADRF); + reg_buff[11] = readl(mmio + LADRF+4); + reg_buff[12] = readl(mmio + STAT0); + + return (char *)reg_buff; +} +/* +amd8111e crc generator implementation is different from the kernel +ether_crc() function. 
+*/ +int amd8111e_ether_crc(int len, char* mac_addr) +{ + int i,byte; + unsigned char octet; + u32 crc= INITCRC; + + for(byte=0; byte < len; byte++){ + octet = mac_addr[byte]; + for( i=0;i < 8; i++){ + /*If the next bit form the input stream is 1,subtract the divisor (CRC32) from the dividend(crc).*/ + if( (octet & 0x1) ^ (crc & 0x1) ){ + crc >>= 1; + crc ^= CRC32; + } + else + crc >>= 1; + + octet >>= 1; + } + } + return crc; } /* This function sets promiscuos mode, all-multi mode or the multicast address @@ -967,9 +1337,8 @@ struct amd8111e_priv *lp = dev->priv; u32 mc_filter[2] ; int i,bit_num; - if(dev->flags & IFF_PROMISC){ - printk("%s: Setting promiscuous mode.\n",dev->name); + printk(KERN_INFO "%s: Setting promiscuous mode.\n",dev->name); writel( VAL2 | PROM, lp->mmio + CMD2); return; } @@ -980,7 +1349,7 @@ mc_filter[1] = mc_filter[0] = 0xffffffff; lp->mc_list = dev->mc_list; lp->options |= OPTION_MULTICAST_ENABLE; - AMD8111E_WRITE_REG64(lp->mmio, LADRF,mc_filter); + amd8111e_writeq(*(u64*)mc_filter,lp->mmio + LADRF); return; } if( dev->mc_count == 0 ){ @@ -988,7 +1357,7 @@ mc_filter[1] = mc_filter[0] = 0; lp->mc_list = 0; lp->options &= ~OPTION_MULTICAST_ENABLE; - AMD8111E_WRITE_REG64(lp->mmio, LADRF,mc_filter); + amd8111e_writeq(*(u64*)mc_filter,lp->mmio + LADRF); /* disable promiscous mode */ writel(PROM, lp->mmio + CMD2); return; @@ -999,14 +1368,16 @@ mc_filter[1] = mc_filter[0] = 0; for (i = 0, mc_ptr = dev->mc_list; mc_ptr && i < dev->mc_count; i++, mc_ptr = mc_ptr->next) { - bit_num = ether_crc(ETH_ALEN, mc_ptr->dmi_addr) >> 26; - + bit_num = ( amd8111e_ether_crc(ETH_ALEN,mc_ptr->dmi_addr) >> 26 ) & 0x3f; mc_filter[bit_num >> 5] |= 1 << (bit_num & 31); } + amd8111e_writeq(*(u64*)mc_filter,lp->mmio+ LADRF); + + /* To eliminate PCI posting bug */ + readl(lp->mmio + CMD2); - AMD8111E_WRITE_REG64(lp->mmio, LADRF, mc_filter); - return; } + /* This function handles all the ethtool ioctls. 
It gives driver info, gets/sets driver speed, gets memory mapped register values, forces auto negotiation, sets/gets WOL options for ethtool application. */ @@ -1029,6 +1400,7 @@ strcpy (info.driver, MODULE_NAME); strcpy (info.version, MODULE_VERSION); memset(&info.fw_version, 0, sizeof(info.fw_version)); + sprintf(info.fw_version,"%u",chip_version); strcpy (info.bus_info, pci_dev->slot_name); info.eedump_len = 0; info.regdump_len = AMD8111E_REG_DUMP_LEN; @@ -1036,85 +1408,27 @@ return -EFAULT; return 0; } - case ETHTOOL_GSET:{ - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - - if (!lp->opened) - return -EAGAIN; - - cmd.supported = SUPPORTED_Autoneg | - SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_MII; - - cmd.advertising = ADVERTISED_Autoneg | - ADVERTISED_100baseT_Half | - ADVERTISED_100baseT_Full | - ADVERTISED_10baseT_Half | - ADVERTISED_10baseT_Full | - ADVERTISED_MII; - cmd.speed = lp->link_config.speed; - cmd.duplex = lp->link_config.duplex; - cmd.port = 0; - cmd.phy_address = PHY_ID; - cmd.transceiver = XCVR_EXTERNAL; - cmd.autoneg = lp->link_config.autoneg; - cmd.maxtxpkt = 0; /* not implemented interrupt coalasing */ - cmd.maxrxpkt = 0; /* not implemented interrupt coalasing */ - if (copy_to_user(useraddr, &cmd, sizeof(cmd))) + /* get settings */ + case ETHTOOL_GSET: { + struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + spin_lock_irq(&lp->lock); + mii_ethtool_gset(&lp->mii_if, &ecmd); + spin_unlock_irq(&lp->lock); + if (copy_to_user(useraddr, &ecmd, sizeof(ecmd))) return -EFAULT; return 0; - } + } + /* set settings */ case ETHTOOL_SSET: { - - struct ethtool_cmd cmd; - - if (!lp->opened) - return -EAGAIN; - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + int r; + struct ethtool_cmd ecmd; + if (copy_from_user(&ecmd, useraddr, sizeof(ecmd))) return -EFAULT; spin_lock_irq(&lp->lock); - - if(cmd.autoneg == AUTONEG_ENABLE){ - /* keeping the original speeds */ - if((lp->link_config.speed != 
SPEED_INVALID)&& - (lp->link_config.duplex != DUPLEX_INVALID)){ - lp->link_config.orig_speed = lp->link_config.speed; - lp->link_config.orig_duplex = lp->link_config.duplex; - lp->link_config.orig_phy_option = lp->ext_phy_option; - } - - lp->ext_phy_option = XPHYANE; - } - else if(cmd.speed == SPEED_100 && cmd.duplex == DUPLEX_HALF) - lp->ext_phy_option = XPHYSP; - else if(cmd.speed == SPEED_100 && cmd.duplex == DUPLEX_FULL) - lp->ext_phy_option = XPHYSP |XPHYFD; - else if(cmd.speed == SPEED_10 && cmd.duplex == DUPLEX_HALF) - lp->ext_phy_option = 0; - else if(cmd.speed == SPEED_10 && cmd.duplex == DUPLEX_FULL) - lp->ext_phy_option = XPHYFD; - else { - /* setting the original speed */ - cmd.speed = lp->link_config.orig_speed; - cmd.duplex = lp->link_config.orig_duplex; - lp->ext_phy_option = lp->link_config.orig_phy_option; - } - lp->link_config.autoneg = cmd.autoneg; - if (cmd.autoneg == AUTONEG_ENABLE) { - - lp->link_config.speed = SPEED_INVALID; - lp->link_config.duplex = DUPLEX_INVALID; - } else { - lp->link_config.speed = cmd.speed; - lp->link_config.duplex = cmd.duplex; - } - amd8111e_set_ext_phy(dev); + r = mii_ethtool_sset(&lp->mii_if, &ecmd); spin_unlock_irq(&lp->lock); - return 0; + return r; } case ETHTOOL_GREGS: { struct ethtool_regs regs; @@ -1140,24 +1454,17 @@ kfree(regbuf); return ret; } + /* restart autonegotiation */ case ETHTOOL_NWAY_RST: { - int ret; - spin_lock_irq(&lp->lock); - if(lp->link_config.autoneg == AUTONEG_ENABLE){ - lp->ext_phy_option = XPHYANE; - amd8111e_set_ext_phy(dev); - ret = 0; - }else - ret = -EINVAL; - spin_unlock_irq(&lp->lock); - return ret; + return mii_nway_restart(&lp->mii_if); } + /* get link status */ case ETHTOOL_GLINK: { - struct ethtool_value val = { ETHTOOL_GLINK }; - - val.data = netif_carrier_ok(dev) ? 
1 : 0; + struct ethtool_value val = {ETHTOOL_GLINK}; + val.data = mii_link_ok(&lp->mii_if); if (copy_to_user(useraddr, &val, sizeof(val))) return -EFAULT; + return 0; } case ETHTOOL_GWOL: { struct ethtool_wolinfo wol_info = { ETHTOOL_GWOL }; @@ -1196,60 +1503,6 @@ } return -EOPNOTSUPP; } -static int amd8111e_read_phy(struct amd8111e_priv* lp, int phy_id, int reg, u32* val) -{ - void * mmio = lp->mmio; - unsigned int reg_val; - unsigned int repeat= REPEAT_CNT; - - reg_val = readl(mmio + PHY_ACCESS); - while (reg_val & PHY_CMD_ACTIVE) - reg_val = readl( mmio + PHY_ACCESS ); - - writel( PHY_RD_CMD | ((phy_id & 0x1f) << 21) | - ((reg & 0x1f) << 16), mmio +PHY_ACCESS); - do{ - reg_val = readl(mmio + PHY_ACCESS); - udelay(30); /* It takes 30 us to read/write data */ - } while (--repeat && (reg_val & PHY_CMD_ACTIVE)); - if(reg_val & PHY_RD_ERR) - goto err_phy_read; - - *val = reg_val & 0xffff; - return 0; -err_phy_read: - *val = 0; - return -EINVAL; - -} -static int amd8111e_write_phy(struct amd8111e_priv* lp,int phy_id, int reg, u32 val) -{ - unsigned int repeat = REPEAT_CNT - void * mmio = lp->mmio; - unsigned int reg_val; - - - reg_val = readl(mmio + PHY_ACCESS); - while (reg_val & PHY_CMD_ACTIVE) - reg_val = readl( mmio + PHY_ACCESS ); - - writel( PHY_WR_CMD | ((phy_id & 0x1f) << 21) | - ((reg & 0x1f) << 16)|val, mmio + PHY_ACCESS); - - do{ - reg_val = readl(mmio + PHY_ACCESS); - udelay(30); /* It takes 30 us to read/write the data */ - } while (--repeat && (reg_val & PHY_CMD_ACTIVE)); - - if(reg_val & PHY_RD_ERR) - goto err_phy_write; - - return 0; - -err_phy_write: - return -EINVAL; - -} static int amd8111e_ioctl(struct net_device * dev , struct ifreq *ifr, int cmd) { struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data; @@ -1317,15 +1570,10 @@ dev->mtu = new_mtu; - /* if (new_mtu > ETH_DATA_LEN) - lp->options |= OPTION_JUMBO_ENABLE; - else - lp->options &= ~OPTION_JUMBO_ENABLE; - */ err = amd8111e_restart(dev); spin_unlock_irq(&lp->lock); - - 
netif_start_queue(dev); + if(!err) + netif_start_queue(dev); return err; } @@ -1351,73 +1599,41 @@ { writel( VAL1|MPPLBA, lp->mmio + CMD3); writel( VAL0|MPEN_SW, lp->mmio + CMD7); + + /* To eliminate PCI posting bug */ + readl(lp->mmio + CMD7); return 0; } static int amd8111e_enable_link_change(struct amd8111e_priv* lp) { + /* Adapter is already stoped/suspended/interrupt-disabled */ writel(VAL0|LCMODE_SW,lp->mmio + CMD7); - return 0; -} - -/* -This function sets the power state of the device. When the device go to lower power states 1,2, and 3 it enables the wake on lan -*/ -static int amd8111e_set_power_state(struct amd8111e_priv* lp, u32 state) -{ - u16 power_control; - int pm = lp->pm_cap; - - pci_read_config_word(lp->pci_dev, - pm + PCI_PM_CTRL, - &power_control); - - power_control |= PCI_PM_CTRL_PME_STATUS; - power_control &= ~(PCI_PM_CTRL_STATE_MASK); - switch (state) { - case 0: - power_control |= 0; - pci_write_config_word(lp->pci_dev, - pm + PCI_PM_CTRL, - power_control); - return 0; - - case 1: - power_control |= 1; - break; - - case 2: - power_control |= 2; - break; - - case 3: - power_control |= 3; - break; - default: - - printk(KERN_WARNING "%s: Invalid power state (%d) requested.\n", - lp->amd8111e_net_dev->name, state); - return -EINVAL; - } - if(lp->options & OPTION_WAKE_MAGIC_ENABLE) - amd8111e_enable_magicpkt(lp); - if(lp->options & OPTION_WAKE_PHY_ENABLE) - amd8111e_enable_link_change(lp); - - /* Setting new power state. */ - pci_write_config_word(lp->pci_dev, pm + PCI_PM_CTRL, power_control); - + /* To eliminate PCI posting bug */ + readl(lp->mmio + CMD7); return 0; +} +/* This function is called when a packet transmission fails to complete within a resonable period, on the assumption that an interrupts have been failed or the interface is locked up. 
This function will reinitialize the hardware */ +static void amd8111e_tx_timeout(struct net_device *dev) +{ + struct amd8111e_priv* lp = dev->priv; + int err; + printk(KERN_ERR "%s: transmit timed out, resetting\n", + dev->name); + spin_lock_irq(&lp->lock); + err = amd8111e_restart(dev); + spin_unlock_irq(&lp->lock); + if(!err) + netif_wake_queue(dev); } static int amd8111e_suspend(struct pci_dev *pci_dev, u32 state) { struct net_device *dev = pci_get_drvdata(pci_dev); struct amd8111e_priv *lp = dev->priv; - int err; if (!netif_running(dev)) return 0; @@ -1431,37 +1647,54 @@ /* stop chip */ spin_lock_irq(&lp->lock); + if(lp->options & OPTION_DYN_IPG_ENABLE) + del_timer_sync(&lp->ipg_data.ipg_timer); amd8111e_stop_chip(lp); spin_unlock_irq(&lp->lock); - err = amd8111e_set_power_state(lp, state); - if (err) { + if(lp->options & OPTION_WOL_ENABLE){ + /* enable wol */ + if(lp->options & OPTION_WAKE_MAGIC_ENABLE) + amd8111e_enable_magicpkt(lp); + if(lp->options & OPTION_WAKE_PHY_ENABLE) + amd8111e_enable_link_change(lp); - spin_lock_irq(&lp->lock); - amd8111e_restart(dev); - spin_unlock_irq(&lp->lock); + pci_enable_wake(pci_dev, 3, 1); + pci_enable_wake(pci_dev, 4, 1); /* D3 cold */ - netif_device_attach(dev); } - return err; + else{ + pci_enable_wake(pci_dev, 3, 0); + pci_enable_wake(pci_dev, 4, 0); /* 4 == D3 cold */ + } + + pci_save_state(pci_dev, lp->pm_state); + pci_set_power_state(pci_dev, 3); + + return 0; } static int amd8111e_resume(struct pci_dev *pci_dev) { struct net_device *dev = pci_get_drvdata(pci_dev); struct amd8111e_priv *lp = dev->priv; - int err; if (!netif_running(dev)) return 0; - err = amd8111e_set_power_state(lp, 0); - if (err) - return err; + pci_set_power_state(pci_dev, 0); + pci_restore_state(pci_dev, lp->pm_state); + + pci_enable_wake(pci_dev, 3, 0); + pci_enable_wake(pci_dev, 4, 0); /* D3 cold */ netif_device_attach(dev); spin_lock_irq(&lp->lock); amd8111e_restart(dev); + /* Restart ipg timer */ + if(lp->options & OPTION_DYN_IPG_ENABLE) + 
mod_timer(&lp->ipg_data.ipg_timer, + jiffies + (IPG_CONVERGE_TIME * HZ)); spin_unlock_irq(&lp->lock); return 0; @@ -1480,6 +1713,65 @@ pci_set_drvdata(pdev, NULL); } } +static void amd8111e_config_ipg(struct net_device* dev) +{ + struct amd8111e_priv *lp = dev->priv; + struct ipg_info* ipg_data = &lp->ipg_data; + void * mmio = lp->mmio; + unsigned int prev_col_cnt = ipg_data->col_cnt; + unsigned int total_col_cnt; + unsigned int tmp_ipg; + + if(lp->link_config.duplex == DUPLEX_FULL){ + ipg_data->ipg = DEFAULT_IPG; + return; + } + + if(ipg_data->ipg_state == SSTATE){ + + if(ipg_data->timer_tick == IPG_STABLE_TIME){ + + ipg_data->timer_tick = 0; + ipg_data->ipg = MIN_IPG - IPG_STEP; + ipg_data->current_ipg = MIN_IPG; + ipg_data->diff_col_cnt = 0xFFFFFFFF; + ipg_data->ipg_state = CSTATE; + } + else + ipg_data->timer_tick++; + } + + if(ipg_data->ipg_state == CSTATE){ + + /* Get the current collision count */ + + total_col_cnt = ipg_data->col_cnt = + amd8111e_read_mib(mmio, xmt_collisions); + + if ((total_col_cnt - prev_col_cnt) < + (ipg_data->diff_col_cnt)){ + + ipg_data->diff_col_cnt = + total_col_cnt - prev_col_cnt ; + + ipg_data->ipg = ipg_data->current_ipg; + } + + ipg_data->current_ipg += IPG_STEP; + + if (ipg_data->current_ipg <= MAX_IPG) + tmp_ipg = ipg_data->current_ipg; + else{ + tmp_ipg = ipg_data->ipg; + ipg_data->ipg_state = SSTATE; + } + writew((u32)tmp_ipg, mmio + IPG); + writew((u32)(tmp_ipg - IFS1_DELTA), mmio + IFS1); + } + mod_timer(&lp->ipg_data.ipg_timer, jiffies + (IPG_CONVERGE_TIME * HZ)); + return; + +} static int __devinit amd8111e_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -1488,7 +1780,6 @@ unsigned long reg_addr,reg_len; struct amd8111e_priv* lp; struct net_device* dev; - unsigned int chip_version; err = pci_enable_device(pdev); if(err){ @@ -1547,11 +1838,16 @@ #endif lp = dev->priv; - memset (lp, 0, sizeof (*lp)); lp->pci_dev = pdev; lp->amd8111e_net_dev = dev; lp->pm_cap = pm_cap; + /* setting mii default values */ 
+ lp->mii_if.dev = dev; + lp->mii_if.mdio_read = amd8111e_mdio_read; + lp->mii_if.mdio_write = amd8111e_mdio_write; + lp->mii_if.phy_id = PHY_ID; + spin_lock_init(&lp->lock); lp->mmio = ioremap(reg_addr, reg_len); @@ -1565,12 +1861,14 @@ /* Initializing MAC address */ for(i = 0; i < ETH_ADDR_LEN; i++) dev->dev_addr[i] =readb(lp->mmio + PADR + i); - /* Setting user defined speed */ - if (speed_duplex[card_idx] > sizeof(speed_duplex_mapping)) - lp->ext_phy_option = XPHYANE; - else - lp->ext_phy_option = - speed_duplex_mapping[speed_duplex[card_idx]]; + + /* Setting user defined parametrs */ + lp->ext_phy_option = speed_duplex[card_idx]; + if(coalesce[card_idx]) + lp->options |= OPTION_INTR_COAL_ENABLE; + if(dynamic_ipg[card_idx++]) + lp->options |= OPTION_DYN_IPG_ENABLE; + /* Initialize driver entry points */ dev->open = amd8111e_open; dev->hard_start_xmit = amd8111e_start_xmit; @@ -1580,6 +1878,8 @@ dev->do_ioctl = amd8111e_ioctl; dev->change_mtu = amd8111e_change_mtu; dev->irq =pdev->irq; + dev->tx_timeout = amd8111e_tx_timeout; + dev->watchdog_timeo = AMD8111E_TX_TIMEOUT; #if AMD8111E_VLAN_TAG_USED dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; @@ -1589,10 +1889,6 @@ /* Set receive buffer length and set jumbo option*/ amd8111e_set_rx_buff_len(dev); - - - /* dev->tx_timeout = tg3_tx_timeout; */ - /* dev->watchdog_timeo = TG3_TX_TIMEOUT; */ err = register_netdev(dev); if (err) { @@ -1603,15 +1899,26 @@ pci_set_drvdata(pdev, dev); + /* Initialize software ipg timer */ + if(lp->options & OPTION_DYN_IPG_ENABLE){ + init_timer(&lp->ipg_data.ipg_timer); + lp->ipg_data.ipg_timer.data = (unsigned long) dev; + lp->ipg_data.ipg_timer.function = (void *)&amd8111e_config_ipg; + lp->ipg_data.ipg_timer.expires = jiffies + + IPG_CONVERGE_TIME * HZ; + lp->ipg_data.ipg = DEFAULT_IPG; + lp->ipg_data.ipg_state = CSTATE; + }; + /* display driver and device information */ - chip_version = (readl(lp->mmio + CHIPID) & 0xf0000000)>>28; - printk("%s: AMD-8111e Driver Version: 
%s\n",dev->name,MODULE_VERSION); - printk("%s: [ Rev %x ] PCI 10/100BaseT Ethernet ", dev->name, chip_version); - for (i = 0; i < 6; i++) - printk("%2.2x%c", dev->dev_addr[i],i == 5 ? ' ' : ':'); - printk("\n"); - return 0; + chip_version = (readl(lp->mmio + CHIPID) & 0xf0000000)>>28; + printk(KERN_INFO "%s: AMD-8111e Driver Version: %s\n", dev->name,MODULE_VERSION); + printk(KERN_INFO "%s: [ Rev %x ] PCI 10/100BaseT Ethernet ", dev->name, chip_version); + for (i = 0; i < 6; i++) + printk("%2.2x%c",dev->dev_addr[i],i == 5 ? ' ' : ':'); + printk( "\n"); + return 0; err_iounmap: iounmap((void *) lp->mmio); diff -Nru a/drivers/net/amd8111e.h b/drivers/net/amd8111e.h --- a/drivers/net/amd8111e.h Thu Jun 19 23:46:52 2003 +++ b/drivers/net/amd8111e.h Thu Jun 19 23:46:52 2003 @@ -1,4 +1,7 @@ /* + * Advanced Micro Devices Inc. AMD8111E Linux Network Driver + * Copyright (C) 2003 Advanced Micro Devices + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -27,73 +30,14 @@ Kernel Mode Revision History: - + 3.0.0 + Initial Revision. 
+ 3.0.1 */ #ifndef _AMD811E_H #define _AMD811E_H -/* Hardware definitions */ - -#define B31_MASK 0x80000000 -#define B30_MASK 0X40000000 -#define B29_MASK 0x20000000 -#define B28_MASK 0x10000000 -#define B27_MASK 0x08000000 -#define B26_MASK 0x04000000 -#define B25_MASK 0x02000000 -#define B24_MASK 0x01000000 -#define B23_MASK 0x00800000 -#define B22_MASK 0x00400000 -#define B21_MASK 0x00200000 -#define B20_MASK 0x00100000 -#define B19_MASK 0x00080000 -#define B18_MASK 0x00040000 -#define B17_MASK 0x00020000 -#define B16_MASK 0x00010000 - -#define B15_MASK 0x8000 -#define B14_MASK 0x4000 -#define B13_MASK 0x2000 -#define B12_MASK 0x1000 -#define B11_MASK 0x0800 -#define B10_MASK 0x0400 -#define B9_MASK 0x0200 -#define B8_MASK 0x0100 -#define B7_MASK 0x0080 -#define B6_MASK 0x0040 -#define B5_MASK 0x0020 -#define B4_MASK 0x0010 -#define B3_MASK 0x0008 -#define B2_MASK 0x0004 -#define B1_MASK 0x0002 -#define B0_MASK 0x0001 - -/* PCI register offset */ -#define PCI_ID_REG 0x00 -#define PCI_COMMAND_REG 0x04 -/* #define MEMEN_BIT B1_MASK */ -/* #define IOEN_BIT B0_MASK */ -#define PCI_REV_ID_REG 0x08 -#define PCI_MEM_BASE_REG 0x10 -/* #define MEMBASE_MASK 0xFFFFF000 */ -/* #define MEMBASE_SIZE 4096 */ -#define PCI_INTR_REG 0x3C -#define PCI_STATUS_REG 0x06 -#define PCI_CAP_ID_REG_OFFSET 0x34 -#define PCI_PMC_REG_OFFSET 0x36 -#define PCI_PMCSR_REG_OFFSET 0x38 - -/* #define NEW_CAP 0x0010 */ -#define PME_EN 0x0100 - -#define PARTID_MASK 0xFFFFF000 -#define PARTID_START_BIT 12 - -/* #define LANCE_DWIO_RESET_PORT 0x18 -#define LANCE_WIO_RESET_PORT 0x14 */ -#define MIB_OFFSET 0x28 - /* Command style register access Registers CMD0, CMD2, CMD3,CMD7 and INTEN0 uses a write access technique called command style access. It allows the write to selected bits of this register without altering the bits that are not selected. Command style registers are divided into 4 bytes that can be written independently. 
Higher order bit of each byte is the value bit that specifies the value that will be written into the selected bits of register. @@ -155,7 +99,7 @@ #define XMT_RING_LEN2 0x148 /* Transmit Ring2 length register */ #define XMT_RING_LEN3 0x14C /* Transmit Ring3 length register */ -#define RCV_RING_LEN0 0x150 /* Transmit Ring0 length register */ +#define RCV_RING_LEN0 0x150 /* Receive Ring0 length register */ #define SRAM_SIZE 0x178 /* SRAM size register */ #define SRAM_BOUNDARY 0x17A /* SRAM boundary register */ @@ -164,391 +108,398 @@ #define PADR 0x160 /* Physical address register */ +#define IFS1 0x18C /* Inter-frame spacing Part1 register */ +#define IFS 0x18D /* Inter-frame spacing register */ +#define IPG 0x18E /* Inter-frame gap register */ /* 64bit register */ #define LADRF 0x168 /* Logical address filter register */ -/* 8bit regsisters */ - -#define IFS1 0x18C /* Inter-frame spacing Part1 register */ -#define IFS 0x18D /* Inter-frame spacing register */ /* Register Bit Definitions */ +typedef enum { + + ASF_INIT_DONE = (1 << 1), + ASF_INIT_PRESENT = (1 << 0), + +}STAT_ASF_BITS; + +typedef enum { + + MIB_CMD_ACTIVE = (1 << 15 ), + MIB_RD_CMD = (1 << 13 ), + MIB_CLEAR = (1 << 12 ), + MIB_ADDRESS = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)| + (1 << 4) | (1 << 5), +}MIB_ADDR_BITS; + + +typedef enum { + + PMAT_DET = (1 << 12), + MP_DET = (1 << 11), + LC_DET = (1 << 10), + SPEED_MASK = (1 << 9)|(1 << 8)|(1 << 7), + FULL_DPLX = (1 << 6), + LINK_STATS = (1 << 5), + AUTONEG_COMPLETE = (1 << 4), + MIIPD = (1 << 3), + RX_SUSPENDED = (1 << 2), + TX_SUSPENDED = (1 << 1), + RUNNING = (1 << 0), + +}STAT0_BITS; -/* STAT_ASF 0x00, 32bit register */ -#define ASF_INIT_DONE B1_MASK -#define ASF_INIT_PRESENT B0_MASK - -/* MIB_ADDR 0x14, 16bit register */ -#define MIB_CMD_ACTIVE B15_MASK -#define MIB_RD_CMD B13_MASK -#define MIB_CLEAR B12_MASK -#define MIB_ADDRESS 0x0000003F /* 5:0 */ - -/* QOS_ADDR 0x1C, 16bit register */ -#define QOS_CMD_ACTIVE B15_MASK -#define QOS_WR_CMD 
B14_MASK -#define QOS_RD_CMD B13_MASK -#define QOS_ADDRESS 0x0000001F /* 4:0 */ - -/* STAT0 0x30, 32bit register */ -#define PAUSE_PEND B14_MASK -#define PAUSING B13_MASK -#define PMAT_DET B12_MASK -#define MP_DET B11_MASK -#define LC_DET B10_MASK -#define SPEED_MASK 0x0380 /* 9:7 */ -#define FULL_DPLX B6_MASK -#define LINK_STATS B5_MASK -#define AUTONEG_COMPLETE B4_MASK -#define MIIPD B3_MASK -#define RX_SUSPENDED B2_MASK -#define TX_SUSPENDED B1_MASK -#define RUNNING B0_MASK #define PHY_SPEED_10 0x2 #define PHY_SPEED_100 0x3 /* INT0 0x38, 32bit register */ -#define INTR B31_MASK -#define PCSINT B28_MASK -#define LCINT B27_MASK -#define APINT5 B26_MASK -#define APINT4 B25_MASK -#define APINT3 B24_MASK -#define TINT_SUM B23_MASK -#define APINT2 B22_MASK -#define APINT1 B21_MASK -#define APINT0 B20_MASK -#define MIIPDTINT B19_MASK -#define MCCIINT B18_MASK -#define MCCINT B17_MASK -#define MREINT B16_MASK -#define RINT_SUM B15_MASK -#define SPNDINT B14_MASK -#define MPINT B13_MASK -#define SINT B12_MASK -#define TINT3 B11_MASK -#define TINT2 B10_MASK -#define TINT1 B9_MASK -#define TINT0 B8_MASK -#define UINT B7_MASK -#define STINT B4_MASK -#define RINT3 B3_MASK -#define RINT2 B2_MASK -#define RINT1 B1_MASK -#define RINT0 B0_MASK - -/* INTEN0 0x40, 32bit register */ -#define VAL3 B31_MASK /* VAL bit for byte 3 */ -#define VAL2 B23_MASK /* VAL bit for byte 2 */ -#define VAL1 B15_MASK /* VAL bit for byte 1 */ -#define VAL0 B7_MASK /* VAL bit for byte 0 */ -/* VAL3 */ -#define PSCINTEN B28_MASK -#define LCINTEN B27_MASK -#define APINT5EN B26_MASK -#define APINT4EN B25_MASK -#define APINT3EN B24_MASK -/* VAL2 */ -#define APINT2EN B22_MASK -#define APINT1EN B21_MASK -#define APINT0EN B20_MASK -#define MIIPDTINTEN B19_MASK -#define MCCIINTEN B18_MASK -#define MCCINTEN B17_MASK -#define MREINTEN B16_MASK -/* VAL1 */ -#define SPNDINTEN B14_MASK -#define MPINTEN B13_MASK -#define SINTEN B12_MASK -#define TINTEN3 B11_MASK -#define TINTEN2 B10_MASK -#define TINTEN1 B9_MASK 
-#define TINTEN0 B8_MASK -/* VAL0 */ -#define STINTEN B4_MASK -#define RINTEN3 B3_MASK -#define RINTEN2 B2_MASK -#define RINTEN1 B1_MASK -#define RINTEN0 B0_MASK - -#define INTEN0_CLEAR 0x1F7F7F1F /* Command style register */ - -/* CMD0 0x48, 32bit register */ -/* VAL2 */ -#define RDMD3 B19_MASK -#define RDMD2 B18_MASK -#define RDMD1 B17_MASK -#define RDMD0 B16_MASK -/* VAL1 */ -#define TDMD3 B11_MASK -#define TDMD2 B10_MASK -#define TDMD1 B9_MASK -#define TDMD0 B8_MASK -/* VAL0 */ -#define UINTCMD B6_MASK -#define RX_FAST_SPND B5_MASK -#define TX_FAST_SPND B4_MASK -#define RX_SPND B3_MASK -#define TX_SPND B2_MASK -#define INTREN B1_MASK -#define RUN B0_MASK - -#define CMD0_CLEAR 0x000F0F7F /* Command style register */ - -/* CMD2 0x50, 32bit register */ -/* VAL3 */ -#define CONDUIT_MODE B29_MASK -/* VAL2 */ -#define RPA B19_MASK -#define DRCVPA B18_MASK -#define DRCVBC B17_MASK -#define PROM B16_MASK -/* VAL1 */ -#define ASTRP_RCV B13_MASK -#define FCOLL B12_MASK -#define EMBA B11_MASK -#define DXMT2PD B10_MASK -#define LTINTEN B9_MASK -#define DXMTFCS B8_MASK -/* VAL0 */ -#define APAD_XMT B6_MASK -#define DRTY B5_MASK -#define INLOOP B4_MASK -#define EXLOOP B3_MASK -#define REX_RTRY B2_MASK -#define REX_UFLO B1_MASK -#define REX_LCOL B0_MASK - -#define CMD2_CLEAR 0x3F7F3F7F /* Command style register */ - -/* CMD3 0x54, 32bit register */ -/* VAL3 */ -#define ASF_INIT_DONE_ALIAS B29_MASK -/* VAL2 */ -#define JUMBO B21_MASK -#define VSIZE B20_MASK -#define VLONLY B19_MASK -#define VL_TAG_DEL B18_MASK -/* VAL1 */ -#define EN_PMGR B14_MASK -#define INTLEVEL B13_MASK -#define FORCE_FULL_DUPLEX B12_MASK -#define FORCE_LINK_STATUS B11_MASK -#define APEP B10_MASK -#define MPPLBA B9_MASK -/* VAL0 */ -#define RESET_PHY_PULSE B2_MASK -#define RESET_PHY B1_MASK -#define PHY_RST_POL B0_MASK -/* CMD7 0x64, 32bit register */ -/* VAL0 */ -#define PMAT_SAVE_MATCH B4_MASK -#define PMAT_MODE B3_MASK -#define MPEN_SW B1_MASK -#define LCMODE_SW B0_MASK - -#define CMD7_CLEAR 0x0000001B 
/* Command style register */ -/* CTRL0 0x68, 32bit register */ -#define PHY_SEL 0x03000000 /* 25:24 */ -#define RESET_PHY_WIDTH 0x00FF0000 /* 23:16 */ -#define BSWP_REGS B10_MASK -#define BSWP_DESC B9_MASK -#define BSWP_DATA B8_MASK -#define CACHE_ALIGN B4_MASK -#define BURST_LIMIT 0x0000000F /* 3:0 */ - -/* CTRL1 0x6C, 32bit register */ -#define SLOTMOD_MASK 0x03000000 /* 25:24 */ -#define XMTSP_MASK 0x300 /* 17:16 */ -#define XMTSP_128 0x200 -#define XMTSP_64 0x100 -#define CRTL1_DEFAULT 0x00000017 - -/* CTRL2 0x70, 32bit register */ -#define FS_MASK 0x00070000 /* 18:16 */ -#define FMDC_MASK 0x00000300 /* 9:8 */ -#define XPHYRST B7_MASK -#define XPHYANE B6_MASK -#define XPHYFD B5_MASK -#define XPHYSP B3_MASK /* 4:3 */ -#define APDW_MASK 0x00000007 /* 2:0 */ - -/* RCV_RING_CFG 0x78, 16bit register */ -#define RCV_DROP3 B11_MASK -#define RCV_DROP2 B10_MASK -#define RCV_DROP1 B9_MASK -#define RCV_DROP0 B8_MASK -#define RCV_RING_DEFAULT 0x0030 /* 5:4 */ -#define RCV_RING3_EN B3_MASK -#define RCV_RING2_EN B2_MASK -#define RCV_RING1_EN B1_MASK -#define RCV_RING0_EN B0_MASK +typedef enum { + + INTR = (1 << 31), + PCSINT = (1 << 28), + LCINT = (1 << 27), + APINT5 = (1 << 26), + APINT4 = (1 << 25), + APINT3 = (1 << 24), + TINT_SUM = (1 << 23), + APINT2 = (1 << 22), + APINT1 = (1 << 21), + APINT0 = (1 << 20), + MIIPDTINT = (1 << 19), + MCCINT = (1 << 17), + MREINT = (1 << 16), + RINT_SUM = (1 << 15), + SPNDINT = (1 << 14), + MPINT = (1 << 13), + SINT = (1 << 12), + TINT3 = (1 << 11), + TINT2 = (1 << 10), + TINT1 = (1 << 9), + TINT0 = (1 << 8), + UINT = (1 << 7), + STINT = (1 << 4), + RINT0 = (1 << 0), + +}INT0_BITS; + +typedef enum { + + VAL3 = (1 << 31), /* VAL bit for byte 3 */ + VAL2 = (1 << 23), /* VAL bit for byte 2 */ + VAL1 = (1 << 15), /* VAL bit for byte 1 */ + VAL0 = (1 << 7), /* VAL bit for byte 0 */ + +}VAL_BITS; + +typedef enum { + + /* VAL3 */ + LCINTEN = (1 << 27), + APINT5EN = (1 << 26), + APINT4EN = (1 << 25), + APINT3EN = (1 << 24), + /* VAL2 */ + 
APINT2EN = (1 << 22), + APINT1EN = (1 << 21), + APINT0EN = (1 << 20), + MIIPDTINTEN = (1 << 19), + MCCIINTEN = (1 << 18), + MCCINTEN = (1 << 17), + MREINTEN = (1 << 16), + /* VAL1 */ + SPNDINTEN = (1 << 14), + MPINTEN = (1 << 13), + TINTEN3 = (1 << 11), + SINTEN = (1 << 12), + TINTEN2 = (1 << 10), + TINTEN1 = (1 << 9), + TINTEN0 = (1 << 8), + /* VAL0 */ + STINTEN = (1 << 4), + RINTEN0 = (1 << 0), + + INTEN0_CLEAR = 0x1F7F7F1F, /* Command style register */ + +}INTEN0_BITS; + +typedef enum { + /* VAL2 */ + RDMD0 = (1 << 16), + /* VAL1 */ + TDMD3 = (1 << 11), + TDMD2 = (1 << 10), + TDMD1 = (1 << 9), + TDMD0 = (1 << 8), + /* VAL0 */ + UINTCMD = (1 << 6), + RX_FAST_SPND = (1 << 5), + TX_FAST_SPND = (1 << 4), + RX_SPND = (1 << 3), + TX_SPND = (1 << 2), + INTREN = (1 << 1), + RUN = (1 << 0), + + CMD0_CLEAR = 0x000F0F7F, /* Command style register */ + +}CMD0_BITS; + +typedef enum { + + /* VAL3 */ + CONDUIT_MODE = (1 << 29), + /* VAL2 */ + RPA = (1 << 19), + DRCVPA = (1 << 18), + DRCVBC = (1 << 17), + PROM = (1 << 16), + /* VAL1 */ + ASTRP_RCV = (1 << 13), + RCV_DROP0 = (1 << 12), + EMBA = (1 << 11), + DXMT2PD = (1 << 10), + LTINTEN = (1 << 9), + DXMTFCS = (1 << 8), + /* VAL0 */ + APAD_XMT = (1 << 6), + DRTY = (1 << 5), + INLOOP = (1 << 4), + EXLOOP = (1 << 3), + REX_RTRY = (1 << 2), + REX_UFLO = (1 << 1), + REX_LCOL = (1 << 0), + + CMD2_CLEAR = 0x3F7F3F7F, /* Command style register */ + +}CMD2_BITS; + +typedef enum { + + /* VAL3 */ + ASF_INIT_DONE_ALIAS = (1 << 29), + /* VAL2 */ + JUMBO = (1 << 21), + VSIZE = (1 << 20), + VLONLY = (1 << 19), + VL_TAG_DEL = (1 << 18), + /* VAL1 */ + EN_PMGR = (1 << 14), + INTLEVEL = (1 << 13), + FORCE_FULL_DUPLEX = (1 << 12), + FORCE_LINK_STATUS = (1 << 11), + APEP = (1 << 10), + MPPLBA = (1 << 9), + /* VAL0 */ + RESET_PHY_PULSE = (1 << 2), + RESET_PHY = (1 << 1), + PHY_RST_POL = (1 << 0), + +}CMD3_BITS; + + +typedef enum { + + /* VAL0 */ + PMAT_SAVE_MATCH = (1 << 4), + PMAT_MODE = (1 << 3), + MPEN_SW = (1 << 1), + LCMODE_SW = (1 << 0), + + 
CMD7_CLEAR = 0x0000001B /* Command style register */ + +}CMD7_BITS; + + +typedef enum { + + RESET_PHY_WIDTH = (0xF << 16) | (0xF<< 20), /* 0x00FF0000 */ + XMTSP_MASK = (1 << 9) | (1 << 8), /* 9:8 */ + XMTSP_128 = (1 << 9), /* 9 */ + XMTSP_64 = (1 << 8), + CACHE_ALIGN = (1 << 4), + BURST_LIMIT_MASK = (0xF << 0 ), + CTRL1_DEFAULT = 0x00010111, + +}CTRL1_BITS; + +typedef enum { + + FMDC_MASK = (1 << 9)|(1 << 8), /* 9:8 */ + XPHYRST = (1 << 7), + XPHYANE = (1 << 6), + XPHYFD = (1 << 5), + XPHYSP = (1 << 4) | (1 << 3), /* 4:3 */ + APDW_MASK = (1 << 2) | (1 << 1) | (1 << 0), /* 2:0 */ + +}CTRL2_BITS; /* XMT_RING_LIMIT 0x7C, 32bit register */ -#define XMT_RING2_LIMIT 0x00FF0000 /* 23:16 */ -#define XMT_RING1_LIMIT 0x0000FF00 /* 15:8 */ -#define XMT_RING0_LIMIT 0x000000FF /* 7:0 */ - -/* AUTOPOLL0 0x88, 16bit register */ -#define AP_REG0_EN B15_MASK -#define AP_REG0_ADDR_MASK 0x1F00 /* 12:8 */ -#define AP_PHY0_ADDR_MASK 0x001F /* 4:0 */ +typedef enum { + + XMT_RING2_LIMIT = (0xFF << 16), /* 23:16 */ + XMT_RING1_LIMIT = (0xFF << 8), /* 15:8 */ + XMT_RING0_LIMIT = (0xFF << 0), /* 7:0 */ + +}XMT_RING_LIMIT_BITS; + +typedef enum { + + AP_REG0_EN = (1 << 15), + AP_REG0_ADDR_MASK = (0xF << 8) |(1 << 12),/* 12:8 */ + AP_PHY0_ADDR_MASK = (0xF << 0) |(1 << 4),/* 4:0 */ + +}AUTOPOLL0_BITS; /* AUTOPOLL1 0x8A, 16bit register */ -#define AP_REG1_EN B15_MASK -#define AP_REG1_ADDR_MASK 0x1F00 /* 12:8 */ -#define AP_PRE_SUP1 B6_MASK -#define AP_PHY1_DFLT B5_MASK -#define AP_PHY1_ADDR_MASK 0x001F /* 4:0 */ - -/* AUTOPOLL2 0x8C, 16bit register */ -#define AP_REG2_EN B15_MASK -#define AP_REG2_ADDR_MASK 0x1F00 /* 12:8 */ -#define AP_PRE_SUP2 B6_MASK -#define AP_PHY2_DFLT B5_MASK -#define AP_PHY2_ADDR_MASK 0x001F /* 4:0 */ - -/* AUTOPOLL3 0x8E, 16bit register */ -#define AP_REG3_EN B15_MASK -#define AP_REG3_ADDR_MASK 0x1F00 /* 12:8 */ -#define AP_PRE_SUP3 B6_MASK -#define AP_PHY3_DFLT B5_MASK -#define AP_PHY3_ADDR_MASK 0x001F /* 4:0 */ - -/* AUTOPOLL4 0x90, 16bit register */ -#define 
AP_REG4_EN B15_MASK -#define AP_REG4_ADDR_MASK 0x1F00 /* 12:8 */ -#define AP_PRE_SUP4 B6_MASK -#define AP_PHY4_DFLT B5_MASK -#define AP_PHY4_ADDR_MASK 0x001F /* 4:0 */ - -/* AUTOPOLL5 0x92, 16bit register */ -#define AP_REG5_EN B15_MASK -#define AP_REG5_ADDR_MASK 0x1F00 /* 12:8 */ -#define AP_PRE_SUP5 B6_MASK -#define AP_PHY5_DFLT B5_MASK -#define AP_PHY5_ADDR_MASK 0x001F /* 4:0 */ +typedef enum { + + AP_REG1_EN = (1 << 15), + AP_REG1_ADDR_MASK = (0xF << 8) |(1 << 12),/* 12:8 */ + AP_PRE_SUP1 = (1 << 6), + AP_PHY1_DFLT = (1 << 5), + AP_PHY1_ADDR_MASK = (0xF << 0) |(1 << 4),/* 4:0 */ + +}AUTOPOLL1_BITS; + + +typedef enum { + + AP_REG2_EN = (1 << 15), + AP_REG2_ADDR_MASK = (0xF << 8) |(1 << 12),/* 12:8 */ + AP_PRE_SUP2 = (1 << 6), + AP_PHY2_DFLT = (1 << 5), + AP_PHY2_ADDR_MASK = (0xF << 0) |(1 << 4),/* 4:0 */ + +}AUTOPOLL2_BITS; + +typedef enum { + + AP_REG3_EN = (1 << 15), + AP_REG3_ADDR_MASK = (0xF << 8) |(1 << 12),/* 12:8 */ + AP_PRE_SUP3 = (1 << 6), + AP_PHY3_DFLT = (1 << 5), + AP_PHY3_ADDR_MASK = (0xF << 0) |(1 << 4),/* 4:0 */ + +}AUTOPOLL3_BITS; + + +typedef enum { + + AP_REG4_EN = (1 << 15), + AP_REG4_ADDR_MASK = (0xF << 8) |(1 << 12),/* 12:8 */ + AP_PRE_SUP4 = (1 << 6), + AP_PHY4_DFLT = (1 << 5), + AP_PHY4_ADDR_MASK = (0xF << 0) |(1 << 4),/* 4:0 */ + +}AUTOPOLL4_BITS; + + +typedef enum { + + AP_REG5_EN = (1 << 15), + AP_REG5_ADDR_MASK = (0xF << 8) |(1 << 12),/* 12:8 */ + AP_PRE_SUP5 = (1 << 6), + AP_PHY5_DFLT = (1 << 5), + AP_PHY5_ADDR_MASK = (0xF << 0) |(1 << 4),/* 4:0 */ + +}AUTOPOLL5_BITS; + + + /* AP_VALUE 0x98, 32bit ragister */ -#define AP_VAL_ACTIVE B31_MASK -#define AP_VAL_RD_CMD B29_MASK -#define AP_ADDR 0x00070000 /* 18:16 */ -#define AP_VAL 0x0000FFFF /* 15:0 */ - -/* PCS_ANEG 0x9C, 32bit register */ -#define SYNC_LOST B10_MASK -#define IMATCH B9_MASK -#define CMATCH B8_MASK -#define PCS_AN_IDLE B1_MASK -#define PCS_AN_CFG B0_MASK - -/* DLY_INT_A 0xA8, 32bit register */ -#define DLY_INT_A_R3 B31_MASK -#define DLY_INT_A_R2 B30_MASK -#define 
DLY_INT_A_R1 B29_MASK -#define DLY_INT_A_R0 B28_MASK -#define DLY_INT_A_T3 B27_MASK -#define DLY_INT_A_T2 B26_MASK -#define DLY_INT_A_T1 B25_MASK -#define DLY_INT_A_T0 B24_MASK -#define EVENT_COUNT_A 0x00FF0000 /* 20:16 */ -#define MAX_DELAY_TIME_A 0x000007FF /* 10:0 */ - -/* DLY_INT_B 0xAC, 32bit register */ -#define DLY_INT_B_R3 B31_MASK -#define DLY_INT_B_R2 B30_MASK -#define DLY_INT_B_R1 B29_MASK -#define DLY_INT_B_R0 B28_MASK -#define DLY_INT_B_T3 B27_MASK -#define DLY_INT_B_T2 B26_MASK -#define DLY_INT_B_T1 B25_MASK -#define DLY_INT_B_T0 B24_MASK -#define EVENT_COUNT_B 0x00FF0000 /* 20:16 */ -#define MAX_DELAY_TIME_B 0x000007FF /* 10:0 */ - -/* DFC_THRESH2 0xC0, 16bit register */ -#define DFC_THRESH2_HIGH 0xFF00 /* 15:8 */ -#define DFC_THRESH2_LOW 0x00FF /* 7:0 */ - -/* DFC_THRESH3 0xC2, 16bit register */ -#define DFC_THRESH3_HIGH 0xFF00 /* 15:8 */ -#define DFC_THRESH3_LOW 0x00FF /* 7:0 */ - -/* DFC_THRESH0 0xC4, 16bit register */ -#define DFC_THRESH0_HIGH 0xFF00 /* 15:8 */ -#define DFC_THRESH0_LOW 0x00FF /* 7:0 */ - -/* DFC_THRESH1 0xC6, 16bit register */ -#define DFC_THRESH1_HIGH 0xFF00 /* 15:8 */ -#define DFC_THRESH1_LOW 0x00FF /* 7:0 */ +typedef enum { + + AP_VAL_ACTIVE = (1 << 31), + AP_VAL_RD_CMD = ( 1 << 29), + AP_ADDR = (1 << 18)|(1 << 17)|(1 << 16), /* 18:16 */ + AP_VAL = (0xF << 0) | (0xF << 4) |( 0xF << 8) | + (0xF << 12), /* 15:0 */ + +}AP_VALUE_BITS; + +typedef enum { + + DLY_INT_A_R3 = (1 << 31), + DLY_INT_A_R2 = (1 << 30), + DLY_INT_A_R1 = (1 << 29), + DLY_INT_A_R0 = (1 << 28), + DLY_INT_A_T3 = (1 << 27), + DLY_INT_A_T2 = (1 << 26), + DLY_INT_A_T1 = (1 << 25), + DLY_INT_A_T0 = ( 1 << 24), + EVENT_COUNT_A = (0xF << 16) | (0x1 << 20),/* 20:16 */ + MAX_DELAY_TIME_A = (0xF << 0) | (0xF << 4) | (1 << 8)| + (1 << 9) | (1 << 10), /* 10:0 */ + +}DLY_INT_A_BITS; + +typedef enum { + + DLY_INT_B_R3 = (1 << 31), + DLY_INT_B_R2 = (1 << 30), + DLY_INT_B_R1 = (1 << 29), + DLY_INT_B_R0 = (1 << 28), + DLY_INT_B_T3 = (1 << 27), + DLY_INT_B_T2 = (1 << 26), + 
DLY_INT_B_T1 = (1 << 25), + DLY_INT_B_T0 = ( 1 << 24), + EVENT_COUNT_B = (0xF << 16) | (0x1 << 20),/* 20:16 */ + MAX_DELAY_TIME_B = (0xF << 0) | (0xF << 4) | (1 << 8)| + (1 << 9) | (1 << 10), /* 10:0 */ +}DLY_INT_B_BITS; + /* FLOW_CONTROL 0xC8, 32bit register */ -#define PAUSE_LEN_CHG B30_MASK -#define FFC_EN B28_MASK -#define DFC_RING3_EN B27_MASK -#define DFC_RING2_EN B26_MASK -#define DFC_RING1_EN B25_MASK -#define DFC_RING0_EN B24_MASK -#define FIXP_CONGEST B21_MASK -#define FPA B20_MASK -#define NPA B19_MASK -#define FIXP B18_MASK -#define FCPEN B17_MASK -#define FCCMD B16_MASK -#define PAUSE_LEN 0x0000FFFF /* 15:0 */ - -/* FFC THRESH 0xCC, 32bit register */ -#define FFC_HIGH 0xFFFF0000 /* 31:16 */ -#define FFC_LOW 0x0000FFFF /* 15:0 */ +typedef enum { + + PAUSE_LEN_CHG = (1 << 30), + FTPE = (1 << 22), + FRPE = (1 << 21), + NAPA = (1 << 20), + NPA = (1 << 19), + FIXP = ( 1 << 18), + FCCMD = ( 1 << 16), + PAUSE_LEN = (0xF << 0) | (0xF << 4) |( 0xF << 8) | (0xF << 12), /* 15:0 */ + +}FLOW_CONTROL_BITS; /* PHY_ ACCESS 0xD0, 32bit register */ -#define PHY_CMD_ACTIVE B31_MASK -#define PHY_WR_CMD B30_MASK -#define PHY_RD_CMD B29_MASK -#define PHY_RD_ERR B28_MASK -#define PHY_PRE_SUP B27_MASK -#define PHY_ADDR 0x03E00000 /* 25:21 */ -#define PHY_REG_ADDR 0x001F0000 /* 20:16 */ -#define PHY_DATA 0x0000FFFF /* 15:0 */ - -/* LED0..3 0xE0..0xE6, 16bit register */ -#define LEDOUT B15_MASK -#define LEDPOL B14_MASK -#define LEDDIS B13_MASK -#define LEDSTRETCH B12_MASK -#define LED1000 B8_MASK -#define LED100 B7_MASK -#define LEDMP B6_MASK -#define LEDFD B5_MASK -#define LEDLINK B4_MASK -#define LEDRCVMAT B3_MASK -#define LEDXMT B2_MASK -#define LEDRCV B1_MASK -#define LEDCOLOUT B0_MASK - -/* EEPROM_ACC 0x17C, 16bit register */ -#define PVALID B15_MASK -#define PREAD B14_MASK -#define EEDET B13_MASK -#define EEN B4_MASK -#define ECS B2_MASK -#define EESK B1_MASK -#define edi_edo b0_MASK +typedef enum { + + PHY_CMD_ACTIVE = (1 << 31), + PHY_WR_CMD = (1 << 30), + PHY_RD_CMD = 
(1 << 29), + PHY_RD_ERR = (1 << 28), + PHY_PRE_SUP = (1 << 27), + PHY_ADDR = (1 << 21) | (1 << 22) | (1 << 23)| + (1 << 24) |(1 << 25),/* 25:21 */ + PHY_REG_ADDR = (1 << 16) | (1 << 17) | (1 << 18)| (1 << 19) | (1 << 20),/* 20:16 */ + PHY_DATA = (0xF << 0)|(0xF << 4) |(0xF << 8)| + (0xF << 12),/* 15:0 */ + +}PHY_ACCESS_BITS; + /* PMAT0 0x190, 32bit register */ -#define PMR_ACTIVE B31_MASK -#define PMR_WR_CMD B30_MASK -#define PMR_RD_CMD B29_MASK -#define PMR_BANK B28_MASK -#define PMR_ADDR 0x007F0000 /* 22:16 */ -#define PMR_B4 0x000000FF /* 15:0 */ +typedef enum { + PMR_ACTIVE = (1 << 31), + PMR_WR_CMD = (1 << 30), + PMR_RD_CMD = (1 << 29), + PMR_BANK = (1 <<28), + PMR_ADDR = (0xF << 16)|(1 << 20)|(1 << 21)| + (1 << 22),/* 22:16 */ + PMR_B4 = (0xF << 0) | (0xF << 4),/* 15:0 */ +}PMAT0_BITS; + /* PMAT1 0x194, 32bit register */ -#define PMR_B3 0xFF000000 /* 31:24 */ -#define PMR_B2 0x00FF0000 /* 23:16 */ -#define PMR_B1 0x0000FF00 /* 15:8 */ -#define PMR_B0 0x000000FF /* 7:0 */ +typedef enum { + PMR_B3 = (0xF << 24) | (0xF <<28),/* 31:24 */ + PMR_B2 = (0xF << 16) |(0xF << 20),/* 23:16 */ + PMR_B1 = (0xF << 8) | (0xF <<12), /* 15:8 */ + PMR_B0 = (0xF << 0)|(0xF << 4),/* 7:0 */ +}PMAT1_BITS; /************************************************************************/ /* */ @@ -615,7 +566,7 @@ #define PCI_VENDOR_ID_AMD 0x1022 #define PCI_DEVICE_ID_AMD8111E_7462 0x7462 -#define MAX_UNITS 16 /* Maximum number of devices possible */ +#define MAX_UNITS 8 /* Maximum number of devices possible */ #define NUM_TX_BUFFERS 32 /* Number of transmit buffers */ #define NUM_RX_BUFFERS 32 /* Number of receive buffers */ @@ -637,45 +588,73 @@ #define MIN_PKT_LEN 60 #define ETH_ADDR_LEN 6 +#define AMD8111E_TX_TIMEOUT (3 * HZ)/* 3 sec */ +#define SOFT_TIMER_FREQ 0xBEBC /* 0.5 sec */ +#define DELAY_TIMER_CONV 50 /* msec to 10 usec conversion. 
+ Only 500 usec resolution */ #define OPTION_VLAN_ENABLE 0x0001 #define OPTION_JUMBO_ENABLE 0x0002 #define OPTION_MULTICAST_ENABLE 0x0004 #define OPTION_WOL_ENABLE 0x0008 #define OPTION_WAKE_MAGIC_ENABLE 0x0010 #define OPTION_WAKE_PHY_ENABLE 0x0020 +#define OPTION_INTR_COAL_ENABLE 0x0040 +#define OPTION_DYN_IPG_ENABLE 0x0080 #define PHY_REG_ADDR_MASK 0x1f +/* ipg parameters */ +#define DEFAULT_IPG 0x60 +#define IFS1_DELTA 36 +#define IPG_CONVERGE_TIME 0.5 +#define IPG_STABLE_TIME 5 +#define MIN_IPG 96 +#define MAX_IPG 255 +#define IPG_STEP 16 +#define CSTATE 1 +#define SSTATE 2 + /* Assume contoller gets data 10 times the maximum processing time */ #define REPEAT_CNT 10; /* amd8111e decriptor flag definitions */ +typedef enum { -#define OWN_BIT B15_MASK -#define ADD_FCS_BIT B13_MASK -#define LTINT_BIT B12_MASK -#define STP_BIT B9_MASK -#define ENP_BIT B8_MASK -#define KILL_BIT B6_MASK -#define TCC_MASK 0x0003 -#define TCC_VLAN_INSERT B1_MASK -#define TCC_VLAN_REPLACE 0x0003 -#define RESET_RX_FLAGS 0x0000 + OWN_BIT = (1 << 15), + ADD_FCS_BIT = (1 << 13), + LTINT_BIT = (1 << 12), + STP_BIT = (1 << 9), + ENP_BIT = (1 << 8), + KILL_BIT = (1 << 6), + TCC_VLAN_INSERT = (1 << 1), + TCC_VLAN_REPLACE = (1 << 1) |( 1<< 0), + +}TX_FLAG_BITS; + +typedef enum { + ERR_BIT = (1 << 14), + FRAM_BIT = (1 << 13), + OFLO_BIT = (1 << 12), + CRC_BIT = (1 << 11), + PAM_BIT = (1 << 6), + LAFM_BIT = (1 << 5), + BAM_BIT = (1 << 4), + TT_VLAN_TAGGED = (1 << 3) |(1 << 2),/* 0x000 */ + TT_PRTY_TAGGED = (1 << 3),/* 0x0008 */ + +}RX_FLAG_BITS; -#define ERR_BIT B14_MASK -#define FRAM_BIT B13_MASK -#define OFLO_BIT B12_MASK -#define CRC_BIT B11_MASK -#define PAM_BIT B6_MASK -#define LAFM_BIT B5_MASK -#define BAM_BIT B4_MASK +#define RESET_RX_FLAGS 0x0000 #define TT_MASK 0x000c -#define TT_VLAN_TAGGED 0x000c -#define TT_PRTY_TAGGED 0x0008 +#define TCC_MASK 0x0003 /* driver ioctl parameters */ #define PHY_ID 0x01 /* currently it is fixed */ -#define AMD8111E_REG_DUMP_LEN 4096 /* Memory mapped 
register length */ +#define AMD8111E_REG_DUMP_LEN 13*sizeof(u32) + +/* crc generator constants */ +#define CRC32 0xedb88320 +#define INITCRC 0xFFFFFFFF /* amd8111e desriptor format */ @@ -683,7 +662,7 @@ u16 buff_count; /* Size of the buffer pointed by this descriptor */ - u16 tx_dr_offset2; + u16 tx_flags; u16 tag_ctrl_info; @@ -704,7 +683,7 @@ u16 buff_count; /* Len of the buffer pointed by descriptor. */ - u16 rx_dr_offset10; + u16 rx_flags; u32 buff_phy_addr; @@ -719,10 +698,58 @@ u16 speed; u8 duplex; u8 autoneg; - u16 orig_speed; - u8 orig_duplex; u8 reserved; /* 32bit alignment */ }; + +enum coal_type{ + + NO_COALESCE, + LOW_COALESCE, + MEDIUM_COALESCE, + HIGH_COALESCE, + +}; + +enum coal_mode{ + RX_INTR_COAL, + TX_INTR_COAL, + DISABLE_COAL, + ENABLE_COAL, + +}; +#define MAX_TIMEOUT 40 +#define MAX_EVENT_COUNT 31 +struct amd8111e_coalesce_conf{ + + unsigned int rx_timeout; + unsigned int rx_event_count; + unsigned long rx_packets; + unsigned long rx_prev_packets; + unsigned long rx_bytes; + unsigned long rx_prev_bytes; + unsigned int rx_coal_type; + + unsigned int tx_timeout; + unsigned int tx_event_count; + unsigned long tx_packets; + unsigned long tx_prev_packets; + unsigned long tx_bytes; + unsigned long tx_prev_bytes; + unsigned int tx_coal_type; + +}; +struct ipg_info{ + + unsigned int ipg_state; + unsigned int ipg; + unsigned int current_ipg; + unsigned int col_cnt; + unsigned int diff_col_cnt; + unsigned int timer_tick; + unsigned int prev_ipg; + struct timer_list ipg_timer; +}; + struct amd8111e_priv{ struct amd8111e_tx_dr* tx_ring; @@ -742,45 +769,54 @@ void * mmio; spinlock_t lock; /* Guard lock */ - unsigned long rx_idx, tx_idx; /* The next free ring entry */ - unsigned long tx_complete_idx; + unsigned long rx_idx, tx_idx; /* The next free ring entry */ + unsigned long tx_complete_idx; unsigned long tx_ring_complete_idx; unsigned long tx_ring_idx; - int rx_buff_len; /* Buffer length of rx buffers */ + unsigned int rx_buff_len; /* Buffer length of 
rx buffers */ int options; /* Options enabled/disabled for the device */ + unsigned long ext_phy_option; + struct amd8111e_link_config link_config; int pm_cap; + u32 pm_state[12]; struct net_device *next; + int mii; + struct mii_if_info mii_if; #if AMD8111E_VLAN_TAG_USED struct vlan_group *vlgrp; #endif char opened; struct net_device_stats stats; - struct net_device_stats prev_stats; struct dev_mc_list* mc_list; + struct amd8111e_coalesce_conf coal_conf; + + struct ipg_info ipg_data; }; -#define AMD8111E_READ_REG64(_memMapBase, _offset, _pUlData) \ - *(u32*)(_pUlData) = readl(_memMapBase + (_offset)); \ - *((u32*)(_pUlData))+1) = readl(_memMapBase + ((_offset)+4)) - -#define AMD8111E_WRITE_REG64(_memMapBase, _offset, _pUlData) \ - writel(*(u32*)(_pUlData), _memMapBase + (_offset)); \ - writel(*(u32*)((u8*)(_pUlData)+4), _memMapBase + ((_offset)+4)) \ + +/* kernel provided writeq does not write 64 bits into the amd8111e device register instead writes only higher 32bits data into lower 32bits of the register. +BUG? 
*/ +#define amd8111e_writeq(_UlData,_memMap) \ + writel(*(u32*)(&_UlData), _memMap); \ + writel(*(u32*)((u8*)(&_UlData)+4), _memMap+4) /* maps the external speed options to internal value */ -static unsigned char speed_duplex_mapping[] = { +typedef enum { + SPEED_AUTONEG, + SPEED10_HALF, + SPEED10_FULL, + SPEED100_HALF, + SPEED100_FULL, +}EXT_PHY_OPTION; - XPHYANE, /* Auto-negotiation, speed_duplex option 0 */ - 0, /* 10M Half, speed_duplex option 1 */ - XPHYFD, /* 10M Full, speed_duplex option 2 */ - XPHYSP, /* 100M Half, speed_duplex option 3 */ - XPHYFD | XPHYSP /* 100M Full, speed_duplex option 4 */ -}; static int card_idx; static int speed_duplex[MAX_UNITS] = { 0, }; +static int coalesce[MAX_UNITS] = {1,1,1,1,1,1,1,1}; +static int dynamic_ipg[MAX_UNITS] = {0,0,0,0,0,0,0,0}; +static unsigned int chip_version; #endif /* _AMD8111E_H */ diff -Nru a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c --- a/drivers/net/arcnet/arcnet.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/arcnet/arcnet.c Thu Jun 19 23:46:52 2003 @@ -343,7 +343,7 @@ dev->hard_header_len = sizeof(struct archdr); dev->mtu = choose_mtu(); - dev->addr_len = 1; + dev->addr_len = ARCNET_ALEN; dev->tx_queue_len = 30; dev->broadcast[0] = 0x00; /* for us, broadcasts are address 0 */ dev->watchdog_timeo = TX_TIMEOUT; diff -Nru a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c --- a/drivers/net/arcnet/rfc1201.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/arcnet/rfc1201.c Thu Jun 19 23:46:52 2003 @@ -56,6 +56,7 @@ void __init arcnet_rfc1201_init(void) { arc_proto_map[ARC_P_IP] + = arc_proto_map[ARC_P_IPV6] = arc_proto_map[ARC_P_ARP] = arc_proto_map[ARC_P_RARP] = arc_proto_map[ARC_P_IPX] @@ -114,6 +115,8 @@ switch (soft->proto) { case ARC_P_IP: return htons(ETH_P_IP); + case ARC_P_IPV6: + return htons(ETH_P_IPV6); case ARC_P_ARP: return htons(ETH_P_ARP); case ARC_P_RARP: @@ -387,6 +390,9 @@ switch (type) { case ETH_P_IP: soft->proto = ARC_P_IP; + break; + case ETH_P_IPV6: + soft->proto = 
ARC_P_IPV6; break; case ETH_P_ARP: soft->proto = ARC_P_ARP; diff -Nru a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/net/bonding/Makefile Thu Jun 19 23:46:53 2003 @@ -0,0 +1,14 @@ +# +# Makefile for the Ethernet Bonding driver +# + +O_TARGET := bonding.o + +obj-y := bond_main.o \ + bond_3ad.o \ + bond_alb.o + +obj-m := $(O_TARGET) + +include $(TOPDIR)/Rules.make + diff -Nru a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/net/bonding/bond_3ad.c Thu Jun 19 23:46:53 2003 @@ -0,0 +1,2493 @@ +/* + * Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * + * Changes: + * + * 2003/05/01 - Tsippy Mendelson and + * Amir Noam + * - Added support for lacp_rate module param. + * + * 2003/05/01 - Shmulik Hen + * - Based on discussion on mailing list, changed locking scheme + * to use lock/unlock or lock_bh/unlock_bh appropriately instead + * of lock_irqsave/unlock_irqrestore. 
The new scheme helps exposing + * hidden bugs and solves system hangs that occurred due to the fact + * that holding lock_irqsave doesn't prevent softirqs from running. + * This also increases total throughput since interrupts are not + * blocked on each transmitted packets or monitor timeout. + * + * 2003/05/01 - Shmulik Hen + * - Renamed bond_3ad_link_status_changed() to + * bond_3ad_handle_link_change() for compatibility with TLB. + */ + +#include +#include +#include +#include +#include +#include +#include "bonding.h" +#include "bond_3ad.h" + +// General definitions +#define AD_SHORT_TIMEOUT 1 +#define AD_LONG_TIMEOUT 0 +#define AD_STANDBY 0x2 +#define AD_MAX_TX_IN_SECOND 3 +#define AD_COLLECTOR_MAX_DELAY 0 + +// Timer definitions(43.4.4 in the 802.3ad standard) +#define AD_FAST_PERIODIC_TIME 1 +#define AD_SLOW_PERIODIC_TIME 30 +#define AD_SHORT_TIMEOUT_TIME (3*AD_FAST_PERIODIC_TIME) +#define AD_LONG_TIMEOUT_TIME (3*AD_SLOW_PERIODIC_TIME) +#define AD_CHURN_DETECTION_TIME 60 +#define AD_AGGREGATE_WAIT_TIME 2 + +// Port state definitions(43.4.2.2 in the 802.3ad standard) +#define AD_STATE_LACP_ACTIVITY 0x1 +#define AD_STATE_LACP_TIMEOUT 0x2 +#define AD_STATE_AGGREGATION 0x4 +#define AD_STATE_SYNCHRONIZATION 0x8 +#define AD_STATE_COLLECTING 0x10 +#define AD_STATE_DISTRIBUTING 0x20 +#define AD_STATE_DEFAULTED 0x40 +#define AD_STATE_EXPIRED 0x80 + +// Port Variables definitions used by the State Machines(43.4.7 in the 802.3ad standard) +#define AD_PORT_BEGIN 0x1 +#define AD_PORT_LACP_ENABLED 0x2 +#define AD_PORT_ACTOR_CHURN 0x4 +#define AD_PORT_PARTNER_CHURN 0x8 +#define AD_PORT_READY 0x10 +#define AD_PORT_READY_N 0x20 +#define AD_PORT_MATCHED 0x40 +#define AD_PORT_STANDBY 0x80 +#define AD_PORT_SELECTED 0x100 +#define AD_PORT_MOVED 0x200 + +// Port Key definitions +// key is determined according to the link speed, duplex and +// user key(which is yet not supported) +// ------------------------------------------------------------ +// Port key : | User key | Speed 
|Duplex| +// ------------------------------------------------------------ +// 16 6 1 0 +#define AD_DUPLEX_KEY_BITS 0x1 +#define AD_SPEED_KEY_BITS 0x3E +#define AD_USER_KEY_BITS 0xFFC0 + +//dalloun +#define AD_LINK_SPEED_BITMASK_1MBPS 0x1 +#define AD_LINK_SPEED_BITMASK_10MBPS 0x2 +#define AD_LINK_SPEED_BITMASK_100MBPS 0x4 +#define AD_LINK_SPEED_BITMASK_1000MBPS 0x8 +//endalloun + +// compare MAC addresses +#define MAC_ADDRESS_COMPARE(A, B) memcmp(A, B, ETH_ALEN) + +static struct mac_addr null_mac_addr = {{0, 0, 0, 0, 0, 0}}; +static u16 ad_ticks_per_sec; + +// ================= 3AD api to bonding and kernel code ================== +static u16 __get_link_speed(struct port *port); +static u8 __get_duplex(struct port *port); +static inline void __initialize_port_locks(struct port *port); +static inline void __deinitialize_port_locks(struct port *port); +//conversions +static void __ntohs_lacpdu(struct lacpdu *lacpdu); +static u16 __ad_timer_to_ticks(u16 timer_type, u16 Par); + + +// ================= ad code helper functions ================== +//needed by ad_rx_machine(...) +static void __record_pdu(struct lacpdu *lacpdu, struct port *port); +static void __record_default(struct port *port); +static void __update_selected(struct lacpdu *lacpdu, struct port *port); +static void __update_default_selected(struct port *port); +static void __choose_matched(struct lacpdu *lacpdu, struct port *port); +static void __update_ntt(struct lacpdu *lacpdu, struct port *port); + +//needed for ad_mux_machine(..) +static void __attach_bond_to_agg(struct port *port); +static void __detach_bond_from_agg(struct port *port); +static int __agg_ports_are_ready(struct aggregator *aggregator); +static void __set_agg_ports_ready(struct aggregator *aggregator, int val); + +//needed for ad_agg_selection_logic(...) 
+static u32 __get_agg_bandwidth(struct aggregator *aggregator); +static struct aggregator *__get_active_agg(struct aggregator *aggregator); + + +// ================= main 802.3ad protocol functions ================== +static int ad_lacpdu_send(struct port *port); +static int ad_marker_send(struct port *port, struct marker *marker); +static void ad_mux_machine(struct port *port); +static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); +static void ad_tx_machine(struct port *port); +static void ad_periodic_machine(struct port *port); +static void ad_port_selection_logic(struct port *port); +static void ad_agg_selection_logic(struct aggregator *aggregator); +static void ad_clear_agg(struct aggregator *aggregator); +static void ad_initialize_agg(struct aggregator *aggregator); +static void ad_initialize_port(struct port *port, int lacp_fast); +static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); +static void ad_enable_collecting_distributing(struct port *port); +static void ad_disable_collecting_distributing(struct port *port); +static void ad_marker_info_received(struct marker *marker_info, struct port *port); +static void ad_marker_response_received(struct marker *marker, struct port *port); + + +///////////////////////////////////////////////////////////////////////////////// +// ================= api to bonding and kernel code ================== +///////////////////////////////////////////////////////////////////////////////// + +/** + * __get_bond_by_port - get the port's bonding struct + * @port: the port we're looking at + * + * Return @port's bonding struct, or %NULL if it can't be found. + */ +static inline struct bonding *__get_bond_by_port(struct port *port) +{ + if (port->slave == NULL) { + return NULL; + } + + return bond_get_bond_by_slave(port->slave); +} + +/** + * __get_first_port - get the first port in the bond + * @bond: the bond we're looking at + * + * Return the port of the first slave in @bond, or %NULL if it can't be found. 
+ */ +static inline struct port *__get_first_port(struct bonding *bond) +{ + struct slave *slave = bond->next; + + if (slave == (struct slave *)bond) { + return NULL; + } + + return &(SLAVE_AD_INFO(slave).port); +} + +/** + * __get_next_port - get the next port in the bond + * @port: the port we're looking at + * + * Return the port of the slave that is next in line of @port's slave in the + * bond, or %NULL if it can't be found. + */ +static inline struct port *__get_next_port(struct port *port) +{ + struct bonding *bond = __get_bond_by_port(port); + struct slave *slave = port->slave; + + // If there's no bond for this port, or this is the last slave + if ((bond == NULL) || (slave->next == bond->next)) { + return NULL; + } + + return &(SLAVE_AD_INFO(slave->next).port); +} + +/** + * __get_first_agg - get the first aggregator in the bond + * @bond: the bond we're looking at + * + * Return the aggregator of the first slave in @bond, or %NULL if it can't be + * found. + */ +static inline struct aggregator *__get_first_agg(struct port *port) +{ + struct bonding *bond = __get_bond_by_port(port); + + // If there's no bond for this port, or this is the last slave + if ((bond == NULL) || (bond->next == (struct slave *)bond)) { + return NULL; + } + + return &(SLAVE_AD_INFO(bond->next).aggregator); +} + +/** + * __get_next_agg - get the next aggregator in the bond + * @aggregator: the aggregator we're looking at + * + * Return the aggregator of the slave that is next in line of @aggregator's + * slave in the bond, or %NULL if it can't be found. 
+ */ +static inline struct aggregator *__get_next_agg(struct aggregator *aggregator) +{ + struct slave *slave = aggregator->slave; + struct bonding *bond = bond_get_bond_by_slave(slave); + + // If there's no bond for this aggregator, or this is the last slave + if ((bond == NULL) || (slave->next == bond->next)) { + return NULL; + } + + return &(SLAVE_AD_INFO(slave->next).aggregator); +} + +/** + * __disable_port - disable the port's slave + * @port: the port we're looking at + * + */ +static inline void __disable_port(struct port *port) +{ + bond_set_slave_inactive_flags(port->slave); +} + +/** + * __enable_port - enable the port's slave, if it's up + * @port: the port we're looking at + * + */ +static inline void __enable_port(struct port *port) +{ + struct slave *slave = port->slave; + + if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev)) { + bond_set_slave_active_flags(slave); + } +} + +/** + * __port_is_enabled - check if the port's slave is in active state + * @port: the port we're looking at + * + */ +static inline int __port_is_enabled(struct port *port) +{ + return(port->slave->state == BOND_STATE_ACTIVE); +} + +/** + * __get_agg_selection_mode - get the aggregator selection mode + * @port: the port we're looking at + * + * Get the aggregator selection mode. Can be %BANDWIDTH or %COUNT. + */ +static inline u32 __get_agg_selection_mode(struct port *port) +{ + struct bonding *bond = __get_bond_by_port(port); + + if (bond == NULL) { + return AD_BANDWIDTH; + } + + return BOND_AD_INFO(bond).agg_select_mode; +} + +/** + * __check_agg_selection_timer - check if the selection timer has expired + * @port: the port we're looking at + * + */ +static inline int __check_agg_selection_timer(struct port *port) +{ + struct bonding *bond = __get_bond_by_port(port); + + if (bond == NULL) { + return 0; + } + + return BOND_AD_INFO(bond).agg_select_timer ? 
1 : 0; +} + +/** + * __get_rx_machine_lock - lock the port's RX machine + * @port: the port we're looking at + * + */ +static inline void __get_rx_machine_lock(struct port *port) +{ + spin_lock(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +} + +/** + * __release_rx_machine_lock - unlock the port's RX machine + * @port: the port we're looking at + * + */ +static inline void __release_rx_machine_lock(struct port *port) +{ + spin_unlock(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +} + +/** + * __get_link_speed - get a port's speed + * @port: the port we're looking at + * + * Return @port's speed in 802.3ad bitmask format. i.e. one of: + * 0, + * %AD_LINK_SPEED_BITMASK_10MBPS, + * %AD_LINK_SPEED_BITMASK_100MBPS, + * %AD_LINK_SPEED_BITMASK_1000MBPS + */ +static u16 __get_link_speed(struct port *port) +{ + struct slave *slave = port->slave; + u16 speed; + + /* this if covers only a special case: when the configuration starts with + * link down, it sets the speed to 0. + * This is done in spite of the fact that the e100 driver reports 0 to be + * compatible with MVT in the future.*/ + if (slave->link != BOND_LINK_UP) { + speed=0; + } else { + switch (slave->speed) { + case SPEED_10: + speed = AD_LINK_SPEED_BITMASK_10MBPS; + break; + + case SPEED_100: + speed = AD_LINK_SPEED_BITMASK_100MBPS; + break; + + case SPEED_1000: + speed = AD_LINK_SPEED_BITMASK_1000MBPS; + break; + + default: + speed = 0; // unknown speed value from ethtool. shouldn't happen + break; + } + } + + BOND_PRINT_DBG(("Port %d Received link speed %d update from adapter", port->actor_port_number, speed)); + return speed; +} + +/** + * __get_duplex - get a port's duplex + * @port: the port we're looking at + * + * Return @port's duplex in 802.3ad bitmask format. 
i.e.: + * 0x01 if in full duplex + * 0x00 otherwise + */ +static u8 __get_duplex(struct port *port) +{ + struct slave *slave = port->slave; + + u8 retval; + + // handling a special case: when the configuration starts with + // link down, it sets the duplex to 0. + if (slave->link != BOND_LINK_UP) { + retval=0x0; + } else { + switch (slave->duplex) { + case DUPLEX_FULL: + retval=0x1; + BOND_PRINT_DBG(("Port %d Received status full duplex update from adapter", port->actor_port_number)); + break; + case DUPLEX_HALF: + default: + retval=0x0; + BOND_PRINT_DBG(("Port %d Received status NOT full duplex update from adapter", port->actor_port_number)); + break; + } + } + return retval; +} + +/** + * __initialize_port_locks - initialize a port's RX machine spinlock + * @port: the port we're looking at + * + */ +static inline void __initialize_port_locks(struct port *port) +{ + // make sure it isn't called twice + spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +} + +/** + * __deinitialize_port_locks - deinitialize a port's RX machine spinlock + * @port: the port we're looking at + * + */ +static inline void __deinitialize_port_locks(struct port *port) +{ +} + +//conversions +/** + * __ntohs_lacpdu - convert the contents of a LACPDU to host byte order + * @lacpdu: the speicifed lacpdu + * + * For each multi-byte field in the lacpdu, convert its content + */ +static void __ntohs_lacpdu(struct lacpdu *lacpdu) +{ + if (lacpdu) { + lacpdu->actor_system_priority = ntohs(lacpdu->actor_system_priority); + lacpdu->actor_key = ntohs(lacpdu->actor_key); + lacpdu->actor_port_priority = ntohs(lacpdu->actor_port_priority); + lacpdu->actor_port = ntohs(lacpdu->actor_port); + lacpdu->partner_system_priority = ntohs(lacpdu->partner_system_priority); + lacpdu->partner_key = ntohs(lacpdu->partner_key); + lacpdu->partner_port_priority = ntohs(lacpdu->partner_port_priority); + lacpdu->partner_port = ntohs(lacpdu->partner_port); + lacpdu->collector_max_delay = 
ntohs(lacpdu->collector_max_delay); + } +} + +/** + * __ad_timer_to_ticks - convert a given timer type to AD module ticks + * @timer_type: which timer to operate + * @par: timer parameter. see below + * + * If @timer_type is %current_while_timer, @par indicates long/short timer. + * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, + * %SLOW_PERIODIC_TIME. + */ +static u16 __ad_timer_to_ticks(u16 timer_type, u16 par) +{ + u16 retval=0; //to silence the compiler + + switch (timer_type) { + case AD_CURRENT_WHILE_TIMER: // for rx machine usage + if (par) { // for short or long timeout + retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); // short timeout + } else { + retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); // long timeout + } + break; + case AD_ACTOR_CHURN_TIMER: // for local churn machine + retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); + break; + case AD_PERIODIC_TIMER: // for periodic machine + retval = (par*ad_ticks_per_sec); // long timeout + break; + case AD_PARTNER_CHURN_TIMER: // for remote churn machine + retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); + break; + case AD_WAIT_WHILE_TIMER: // for selection machine + retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec); + break; + } + return retval; +} + + +///////////////////////////////////////////////////////////////////////////////// +// ================= ad_rx_machine helper functions ================== +///////////////////////////////////////////////////////////////////////////////// + +/** + * __record_pdu - record parameters from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Record the parameter values for the Actor carried in a received lacpdu as + * the current partner operational parameter values and sets + * actor_oper_port_state.defaulted to FALSE. 
+ */ +static void __record_pdu(struct lacpdu *lacpdu, struct port *port) +{ + // validate lacpdu and port + if (lacpdu && port) { + // record the new parameter values for the partner operational + port->partner_oper_port_number = lacpdu->actor_port; + port->partner_oper_port_priority = lacpdu->actor_port_priority; + port->partner_oper_system = lacpdu->actor_system; + port->partner_oper_system_priority = lacpdu->actor_system_priority; + port->partner_oper_key = lacpdu->actor_key; + // zero partener's lase states + port->partner_oper_port_state = 0; + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_LACP_ACTIVITY); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_LACP_TIMEOUT); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_AGGREGATION); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_COLLECTING); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_DISTRIBUTING); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_DEFAULTED); + port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_EXPIRED); + + // set actor_oper_port_state.defaulted to FALSE + port->actor_oper_port_state &= ~AD_STATE_DEFAULTED; + + // set the partner sync. to on if the partner is sync. and the port is matched + if ((port->sm_vars & AD_PORT_MATCHED) && (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) { + port->partner_oper_port_state |= AD_STATE_SYNCHRONIZATION; + } else { + port->partner_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + } + } +} + +/** + * __record_default - record default parameters + * @port: the port we're looking at + * + * This function records the default parameter values for the partner carried + * in the Partner Admin parameters as the current partner operational parameter + * values and sets actor_oper_port_state.defaulted to TRUE. 
+ */ +static void __record_default(struct port *port) +{ + // validate the port + if (port) { + // record the partner admin parameters + port->partner_oper_port_number = port->partner_admin_port_number; + port->partner_oper_port_priority = port->partner_admin_port_priority; + port->partner_oper_system = port->partner_admin_system; + port->partner_oper_system_priority = port->partner_admin_system_priority; + port->partner_oper_key = port->partner_admin_key; + port->partner_oper_port_state = port->partner_admin_port_state; + + // set actor_oper_port_state.defaulted to true + port->actor_oper_port_state |= AD_STATE_DEFAULTED; + } +} + +/** + * __update_selected - update a port's Selected variable from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Update the value of the selected variable, using parameter values from a + * newly received lacpdu. The parameter values for the Actor carried in the + * received PDU are compared with the corresponding operational parameter + * values for the ports partner. If one or more of the comparisons shows that + * the value(s) received in the PDU differ from the current operational values, + * then selected is set to FALSE and actor_oper_port_state.synchronization is + * set to out_of_sync. Otherwise, selected remains unchanged. 
+ */ +static void __update_selected(struct lacpdu *lacpdu, struct port *port) +{ + // validate lacpdu and port + if (lacpdu && port) { + // check if any parameter is different + if ((lacpdu->actor_port != port->partner_oper_port_number) || + (lacpdu->actor_port_priority != port->partner_oper_port_priority) || + MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->partner_oper_system)) || + (lacpdu->actor_system_priority != port->partner_oper_system_priority) || + (lacpdu->actor_key != port->partner_oper_key) || + ((lacpdu->actor_state & AD_STATE_AGGREGATION) != (port->partner_oper_port_state & AD_STATE_AGGREGATION)) + ) { + // update the state machine Selected variable + port->sm_vars &= ~AD_PORT_SELECTED; + } + } +} + +/** + * __update_default_selected - update a port's Selected variable from Partner + * @port: the port we're looking at + * + * This function updates the value of the selected variable, using the partner + * administrative parameter values. The administrative values are compared with + * the corresponding operational parameter values for the partner. If one or + * more of the comparisons shows that the administrative value(s) differ from + * the current operational values, then Selected is set to FALSE and + * actor_oper_port_state.synchronization is set to OUT_OF_SYNC. Otherwise, + * Selected remains unchanged. 
+ */ +static void __update_default_selected(struct port *port) +{ + // validate the port + if (port) { + // check if any parameter is different + if ((port->partner_admin_port_number != port->partner_oper_port_number) || + (port->partner_admin_port_priority != port->partner_oper_port_priority) || + MAC_ADDRESS_COMPARE(&(port->partner_admin_system), &(port->partner_oper_system)) || + (port->partner_admin_system_priority != port->partner_oper_system_priority) || + (port->partner_admin_key != port->partner_oper_key) || + ((port->partner_admin_port_state & AD_STATE_AGGREGATION) != (port->partner_oper_port_state & AD_STATE_AGGREGATION)) + ) { + // update the state machine Selected variable + port->sm_vars &= ~AD_PORT_SELECTED; + } + } +} + +/** + * __choose_matched - update a port's matched variable from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Update the value of the matched variable, using parameter values from a + * newly received lacpdu. Parameter values for the partner carried in the + * received PDU are compared with the corresponding operational parameter + * values for the actor. Matched is set to TRUE if all of these parameters + * match and the PDU parameter partner_state.aggregation has the same value as + * actor_oper_port_state.aggregation and lacp will actively maintain the link + * in the aggregation. Matched is also set to TRUE if the value of + * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates + * an individual link and lacp will actively maintain the link. Otherwise, + * matched is set to FALSE. LACP is considered to be actively maintaining the + * link if either the PDU's actor_state.lacp_activity variable is TRUE or both + * the actor's actor_oper_port_state.lacp_activity and the PDU's + * partner_state.lacp_activity variables are TRUE. 
+ */ +static void __choose_matched(struct lacpdu *lacpdu, struct port *port) +{ + // validate lacpdu and port + if (lacpdu && port) { + // check if all parameters are alike + if (((lacpdu->partner_port == port->actor_port_number) && + (lacpdu->partner_port_priority == port->actor_port_priority) && + !MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) && + (lacpdu->partner_system_priority == port->actor_system_priority) && + (lacpdu->partner_key == port->actor_oper_port_key) && + ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) || + // or this is individual link(aggregation == FALSE) + ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0) + ) { + // update the state machine Matched variable + port->sm_vars |= AD_PORT_MATCHED; + } else { + port->sm_vars &= ~AD_PORT_MATCHED; + } + } +} + +/** + * __update_ntt - update a port's ntt variable from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Updates the value of the ntt variable, using parameter values from a newly + * received lacpdu. The parameter values for the partner carried in the + * received PDU are compared with the corresponding operational parameter + * values for the Actor. If one or more of the comparisons shows that the + * value(s) received in the PDU differ from the current operational values, + * then ntt is set to TRUE. Otherwise, ntt remains unchanged. 
+ */ +static void __update_ntt(struct lacpdu *lacpdu, struct port *port) +{ + // validate lacpdu and port + if (lacpdu && port) { + // check if any parameter is different + if ((lacpdu->partner_port != port->actor_port_number) || + (lacpdu->partner_port_priority != port->actor_port_priority) || + MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) || + (lacpdu->partner_system_priority != port->actor_system_priority) || + (lacpdu->partner_key != port->actor_oper_port_key) || + ((lacpdu->partner_state & AD_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY)) || + ((lacpdu->partner_state & AD_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)) || + ((lacpdu->partner_state & AD_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) || + ((lacpdu->partner_state & AD_STATE_AGGREGATION) != (port->actor_oper_port_state & AD_STATE_AGGREGATION)) + ) { + // set ntt to be TRUE + port->ntt = 1; + } + } +} + +/** + * __attach_bond_to_agg + * @port: the port we're looking at + * + * Handle the attaching of the port's control parser/multiplexer and the + * aggregator. This function does nothing since the parser/multiplexer of the + * receive and the parser/multiplexer of the aggregator are already combined. + */ +static void __attach_bond_to_agg(struct port *port) +{ + port=NULL; // just to satisfy the compiler + // This function does nothing since the parser/multiplexer of the receive + // and the parser/multiplexer of the aggregator are already combined +} + +/** + * __detach_bond_from_agg + * @port: the port we're looking at + * + * Handle the detaching of the port's control parser/multiplexer from the + * aggregator. This function does nothing since the parser/multiplexer of the + * receive and the parser/multiplexer of the aggregator are already combined. 
+ */ +static void __detach_bond_from_agg(struct port *port) +{ + port=NULL; // just to satisfy the compiler + // This function does nothing sience the parser/multiplexer of the receive + // and the parser/multiplexer of the aggregator are already combined +} + +/** + * __agg_ports_are_ready - check if all ports in an aggregator are ready + * @aggregator: the aggregator we're looking at + * + */ +static int __agg_ports_are_ready(struct aggregator *aggregator) +{ + struct port *port; + int retval = 1; + + if (aggregator) { + // scan all ports in this aggregator to verfy if they are all ready + for (port=aggregator->lag_ports; port; port=port->next_port_in_aggregator) { + if (!(port->sm_vars & AD_PORT_READY_N)) { + retval = 0; + break; + } + } + } + + return retval; +} + +/** + * __set_agg_ports_ready - set value of Ready bit in all ports of an aggregator + * @aggregator: the aggregator we're looking at + * @val: Should the ports' ready bit be set on or off + * + */ +static void __set_agg_ports_ready(struct aggregator *aggregator, int val) +{ + struct port *port; + + for (port=aggregator->lag_ports; port; port=port->next_port_in_aggregator) { + if (val) { + port->sm_vars |= AD_PORT_READY; + } else { + port->sm_vars &= ~AD_PORT_READY; + } + } +} + +/** + * __get_agg_bandwidth - get the total bandwidth of an aggregator + * @aggregator: the aggregator we're looking at + * + */ +static u32 __get_agg_bandwidth(struct aggregator *aggregator) +{ + u32 bandwidth=0; + u32 basic_speed; + + if (aggregator->num_of_ports) { + basic_speed = __get_link_speed(aggregator->lag_ports); + switch (basic_speed) { + case AD_LINK_SPEED_BITMASK_1MBPS: + bandwidth = aggregator->num_of_ports; + break; + case AD_LINK_SPEED_BITMASK_10MBPS: + bandwidth = aggregator->num_of_ports * 10; + break; + case AD_LINK_SPEED_BITMASK_100MBPS: + bandwidth = aggregator->num_of_ports * 100; + break; + case AD_LINK_SPEED_BITMASK_1000MBPS: + bandwidth = aggregator->num_of_ports * 1000; + break; + default: + 
bandwidth=0; // to silent the compilor .... + } + } + return bandwidth; +} + +/** + * __get_active_agg - get the current active aggregator + * @aggregator: the aggregator we're looking at + * + */ +static struct aggregator *__get_active_agg(struct aggregator *aggregator) +{ + struct aggregator *retval = NULL; + + for (; aggregator; aggregator = __get_next_agg(aggregator)) { + if (aggregator->is_active) { + retval = aggregator; + break; + } + } + + return retval; +} + +/** + * __update_lacpdu_from_port - update a port's lacpdu fields + * @port: the port we're looking at + * + */ +static inline void __update_lacpdu_from_port(struct port *port) +{ + struct lacpdu *lacpdu = &port->lacpdu; + + /* update current actual Actor parameters */ + /* lacpdu->subtype initialized + * lacpdu->version_number initialized + * lacpdu->tlv_type_actor_info initialized + * lacpdu->actor_information_length initialized + */ + + lacpdu->actor_system_priority = port->actor_system_priority; + lacpdu->actor_system = port->actor_system; + lacpdu->actor_key = port->actor_oper_port_key; + lacpdu->actor_port_priority = port->actor_port_priority; + lacpdu->actor_port = port->actor_port_number; + lacpdu->actor_state = port->actor_oper_port_state; + + /* lacpdu->reserved_3_1 initialized + * lacpdu->tlv_type_partner_info initialized + * lacpdu->partner_information_length initialized + */ + + lacpdu->partner_system_priority = port->partner_oper_system_priority; + lacpdu->partner_system = port->partner_oper_system; + lacpdu->partner_key = port->partner_oper_key; + lacpdu->partner_port_priority = port->partner_oper_port_priority; + lacpdu->partner_port = port->partner_oper_port_number; + lacpdu->partner_state = port->partner_oper_port_state; + + /* lacpdu->reserved_3_2 initialized + * lacpdu->tlv_type_collector_info initialized + * lacpdu->collector_information_length initialized + * collector_max_delay initialized + * reserved_12[12] initialized + * tlv_type_terminator initialized + * terminator_length 
initialized + * reserved_50[50] initialized + */ + + /* Convert all non u8 parameters to Big Endian for transmit */ + __ntohs_lacpdu(lacpdu); +} + +////////////////////////////////////////////////////////////////////////////////////// +// ================= main 802.3ad protocol code ====================================== +////////////////////////////////////////////////////////////////////////////////////// + +/** + * ad_lacpdu_send - send out a lacpdu packet on a given port + * @port: the port we're looking at + * + * Returns: 0 on success + * < 0 on error + */ +static int ad_lacpdu_send(struct port *port) +{ + struct slave *slave = port->slave; + struct sk_buff *skb; + struct lacpdu_header *lacpdu_header; + int length = sizeof(struct lacpdu_header); + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length); + if (!skb) { + return -ENOMEM; + } + + skb->dev = slave->dev; + skb->mac.raw = skb->data; + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + + lacpdu_header = (struct lacpdu_header *)skb_put(skb, length); + + lacpdu_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it + to identify loopback lacpdus in receive. 
*/ + lacpdu_header->ad_header.source_address = *((struct mac_addr *)(slave->perm_hwaddr)); + lacpdu_header->ad_header.length_type = PKT_TYPE_LACPDU; + + lacpdu_header->lacpdu = port->lacpdu; // struct copy + + dev_queue_xmit(skb); + + return 0; +} + +/** + * ad_marker_send - send marker information/response on a given port + * @port: the port we're looking at + * @marker: marker data to send + * + * Returns: 0 on success + * < 0 on error + */ +static int ad_marker_send(struct port *port, struct marker *marker) +{ + struct slave *slave = port->slave; + struct sk_buff *skb; + struct marker_header *marker_header; + int length = sizeof(struct marker_header); + struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; + + skb = dev_alloc_skb(length + 16); + if (!skb) { + return -ENOMEM; + } + + skb_reserve(skb, 16); + + skb->dev = slave->dev; + skb->mac.raw = skb->data; + skb->nh.raw = skb->data + ETH_HLEN; + skb->protocol = PKT_TYPE_LACPDU; + + marker_header = (struct marker_header *)skb_put(skb, length); + + marker_header->ad_header.destination_address = lacpdu_multicast_address; + /* Note: source addres is set to be the member's PERMANENT address, because we use it + to identify loopback MARKERs in receive. 
*/ + marker_header->ad_header.source_address = *((struct mac_addr *)(slave->perm_hwaddr)); + marker_header->ad_header.length_type = PKT_TYPE_LACPDU; + + marker_header->marker = *marker; // struct copy + + dev_queue_xmit(skb); + + return 0; +} + +/** + * ad_mux_machine - handle a port's mux state machine + * @port: the port we're looking at + * + */ +static void ad_mux_machine(struct port *port) +{ + mux_states_t last_state; + + // keep current State Machine state to compare later if it was changed + last_state = port->sm_mux_state; + + if (port->sm_vars & AD_PORT_BEGIN) { + port->sm_mux_state = AD_MUX_DETACHED; // next state + } else { + switch (port->sm_mux_state) { + case AD_MUX_DETACHED: + if ((port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { // if SELECTED or STANDBY + port->sm_mux_state = AD_MUX_WAITING; // next state + } + break; + case AD_MUX_WAITING: + // if SELECTED == FALSE return to DETACH state + if (!(port->sm_vars & AD_PORT_SELECTED)) { // if UNSELECTED + port->sm_vars &= ~AD_PORT_READY_N; + // in order to withhold the Selection Logic to check all ports READY_N value + // every callback cycle to update ready variable, we check READY_N and update READY here + __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + port->sm_mux_state = AD_MUX_DETACHED; // next state + break; + } + + // check if the wait_while_timer expired + if (port->sm_mux_timer_counter && !(--port->sm_mux_timer_counter)) { + port->sm_vars |= AD_PORT_READY_N; + } + + // in order to withhold the selection logic to check all ports READY_N value + // every callback cycle to update ready variable, we check READY_N and update READY here + __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + + // if the wait_while_timer expired, and the port is in READY state, move to ATTACHED state + if ((port->sm_vars & AD_PORT_READY) && !port->sm_mux_timer_counter) { + port->sm_mux_state = AD_MUX_ATTACHED; // next state + 
} + break; + case AD_MUX_ATTACHED: + // check also if agg_select_timer expired(so the edable port will take place only after this timer) + if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper_port_state & AD_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { + port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING;// next state + } else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { // if UNSELECTED or STANDBY + port->sm_vars &= ~AD_PORT_READY_N; + // in order to withhold the selection logic to check all ports READY_N value + // every callback cycle to update ready variable, we check READY_N and update READY here + __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + port->sm_mux_state = AD_MUX_DETACHED;// next state + } + break; + case AD_MUX_COLLECTING_DISTRIBUTING: + if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || + !(port->partner_oper_port_state & AD_STATE_SYNCHRONIZATION) + ) { + port->sm_mux_state = AD_MUX_ATTACHED;// next state + + } else { + // if port state hasn't changed make + // sure that a collecting distributing + // port in an active aggregator is enabled + if (port->aggregator && + port->aggregator->is_active && + !__port_is_enabled(port)) { + + __enable_port(port); + } + } + break; + default: //to silence the compiler + break; + } + } + + // check if the state machine was changed + if (port->sm_mux_state != last_state) { + BOND_PRINT_DBG(("Mux Machine: Port=%d, Last State=%d, Curr State=%d", port->actor_port_number, last_state, port->sm_mux_state)); + switch (port->sm_mux_state) { + case AD_MUX_DETACHED: + __detach_bond_from_agg(port); + port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + ad_disable_collecting_distributing(port); + port->actor_oper_port_state &= ~AD_STATE_COLLECTING; + port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; + port->ntt = 1; + break; + case AD_MUX_WAITING: + port->sm_mux_timer_counter = 
__ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); + break; + case AD_MUX_ATTACHED: + __attach_bond_to_agg(port); + port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~AD_STATE_COLLECTING; + port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; + ad_disable_collecting_distributing(port); + port->ntt = 1; + break; + case AD_MUX_COLLECTING_DISTRIBUTING: + port->actor_oper_port_state |= AD_STATE_COLLECTING; + port->actor_oper_port_state |= AD_STATE_DISTRIBUTING; + ad_enable_collecting_distributing(port); + port->ntt = 1; + break; + default: //to silence the compiler + break; + } + } +} + +/** + * ad_rx_machine - handle a port's rx State Machine + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * If lacpdu arrived, stop previous timer (if exists) and set the next state as + * CURRENT. If timer expired set the state machine in the proper state. + * In other cases, this function checks if we need to switch to other state. + */ +static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) +{ + rx_states_t last_state; + + // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback) + __get_rx_machine_lock(port); + + // keep current State Machine state to compare later if it was changed + last_state = port->sm_rx_state; + + // check if state machine should change state + // first, check if port was reinitialized + if (port->sm_vars & AD_PORT_BEGIN) { + port->sm_rx_state = AD_RX_INITIALIZE; // next state + } + // check if port is not enabled + else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled && !(port->sm_vars & AD_PORT_MOVED)) { + port->sm_rx_state = AD_RX_PORT_DISABLED; // next state + } + // check if new lacpdu arrived + else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) { + port->sm_rx_timer_counter = 0; // zero timer + port->sm_rx_state = 
AD_RX_CURRENT; + } else { + // if timer is on, and if it is expired + if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) { + switch (port->sm_rx_state) { + case AD_RX_EXPIRED: + port->sm_rx_state = AD_RX_DEFAULTED; // next state + break; + case AD_RX_CURRENT: + port->sm_rx_state = AD_RX_EXPIRED; // next state + break; + default: //to silence the compiler + break; + } + } else { + // if no lacpdu arrived and no timer is on + switch (port->sm_rx_state) { + case AD_RX_PORT_DISABLED: + if (port->sm_vars & AD_PORT_MOVED) { + port->sm_rx_state = AD_RX_INITIALIZE; // next state + } else if (port->is_enabled && (port->sm_vars & AD_PORT_LACP_ENABLED)) { + port->sm_rx_state = AD_RX_EXPIRED; // next state + } else if (port->is_enabled && ((port->sm_vars & AD_PORT_LACP_ENABLED) == 0)) { + port->sm_rx_state = AD_RX_LACP_DISABLED; // next state + } + break; + default: //to silence the compiler + break; + + } + } + } + + // check if the State machine was changed or new lacpdu arrived + if ((port->sm_rx_state != last_state) || (lacpdu)) { + BOND_PRINT_DBG(("Rx Machine: Port=%d, Last State=%d, Curr State=%d", port->actor_port_number, last_state, port->sm_rx_state)); + switch (port->sm_rx_state) { + case AD_RX_INITIALIZE: + if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)) { + port->sm_vars &= ~AD_PORT_LACP_ENABLED; + } else { + port->sm_vars |= AD_PORT_LACP_ENABLED; + } + port->sm_vars &= ~AD_PORT_SELECTED; + __record_default(port); + port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + port->sm_vars &= ~AD_PORT_MOVED; + port->sm_rx_state = AD_RX_PORT_DISABLED; // next state + + /*- Fall Through -*/ + + case AD_RX_PORT_DISABLED: + port->sm_vars &= ~AD_PORT_MATCHED; + break; + case AD_RX_LACP_DISABLED: + port->sm_vars &= ~AD_PORT_SELECTED; + __record_default(port); + port->partner_oper_port_state &= ~AD_STATE_AGGREGATION; + port->sm_vars |= AD_PORT_MATCHED; + port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + break; + case AD_RX_EXPIRED: + //Reset of the 
Synchronization flag. (Standard 43.4.12) + //This reset cause to disable this port in the COLLECTING_DISTRIBUTING state of the + //mux machine in case of EXPIRED even if LINK_DOWN didn't arrive for the port. + port->partner_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + port->sm_vars &= ~AD_PORT_MATCHED; + port->partner_oper_port_state |= AD_SHORT_TIMEOUT; + port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); + port->actor_oper_port_state |= AD_STATE_EXPIRED; + break; + case AD_RX_DEFAULTED: + __update_default_selected(port); + __record_default(port); + port->sm_vars |= AD_PORT_MATCHED; + port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + break; + case AD_RX_CURRENT: + // detect loopback situation + if (!MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->actor_system))) { + // INFO_RECEIVED_LOOPBACK_FRAMES + printk(KERN_ERR "bonding: An illegal loopback occurred on adapter (%s)\n", + port->slave->dev->name); + printk(KERN_ERR "Check the configuration to verify that all Adapters " + "are connected to 802.3ad compliant switch ports\n"); + __release_rx_machine_lock(port); + return; + } + __update_selected(lacpdu, port); + __update_ntt(lacpdu, port); + __record_pdu(lacpdu, port); + __choose_matched(lacpdu, port); + port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)); + port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + // verify that if the aggregator is enabled, the port is enabled too. 
+ //(because if the link goes down for a short time, the 802.3ad will not + // catch it, and the port will continue to be disabled) + if (port->aggregator && port->aggregator->is_active && !__port_is_enabled(port)) { + __enable_port(port); + } + break; + default: //to silence the compiler + break; + } + } + __release_rx_machine_lock(port); +} + +/** + * ad_tx_machine - handle a port's tx state machine + * @port: the port we're looking at + * + */ +static void ad_tx_machine(struct port *port) +{ + // check if tx timer expired, to verify that we do not send more than 3 packets per second + if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { + // check if there is something to send + if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) { + __update_lacpdu_from_port(port); + // send the lacpdu + if (ad_lacpdu_send(port) >= 0) { + BOND_PRINT_DBG(("Sent LACPDU on port %d", port->actor_port_number)); + // mark ntt as false, so it will not be sent again until demanded + port->ntt = 0; + } + } + // restart tx timer(to verify that we will not exceed AD_MAX_TX_IN_SECOND + port->sm_tx_timer_counter=ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; + } +} + +/** + * ad_periodic_machine - handle a port's periodic state machine + * @port: the port we're looking at + * + * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. 
+ */ +static void ad_periodic_machine(struct port *port) +{ + periodic_states_t last_state; + + // keep current state machine state to compare later if it was changed + last_state = port->sm_periodic_state; + + // check if port was reinitialized + if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || + (!(port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY) && !(port->partner_oper_port_state & AD_STATE_LACP_ACTIVITY)) + ) { + port->sm_periodic_state = AD_NO_PERIODIC; // next state + } + // check if state machine should change state + else if (port->sm_periodic_timer_counter) { + // check if periodic state machine expired + if (!(--port->sm_periodic_timer_counter)) { + // if expired then do tx + port->sm_periodic_state = AD_PERIODIC_TX; // next state + } else { + // If not expired, check if there is some new timeout parameter from the partner state + switch (port->sm_periodic_state) { + case AD_FAST_PERIODIC: + if (!(port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { + port->sm_periodic_state = AD_SLOW_PERIODIC; // next state + } + break; + case AD_SLOW_PERIODIC: + if ((port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { + // stop current timer + port->sm_periodic_timer_counter = 0; + port->sm_periodic_state = AD_PERIODIC_TX; // next state + } + break; + default: //to silence the compiler + break; + } + } + } else { + switch (port->sm_periodic_state) { + case AD_NO_PERIODIC: + port->sm_periodic_state = AD_FAST_PERIODIC; // next state + break; + case AD_PERIODIC_TX: + if (!(port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) { + port->sm_periodic_state = AD_SLOW_PERIODIC; // next state + } else { + port->sm_periodic_state = AD_FAST_PERIODIC; // next state + } + break; + default: //to silence the compiler + break; + } + } + + // check if the state machine was changed + if (port->sm_periodic_state != last_state) { + BOND_PRINT_DBG(("Periodic Machine: Port=%d, Last State=%d, Curr State=%d", 
port->actor_port_number, last_state, port->sm_periodic_state)); + switch (port->sm_periodic_state) { + case AD_NO_PERIODIC: + port->sm_periodic_timer_counter = 0; // zero timer + break; + case AD_FAST_PERIODIC: + port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle + break; + case AD_SLOW_PERIODIC: + port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle + break; + case AD_PERIODIC_TX: + port->ntt = 1; + break; + default: //to silence the compiler + break; + } + } +} + +/** + * ad_port_selection_logic - select aggregation groups + * @port: the port we're looking at + * + * Select aggregation groups, and assign each port for it's aggregetor. The + * selection logic is called in the inititalization (after all the handshkes), + * and after every lacpdu receive (if selected is off). + */ +static void ad_port_selection_logic(struct port *port) +{ + struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; + struct port *last_port = NULL, *curr_port; + int found = 0; + + // if the port is already Selected, do nothing + if (port->sm_vars & AD_PORT_SELECTED) { + return; + } + + // if the port is connected to other aggregator, detach it + if (port->aggregator) { + // detach the port from its former aggregator + temp_aggregator=port->aggregator; + for (curr_port=temp_aggregator->lag_ports; curr_port; last_port=curr_port, curr_port=curr_port->next_port_in_aggregator) { + if (curr_port == port) { + temp_aggregator->num_of_ports--; + if (!last_port) {// if it is the first port attached to the aggregator + temp_aggregator->lag_ports=port->next_port_in_aggregator; + } else {// not the first port attached to the aggregator + last_port->next_port_in_aggregator=port->next_port_in_aggregator; + } + + // clear the port's relations to this aggregator + 
port->aggregator = NULL; + port->next_port_in_aggregator=NULL; + port->actor_port_aggregator_identifier=0; + + BOND_PRINT_DBG(("Port %d left LAG %d", port->actor_port_number, temp_aggregator->aggregator_identifier)); + // if the aggregator is empty, clear its parameters, and set it ready to be attached + if (!temp_aggregator->lag_ports) { + ad_clear_agg(temp_aggregator); + } + break; + } + } + if (!curr_port) { // meaning: the port was related to an aggregator but was not on the aggregator port list + printk(KERN_WARNING "bonding: Warning: Port %d (on %s) was " + "related to aggregator %d but was not on its port list\n", + port->actor_port_number, port->slave->dev->name, + port->aggregator->aggregator_identifier); + } + } + // search on all aggregators for a suitable aggregator for this port + for (aggregator = __get_first_agg(port); aggregator; + aggregator = __get_next_agg(aggregator)) { + + // keep a free aggregator for later use(if needed) + if (!aggregator->lag_ports) { + if (!free_aggregator) { + free_aggregator=aggregator; + } + continue; + } + // check if current aggregator suits us + if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && // if all parameters match AND + !MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(port->partner_oper_system)) && + (aggregator->partner_system_priority == port->partner_oper_system_priority) && + (aggregator->partner_oper_aggregator_key == port->partner_oper_key) + ) && + ((MAC_ADDRESS_COMPARE(&(port->partner_oper_system), &(null_mac_addr)) && // partner answers + !aggregator->is_individual) // but is not individual OR + ) + ) { + // attach to the founded aggregator + port->aggregator = aggregator; + port->actor_port_aggregator_identifier=port->aggregator->aggregator_identifier; + port->next_port_in_aggregator=aggregator->lag_ports; + port->aggregator->num_of_ports++; + aggregator->lag_ports=port; + BOND_PRINT_DBG(("Port %d joined LAG %d(existing LAG)", port->actor_port_number, 
port->aggregator->aggregator_identifier)); + + // mark this port as selected + port->sm_vars |= AD_PORT_SELECTED; + found = 1; + break; + } + } + + // the port couldn't find an aggregator - attach it to a new aggregator + if (!found) { + if (free_aggregator) { + // assign port a new aggregator + port->aggregator = free_aggregator; + port->actor_port_aggregator_identifier=port->aggregator->aggregator_identifier; + + // update the new aggregator's parameters + // if port was responsed from the end-user + if (port->actor_oper_port_key & AD_DUPLEX_KEY_BITS) {// if port is full duplex + port->aggregator->is_individual = 0; + } else { + port->aggregator->is_individual = 1; + } + + port->aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; + port->aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; + port->aggregator->partner_system=port->partner_oper_system; + port->aggregator->partner_system_priority = port->partner_oper_system_priority; + port->aggregator->partner_oper_aggregator_key = port->partner_oper_key; + port->aggregator->receive_state = 1; + port->aggregator->transmit_state = 1; + port->aggregator->lag_ports = port; + port->aggregator->num_of_ports++; + + // mark this port as selected + port->sm_vars |= AD_PORT_SELECTED; + + BOND_PRINT_DBG(("Port %d joined LAG %d(new LAG)", port->actor_port_number, port->aggregator->aggregator_identifier)); + } else { + printk(KERN_ERR "bonding: Port %d (on %s) did not find a suitable aggregator\n", + port->actor_port_number, port->slave->dev->name); + } + } + // if all aggregator's ports are READY_N == TRUE, set ready=TRUE in all aggregator's ports + // else set ready=FALSE in all aggregator's ports + __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + + if (!__check_agg_selection_timer(port) && (aggregator = __get_first_agg(port))) { + ad_agg_selection_logic(aggregator); + } +} + +/** + * ad_agg_selection_logic - select an aggregation group for a team + * 
@aggregator: the aggregator we're looking at + * + * It is assumed that only one aggregator may be selected for a team. + * The logic of this function is to select (at first time) the aggregator with + * the most ports attached to it, and to reselect the active aggregator only if + * the previous aggregator has no more ports related to it. + * + * FIXME: this function MUST be called with the first agg in the bond, or + * __get_active_agg() won't work correctly. This function should be better + * called with the bond itself, and retrieve the first agg from it. + */ +static void ad_agg_selection_logic(struct aggregator *aggregator) +{ + struct aggregator *best_aggregator = NULL, *active_aggregator = NULL; + struct aggregator *last_active_aggregator = NULL, *origin_aggregator; + struct port *port; + u16 num_of_aggs=0; + + origin_aggregator = aggregator; + + //get current active aggregator + last_active_aggregator = __get_active_agg(aggregator); + + // search for the aggregator with the most ports attached to it. + do { + // count how many candidate lag's we have + if (aggregator->lag_ports) { + num_of_aggs++; + } + if (aggregator->is_active && !aggregator->is_individual && // if current aggregator is the active aggregator + MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr))) { // and partner answers to 802.3ad PDUs + if (aggregator->num_of_ports) { // if any ports attached to the current aggregator + best_aggregator=NULL; // disregard the best aggregator that was chosen by now + break; // stop the selection of other aggregator if there are any ports attached to this active aggregator + } else { // no ports attached to this active aggregator + aggregator->is_active = 0; // mark this aggregator as not active anymore + } + } + if (aggregator->num_of_ports) { // if any ports attached + if (best_aggregator) { // if there is a candidte aggregator + //The reasons for choosing new best aggregator: + // 1. 
if current agg is NOT individual and the best agg chosen so far is individual OR + // current and best aggs are both individual or both not individual, AND + // 2a. current agg partner reply but best agg partner do not reply OR + // 2b. current agg partner reply OR current agg partner do not reply AND best agg partner also do not reply AND + // current has more ports/bandwidth, or same amount of ports but current has faster ports, THEN + // current agg become best agg so far + + //if current agg is NOT individual and the best agg chosen so far is individual change best_aggregator + if (!aggregator->is_individual && best_aggregator->is_individual) { + best_aggregator=aggregator; + } + // current and best aggs are both individual or both not individual + else if ((aggregator->is_individual && best_aggregator->is_individual) || + (!aggregator->is_individual && !best_aggregator->is_individual)) { + // current and best aggs are both individual or both not individual AND + // current agg partner reply but best agg partner do not reply + if ((MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr)) && + !MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) { + best_aggregator=aggregator; + } + // current agg partner reply OR current agg partner do not reply AND best agg partner also do not reply + else if (! 
(!MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr)) && + MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) { + if ((__get_agg_selection_mode(aggregator->lag_ports) == AD_BANDWIDTH)&& + (__get_agg_bandwidth(aggregator) > __get_agg_bandwidth(best_aggregator))) { + best_aggregator=aggregator; + } else if (__get_agg_selection_mode(aggregator->lag_ports) == AD_COUNT) { + if (((aggregator->num_of_ports > best_aggregator->num_of_ports) && + (aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS))|| + ((aggregator->num_of_ports == best_aggregator->num_of_ports) && + ((u16)(aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS) > + (u16)(best_aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS)))) { + best_aggregator=aggregator; + } + } + } + } + } else { + best_aggregator=aggregator; + } + } + aggregator->is_active = 0; // mark all aggregators as not active anymore + } while ((aggregator = __get_next_agg(aggregator))); + + // if we have new aggregator selected, don't replace the old aggregator if it has an answering partner, + // or if both old aggregator and new aggregator don't have answering partner + if (best_aggregator) { + if (last_active_aggregator && last_active_aggregator->lag_ports && last_active_aggregator->lag_ports->is_enabled && + (MAC_ADDRESS_COMPARE(&(last_active_aggregator->partner_system), &(null_mac_addr)) || // partner answers OR + (!MAC_ADDRESS_COMPARE(&(last_active_aggregator->partner_system), &(null_mac_addr)) && // both old and new + !MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) // partner do not answer + ) { + // if new aggregator has link, and old aggregator does not, replace old aggregator.(do nothing) + // -> don't replace otherwise. 
+ if (!(!last_active_aggregator->actor_oper_aggregator_key && best_aggregator->actor_oper_aggregator_key)) { + best_aggregator=NULL; + last_active_aggregator->is_active = 1; // don't replace good old aggregator + + } + } + } + + // if there is new best aggregator, activate it + if (best_aggregator) { + for (aggregator = __get_first_agg(best_aggregator->lag_ports); + aggregator; + aggregator = __get_next_agg(aggregator)) { + + BOND_PRINT_DBG(("Agg=%d; Ports=%d; a key=%d; p key=%d; Indiv=%d; Active=%d", + aggregator->aggregator_identifier, aggregator->num_of_ports, + aggregator->actor_oper_aggregator_key, aggregator->partner_oper_aggregator_key, + aggregator->is_individual, aggregator->is_active)); + } + + // check if any partner replys + if (best_aggregator->is_individual) { + printk(KERN_WARNING "bonding: Warning: No 802.3ad response from the link partner " + "for any adapters in the bond\n"); + } + + // check if there are more than one aggregator + if (num_of_aggs > 1) { + BOND_PRINT_DBG(("Warning: More than one Link Aggregation Group was " + "found in the bond. 
Only one group will function in the bond")); + } + + best_aggregator->is_active = 1; + BOND_PRINT_DBG(("LAG %d choosed as the active LAG", best_aggregator->aggregator_identifier)); + BOND_PRINT_DBG(("Agg=%d; Ports=%d; a key=%d; p key=%d; Indiv=%d; Active=%d", + best_aggregator->aggregator_identifier, best_aggregator->num_of_ports, + best_aggregator->actor_oper_aggregator_key, best_aggregator->partner_oper_aggregator_key, + best_aggregator->is_individual, best_aggregator->is_active)); + + // disable the ports that were related to the former active_aggregator + if (last_active_aggregator) { + for (port=last_active_aggregator->lag_ports; port; port=port->next_port_in_aggregator) { + __disable_port(port); + } + } + } + + // if the selected aggregator is of join individuals(partner_system is NULL), enable their ports + active_aggregator = __get_active_agg(origin_aggregator); + + if (active_aggregator) { + if (!MAC_ADDRESS_COMPARE(&(active_aggregator->partner_system), &(null_mac_addr))) { + for (port=active_aggregator->lag_ports; port; port=port->next_port_in_aggregator) { + __enable_port(port); + } + } + } +} + +/** + * ad_clear_agg - clear a given aggregator's parameters + * @aggregator: the aggregator we're looking at + * + */ +static void ad_clear_agg(struct aggregator *aggregator) +{ + if (aggregator) { + aggregator->is_individual = 0; + aggregator->actor_admin_aggregator_key = 0; + aggregator->actor_oper_aggregator_key = 0; + aggregator->partner_system = null_mac_addr; + aggregator->partner_system_priority = 0; + aggregator->partner_oper_aggregator_key = 0; + aggregator->receive_state = 0; + aggregator->transmit_state = 0; + aggregator->lag_ports = NULL; + aggregator->is_active = 0; + aggregator->num_of_ports = 0; + BOND_PRINT_DBG(("LAG %d was cleared", aggregator->aggregator_identifier)); + } +} + +/** + * ad_initialize_agg - initialize a given aggregator's parameters + * @aggregator: the aggregator we're looking at + * + */ +static void ad_initialize_agg(struct 
aggregator *aggregator) +{ + if (aggregator) { + ad_clear_agg(aggregator); + + aggregator->aggregator_mac_address = null_mac_addr; + aggregator->aggregator_identifier = 0; + aggregator->slave = NULL; + } +} + +/** + * ad_initialize_port - initialize a given port's parameters + * @aggregator: the aggregator we're looking at + * @lacp_fast: boolean. whether fast periodic should be used + * + */ +static void ad_initialize_port(struct port *port, int lacp_fast) +{ + if (port) { + port->actor_port_number = 1; + port->actor_port_priority = 0xff; + port->actor_system = null_mac_addr; + port->actor_system_priority = 0xffff; + port->actor_port_aggregator_identifier = 0; + port->ntt = 0; + port->actor_admin_port_key = 1; + port->actor_oper_port_key = 1; + port->actor_admin_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; + port->actor_oper_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; + + if (lacp_fast) { + port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; + } + + port->partner_admin_system = null_mac_addr; + port->partner_oper_system = null_mac_addr; + port->partner_admin_system_priority = 0xffff; + port->partner_oper_system_priority = 0xffff; + port->partner_admin_key = 1; + port->partner_oper_key = 1; + port->partner_admin_port_number = 1; + port->partner_oper_port_number = 1; + port->partner_admin_port_priority = 0xff; + port->partner_oper_port_priority = 0xff; + port->partner_admin_port_state = 1; + port->partner_oper_port_state = 1; + port->is_enabled = 1; + // ****** private parameters ****** + port->sm_vars = 0x3; + port->sm_rx_state = 0; + port->sm_rx_timer_counter = 0; + port->sm_periodic_state = 0; + port->sm_periodic_timer_counter = 0; + port->sm_mux_state = 0; + port->sm_mux_timer_counter = 0; + port->sm_tx_state = 0; + port->sm_tx_timer_counter = 0; + port->slave = NULL; + port->aggregator = NULL; + port->next_port_in_aggregator = NULL; + port->transaction_id = 0; + + ad_initialize_lacpdu(&(port->lacpdu)); + } +} + +/** + * 
ad_enable_collecting_distributing - enable a port's transmit/receive + * @port: the port we're looking at + * + * Enable @port if it's in an active aggregator + */ +static void ad_enable_collecting_distributing(struct port *port) +{ + if (port->aggregator->is_active) { + BOND_PRINT_DBG(("Enabling port %d(LAG %d)", port->actor_port_number, port->aggregator->aggregator_identifier)); + __enable_port(port); + } +} + +/** + * ad_disable_collecting_distributing - disable a port's transmit/receive + * @port: the port we're looking at + * + */ +static void ad_disable_collecting_distributing(struct port *port) +{ + if (port->aggregator && MAC_ADDRESS_COMPARE(&(port->aggregator->partner_system), &(null_mac_addr))) { + BOND_PRINT_DBG(("Disabling port %d(LAG %d)", port->actor_port_number, port->aggregator->aggregator_identifier)); + __disable_port(port); + } +} + +#if 0 +/** + * ad_marker_info_send - send a marker information frame + * @port: the port we're looking at + * + * This function does nothing since we decided not to implement send and handle + * response for marker PDU's, in this stage, but only to respond to marker + * information. 
+ */ +static void ad_marker_info_send(struct port *port) +{ + struct marker marker; + u16 index; + + // fill the marker PDU with the appropriate values + marker.subtype = 0x02; + marker.version_number = 0x01; + marker.tlv_type = AD_MARKER_INFORMATION_SUBTYPE; + marker.marker_length = 0x16; + // convert requester_port to Big Endian + marker.requester_port = (((port->actor_port_number & 0xFF) << 8) |((u16)(port->actor_port_number & 0xFF00) >> 8)); + marker.requester_system = port->actor_system; + // convert requester_port(u32) to Big Endian + marker.requester_transaction_id = (((++port->transaction_id & 0xFF) << 24) |((port->transaction_id & 0xFF00) << 8) |((port->transaction_id & 0xFF0000) >> 8) |((port->transaction_id & 0xFF000000) >> 24)); + marker.pad = 0; + marker.tlv_type_terminator = 0x00; + marker.terminator_length = 0x00; + for (index=0; index<90; index++) { + marker.reserved_90[index]=0; + } + + // send the marker information + if (ad_marker_send(port, &marker) >= 0) { + BOND_PRINT_DBG(("Sent Marker Information on port %d", port->actor_port_number)); + } +} +#endif + +/** + * ad_marker_info_received - handle receive of a Marker information frame + * @marker_info: Marker info received + * @port: the port we're looking at + * + */ +static void ad_marker_info_received(struct marker *marker_info,struct port *port) +{ + struct marker marker; + + // copy the received marker data to the response marker + //marker = *marker_info; + memcpy(&marker, marker_info, sizeof(struct marker)); + // change the marker subtype to marker response + marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; + // send the marker response + + if (ad_marker_send(port, &marker) >= 0) { + BOND_PRINT_DBG(("Sent Marker Response on port %d", port->actor_port_number)); + } +} + +/** + * ad_marker_response_received - handle receive of a marker response frame + * @marker: marker PDU received + * @port: the port we're looking at + * + * This function does nothing since we decided not to implement send and 
handle + * response for marker PDU's, in this stage, but only to respond to marker + * information. + */ +static void ad_marker_response_received(struct marker *marker, struct port *port) +{ + marker=NULL; // just to satisfy the compiler + port=NULL; // just to satisfy the compiler + // DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW +} + +/** + * ad_initialize_lacpdu - initialize a given lacpdu structure + * @lacpdu: lacpdu structure to initialize + * + */ +static void ad_initialize_lacpdu(struct lacpdu *lacpdu) +{ + u16 index; + + // initialize lacpdu data + lacpdu->subtype = 0x01; + lacpdu->version_number = 0x01; + lacpdu->tlv_type_actor_info = 0x01; + lacpdu->actor_information_length = 0x14; + // lacpdu->actor_system_priority updated on send + // lacpdu->actor_system updated on send + // lacpdu->actor_key updated on send + // lacpdu->actor_port_priority updated on send + // lacpdu->actor_port updated on send + // lacpdu->actor_state updated on send + lacpdu->tlv_type_partner_info = 0x02; + lacpdu->partner_information_length = 0x14; + for (index=0; index<=2; index++) { + lacpdu->reserved_3_1[index]=0; + } + // lacpdu->partner_system_priority updated on send + // lacpdu->partner_system updated on send + // lacpdu->partner_key updated on send + // lacpdu->partner_port_priority updated on send + // lacpdu->partner_port updated on send + // lacpdu->partner_state updated on send + for (index=0; index<=2; index++) { + lacpdu->reserved_3_2[index]=0; + } + lacpdu->tlv_type_collector_info = 0x03; + lacpdu->collector_information_length= 0x10; + lacpdu->collector_max_delay = AD_COLLECTOR_MAX_DELAY; + for (index=0; index<=11; index++) { + lacpdu->reserved_12[index]=0; + } + lacpdu->tlv_type_terminator = 0x00; + lacpdu->terminator_length = 0; + for (index=0; index<=49; index++) { + lacpdu->reserved_50[index]=0; + } +} + +////////////////////////////////////////////////////////////////////////////////////// +// ================= AD exported functions to the 
main bonding code ================== +////////////////////////////////////////////////////////////////////////////////////// + +// Check aggregators status in team every T seconds +#define AD_AGGREGATOR_SELECTION_TIMER 8 + +static u16 aggregator_identifier; + +/** + * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures + * @bond: bonding struct to work on + * @tick_resolution: tick duration (millisecond resolution) + * @lacp_fast: boolean. whether fast periodic should be used + * + * Can be called only after the mac address of the bond is set. + */ +void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast) +{ + // check that the bond is not initialized yet + if (MAC_ADDRESS_COMPARE(&(BOND_AD_INFO(bond).system.sys_mac_addr), &(bond->device->dev_addr))) { + + aggregator_identifier = 0; + + BOND_AD_INFO(bond).lacp_fast = lacp_fast; + BOND_AD_INFO(bond).system.sys_priority = 0xFFFF; + BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->device->dev_addr); + + // initialize how many times this module is called in one second(should be about every 100ms) + ad_ticks_per_sec = tick_resolution; + + // initialize the aggregator selection timer(to activate an aggregation selection after initialize) + BOND_AD_INFO(bond).agg_select_timer = (AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); + BOND_AD_INFO(bond).agg_select_mode = AD_BANDWIDTH; + } +} + +/** + * bond_3ad_bind_slave - initialize a slave's port + * @slave: slave struct to work on + * + * Returns: 0 on success + * < 0 on error + */ +int bond_3ad_bind_slave(struct slave *slave) +{ + struct bonding *bond = bond_get_bond_by_slave(slave); + struct port *port; + struct aggregator *aggregator; + + if (bond == NULL) { + printk(KERN_CRIT "The slave %s is not attached to its bond\n", slave->dev->name); + return -1; + } + + //check that the slave has not been intialized yet. 
+ if (SLAVE_AD_INFO(slave).port.slave != slave) { + + // port initialization + port = &(SLAVE_AD_INFO(slave).port); + + ad_initialize_port(port, BOND_AD_INFO(bond).lacp_fast); + + port->slave = slave; + port->actor_port_number = SLAVE_AD_INFO(slave).id; + // key is determined according to the link speed, duplex and user key(which is yet not supported) + // ------------------------------------------------------------ + // Port key : | User key | Speed |Duplex| + // ------------------------------------------------------------ + // 16 6 1 0 + port->actor_admin_port_key = 0; // initialize this parameter + port->actor_admin_port_key |= __get_duplex(port); + port->actor_admin_port_key |= (__get_link_speed(port) << 1); + port->actor_oper_port_key = port->actor_admin_port_key; + // if the port is not full duplex, then the port should be not lacp Enabled + if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)) { + port->sm_vars &= ~AD_PORT_LACP_ENABLED; + } + // actor system is the bond's system + port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; + // tx timer(to verify that no more than MAX_TX_IN_SECOND lacpdu's are sent in one second) + port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; + port->aggregator = NULL; + port->next_port_in_aggregator = NULL; + + __disable_port(port); + __initialize_port_locks(port); + + + // aggregator initialization + aggregator = &(SLAVE_AD_INFO(slave).aggregator); + + ad_initialize_agg(aggregator); + + aggregator->aggregator_mac_address = *((struct mac_addr *)bond->device->dev_addr); + aggregator->aggregator_identifier = (++aggregator_identifier); + aggregator->slave = slave; + aggregator->is_active = 0; + aggregator->num_of_ports = 0; + } + + return 0; +} + +/** + * bond_3ad_unbind_slave - deinitialize a slave's port + * @slave: slave struct to work on + * + * Search for the aggregator that is related to this port, remove the + * aggregator and assign another aggregator for other port related to it + * (if any), 
and remove the port. + */ +void bond_3ad_unbind_slave(struct slave *slave) +{ + struct port *port, *prev_port, *temp_port; + struct aggregator *aggregator, *new_aggregator, *temp_aggregator; + int select_new_active_agg = 0; + + // find the aggregator related to this slave + aggregator = &(SLAVE_AD_INFO(slave).aggregator); + + // find the port related to this slave + port = &(SLAVE_AD_INFO(slave).port); + + // if slave is null, the whole port is not initialized + if (!port->slave) { + printk(KERN_WARNING "bonding: Trying to unbind an uninitialized port on %s\n", slave->dev->name); + return; + } + + BOND_PRINT_DBG(("Unbinding Link Aggregation Group %d", aggregator->aggregator_identifier)); + + /* Tell the partner that this port is not suitable for aggregation */ + port->actor_oper_port_state &= ~AD_STATE_AGGREGATION; + __update_lacpdu_from_port(port); + ad_lacpdu_send(port); + + // check if this aggregator is occupied + if (aggregator->lag_ports) { + // check if there are other ports related to this aggregator except + // the port related to this slave(thats ensure us that there is a + // reason to search for new aggregator, and that we will find one + if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { + // find new aggregator for the related port(s) + new_aggregator = __get_first_agg(port); + for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) { + // if the new aggregator is empty, or it connected to to our port only + if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) { + break; + } + } + // if new aggregator found, copy the aggregator's parameters + // and connect the related lag_ports to the new aggregator + if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { + BOND_PRINT_DBG(("Some port(s) related to LAG %d - replaceing with 
LAG %d", aggregator->aggregator_identifier, new_aggregator->aggregator_identifier)); + + if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) { + printk(KERN_INFO "bonding: Removing an active aggregator\n"); + // select new active aggregator + select_new_active_agg = 1; + } + + new_aggregator->is_individual = aggregator->is_individual; + new_aggregator->actor_admin_aggregator_key = aggregator->actor_admin_aggregator_key; + new_aggregator->actor_oper_aggregator_key = aggregator->actor_oper_aggregator_key; + new_aggregator->partner_system = aggregator->partner_system; + new_aggregator->partner_system_priority = aggregator->partner_system_priority; + new_aggregator->partner_oper_aggregator_key = aggregator->partner_oper_aggregator_key; + new_aggregator->receive_state = aggregator->receive_state; + new_aggregator->transmit_state = aggregator->transmit_state; + new_aggregator->lag_ports = aggregator->lag_ports; + new_aggregator->is_active = aggregator->is_active; + new_aggregator->num_of_ports = aggregator->num_of_ports; + + // update the information that is written on the ports about the aggregator + for (temp_port=aggregator->lag_ports; temp_port; temp_port=temp_port->next_port_in_aggregator) { + temp_port->aggregator=new_aggregator; + temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; + } + + // clear the aggregator + ad_clear_agg(aggregator); + + if (select_new_active_agg) { + ad_agg_selection_logic(__get_first_agg(port)); + } + } else { + printk(KERN_WARNING "bonding: Warning: unbinding aggregator, " + "and could not find a new aggregator for its ports\n"); + } + } else { // in case that the only port related to this aggregator is the one we want to remove + select_new_active_agg = aggregator->is_active; + // clear the aggregator + ad_clear_agg(aggregator); + if (select_new_active_agg) { + printk(KERN_INFO "Removing an active aggregator\n"); + // select new active aggregator + 
ad_agg_selection_logic(__get_first_agg(port)); + } + } + } + + BOND_PRINT_DBG(("Unbinding port %d", port->actor_port_number)); + // find the aggregator that this port is connected to + temp_aggregator = __get_first_agg(port); + for (; temp_aggregator; temp_aggregator = __get_next_agg(temp_aggregator)) { + prev_port = NULL; + // search the port in the aggregator's related ports + for (temp_port=temp_aggregator->lag_ports; temp_port; prev_port=temp_port, temp_port=temp_port->next_port_in_aggregator) { + if (temp_port == port) { // the aggregator found - detach the port from this aggregator + if (prev_port) { + prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator; + } else { + temp_aggregator->lag_ports = temp_port->next_port_in_aggregator; + } + temp_aggregator->num_of_ports--; + if (temp_aggregator->num_of_ports==0) { + select_new_active_agg = temp_aggregator->is_active; + // clear the aggregator + ad_clear_agg(temp_aggregator); + if (select_new_active_agg) { + printk(KERN_INFO "Removing an active aggregator\n"); + // select new active aggregator + ad_agg_selection_logic(__get_first_agg(port)); + } + } + break; + } + } + } + port->slave=NULL; +} + +/** + * bond_3ad_state_machine_handler - handle state machines timeout + * @bond: bonding struct to work on + * + * The state machine handling concept in this module is to check every tick + * which state machine should operate any function. The execution order is + * round robin, so when we have an interaction between state machines, the + * reply of one to each other might be delayed until next tick. + * + * This function also complete the initialization when the agg_select_timer + * times out, and it selects an aggregator for the ports that are yet not + * related to any aggregator, and selects the active aggregator for a bond. 
+ */ +void bond_3ad_state_machine_handler(struct bonding *bond) +{ + struct port *port; + struct aggregator *aggregator; + + read_lock(&bond->lock); + + //check if there are any slaves + if (bond->next == (struct slave *)bond) { + goto end; + } + + if ((bond->device->flags & IFF_UP) != IFF_UP) { + goto end; + } + + // check if agg_select_timer timer after initialize is timed out + if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) { + // select the active aggregator for the bond + if ((port = __get_first_port(bond))) { + if (!port->slave) { + printk(KERN_WARNING "bonding: Warning: bond's first port is uninitialized\n"); + goto end; + } + + aggregator = __get_first_agg(port); + ad_agg_selection_logic(aggregator); + } + } + + // for each port run the state machines + for (port = __get_first_port(bond); port; port = __get_next_port(port)) { + if (!port->slave) { + printk(KERN_WARNING "bonding: Warning: Found an uninitialized port\n"); + goto end; + } + + ad_rx_machine(NULL, port); + ad_periodic_machine(port); + ad_port_selection_logic(port); + ad_mux_machine(port); + ad_tx_machine(port); + + // turn off the BEGIN bit, since we already handled it + if (port->sm_vars & AD_PORT_BEGIN) { + port->sm_vars &= ~AD_PORT_BEGIN; + } + } + +end: + read_unlock(&bond->lock); + + + if ((bond->device->flags & IFF_UP) == IFF_UP) { + /* re-arm the timer */ + mod_timer(&(BOND_AD_INFO(bond).ad_timer), jiffies + (AD_TIMER_INTERVAL * HZ / 1000)); + } +} + +/** + * bond_3ad_rx_indication - handle a received frame + * @lacpdu: received lacpdu + * @slave: slave struct to work on + * @length: length of the data received + * + * It is assumed that frames that were sent on this NIC don't returned as new + * received frames (loopback). Since only the payload is given to this + * function, it check for loopback. 
+ */ +void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length) +{ + struct port *port; + + if (length >= sizeof(struct lacpdu)) { + + port = &(SLAVE_AD_INFO(slave).port); + + if (!port->slave) { + printk(KERN_WARNING "bonding: Warning: port of slave %s is uninitialized\n", slave->dev->name); + return; + } + + switch (lacpdu->subtype) { + case AD_TYPE_LACPDU: + __ntohs_lacpdu(lacpdu); + BOND_PRINT_DBG(("Received LACPDU on port %d", port->actor_port_number)); + ad_rx_machine(lacpdu, port); + break; + + case AD_TYPE_MARKER: + // No need to convert fields to Little Endian since we don't use the marker's fields. + + switch (((struct marker *)lacpdu)->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + BOND_PRINT_DBG(("Received Marker Information on port %d", port->actor_port_number)); + ad_marker_info_received((struct marker *)lacpdu, port); + break; + + case AD_MARKER_RESPONSE_SUBTYPE: + BOND_PRINT_DBG(("Received Marker Response on port %d", port->actor_port_number)); + ad_marker_response_received((struct marker *)lacpdu, port); + break; + + default: + BOND_PRINT_DBG(("Received an unknown Marker subtype on slot %d", port->actor_port_number)); + } + } + } +} + +/** + * bond_3ad_adapter_speed_changed - handle a slave's speed change indication + * @slave: slave struct to work on + * + * Handle reselection of aggregator (if needed) for this port. 
 */
void bond_3ad_adapter_speed_changed(struct slave *slave)
{
	struct port *port;

	port = &(SLAVE_AD_INFO(slave).port);

	// if slave is null, the whole port is not initialized
	if (!port->slave) {
		printk(KERN_WARNING "bonding: Warning: speed changed for uninitialized port on %s\n",
		       slave->dev->name);
		return;
	}

	// rebuild the speed bits of the admin key and mirror the result
	// into the oper key (the chained assignment makes them identical)
	port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS;
	port->actor_oper_port_key = port->actor_admin_port_key |= (__get_link_speed(port) << 1);
	BOND_PRINT_DBG(("Port %d changed speed", port->actor_port_number));
	// there is no need to reselect a new aggregator, just signal the
	// state machines to reinitialize
	port->sm_vars |= AD_PORT_BEGIN;
}

/**
 * bond_3ad_adapter_duplex_changed - handle a slave's duplex change indication
 * @slave: slave struct to work on
 *
 * Handle reselection of aggregator (if needed) for this port.
 */
void bond_3ad_adapter_duplex_changed(struct slave *slave)
{
	struct port *port;

	port = &(SLAVE_AD_INFO(slave).port);

	// if slave is null, the whole port is not initialized
	if (!port->slave) {
		printk(KERN_WARNING "bonding: Warning: duplex changed for uninitialized port on %s\n",
		       slave->dev->name);
		return;
	}

	// same key-rebuild pattern as the speed handler, for the duplex bit
	port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;
	port->actor_oper_port_key = port->actor_admin_port_key |= __get_duplex(port);
	BOND_PRINT_DBG(("Port %d changed duplex", port->actor_port_number));
	// there is no need to reselect a new aggregator, just signal the
	// state machines to reinitialize
	port->sm_vars |= AD_PORT_BEGIN;
}

/**
 * bond_3ad_handle_link_change - handle a slave's link status change indication
 * @slave: slave struct to work on
 * @status: whether the link is now up or down
 *
 * Handle reselection of aggregator (if needed) for this port.
 */
void bond_3ad_handle_link_change(struct slave *slave, char link)
{
	struct port *port;

	port = &(SLAVE_AD_INFO(slave).port);

	// if slave is null, the whole port is not initialized
	if (!port->slave) {
#ifdef BONDING_DEBUG
		printk(KERN_WARNING "bonding: Warning: link status changed for uninitialized port on %s\n",
		       slave->dev->name);
#endif
		return;
	}

	// on link down we are zeroing duplex and speed since some of the
	// adaptors(ce1000.lan) report full duplex/speed instead of
	// N/A(duplex) / 0(speed); on link up we are forcing a recheck of
	// the duplex and speed for the same reason
	if (link == BOND_LINK_UP) {
		port->is_enabled = 1;
		port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;
		port->actor_oper_port_key = port->actor_admin_port_key |= __get_duplex(port);
		port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS;
		port->actor_oper_port_key = port->actor_admin_port_key |= (__get_link_speed(port) << 1);
	} else {
		/* link has failed */
		port->is_enabled = 0;
		port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;
		port->actor_oper_port_key = (port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS);
	}
	//BOND_PRINT_DBG(("Port %d changed link status to %s", port->actor_port_number, ((link == BOND_LINK_UP)?"UP":"DOWN")));
	// there is no need to reselect a new aggregator, just signal the
	// state machines to reinitialize
	port->sm_vars |= AD_PORT_BEGIN;
}

/**
 * bond_3ad_get_active_agg_info - get information of the active aggregator
 * @bond: bonding struct to work on
 * @ad_info: ad_info struct to fill with the bond's info
 *
 * Returns: 0 on success
 *	< 0 on error
 */
int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
{
	struct aggregator *aggregator = NULL;
	struct port *port;

	// the active aggregator is located through any of its member ports
	for (port = __get_first_port(bond); port; port = __get_next_port(port)) {
		if (port->aggregator && port->aggregator->is_active) {
			aggregator = port->aggregator;
			break;
		}
	}

	if (aggregator) {
		ad_info->aggregator_id = aggregator->aggregator_identifier;
		ad_info->ports = aggregator->num_of_ports;
		ad_info->actor_key = aggregator->actor_oper_aggregator_key;
		ad_info->partner_key = aggregator->partner_oper_aggregator_key;
		memcpy(ad_info->partner_system, aggregator->partner_system.mac_addr_value, ETH_ALEN);
		return 0;
	}

	return -1;
}

/* bond_3ad_xmit_xor - transmit routine for the 802.3ad mode.
 * Hashes the destination MAC against the slave's MAC and spreads frames
 * over the members of the active aggregator. Always returns 0; frames
 * that cannot be sent are freed (dropped).
 */
int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
{
	slave_t *slave, *start_at;
	struct bonding *bond = (struct bonding *) dev->priv;
	struct ethhdr *data = (struct ethhdr *)skb->data;
	int slave_agg_no;
	int slaves_in_agg;
	int agg_id;
	struct ad_info ad_info;

	if (!IS_UP(dev)) { /* bond down */
		dev_kfree_skb(skb);
		return 0;
	}

	if (bond == NULL) {
		printk(KERN_CRIT "bonding: Error: bond is NULL on device %s\n", dev->name);
		dev_kfree_skb(skb);
		return 0;
	}

	read_lock(&bond->lock);
	slave = bond->prev;

	/* check if bond is empty */
	if ((slave == (struct slave *) bond) || (bond->slave_cnt == 0)) {
		printk(KERN_DEBUG "ERROR: bond is empty\n");
		dev_kfree_skb(skb);
		read_unlock(&bond->lock);
		return 0;
	}

	if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
		printk(KERN_DEBUG "ERROR: bond_3ad_get_active_agg_info failed\n");
		dev_kfree_skb(skb);
		read_unlock(&bond->lock);
		return 0;
	}

	slaves_in_agg = ad_info.ports;
	agg_id = ad_info.aggregator_id;

	if (slaves_in_agg == 0) {
		/*the aggregator is empty*/
		printk(KERN_DEBUG "ERROR: active aggregator is empty\n");
		dev_kfree_skb(skb);
		read_unlock(&bond->lock);
		return 0;
	}

	/* we're at the root, get the first slave */
	if ((slave == NULL) || (slave->dev == NULL)) {
		/* no suitable interface, frame not sent */
		dev_kfree_skb(skb);
		read_unlock(&bond->lock);
		return 0;
	}

	// hash: XOR of the low bytes of destination MAC and slave MAC,
	// modulo the number of slaves in the active aggregator
	slave_agg_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % slaves_in_agg;

	// walk backwards (via ->prev) until the slave_agg_no-th member of
	// the active aggregator is reached
	while (slave != (slave_t *)bond) {
		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;

		if (agg && (agg->aggregator_identifier == agg_id)) {
			slave_agg_no--;
			if (slave_agg_no < 0) {
				break;
			}
		}

		slave = slave->prev;
		if (slave == NULL) {
			printk(KERN_ERR "bonding: Error: slave is NULL\n");
			dev_kfree_skb(skb);
			read_unlock(&bond->lock);
			return 0;
		}
	}

	if (slave == (slave_t *)bond) {
		printk(KERN_ERR "bonding: Error: Couldn't find a slave to tx on for aggregator ID %d\n", agg_id);
		dev_kfree_skb(skb);
		read_unlock(&bond->lock);
		return 0;
	}

	start_at = slave;

	// the hashed-to slave may be down; fall forward (via ->next) to the
	// next usable member of the same aggregator
	do {
		int slave_agg_id = 0;
		struct aggregator *agg;

		if (slave == NULL) {
			printk(KERN_ERR "bonding: Error: slave is NULL\n");
			dev_kfree_skb(skb);
			read_unlock(&bond->lock);
			return 0;
		}

		agg = SLAVE_AD_INFO(slave).port.aggregator;

		if (agg) {
			slave_agg_id = agg->aggregator_identifier;
		}

		if (SLAVE_IS_OK(slave) &&
		    agg && (slave_agg_id == agg_id)) {
			skb->dev = slave->dev;
			skb->priority = 1;
			dev_queue_xmit(skb);
			read_unlock(&bond->lock);
			return 0;
		}
	} while ((slave = slave->next) != start_at);

	/* no suitable interface, frame not sent */
	dev_kfree_skb(skb);
	read_unlock(&bond->lock);
	return 0;
}

/* bond_3ad_lacpdu_recv - packet type handler for LACPDUs arriving on a
 * bond master; looks up the slave the frame arrived on and dispatches
 * the payload to the rx machine. Frees the skb and returns
 * NET_RX_SUCCESS or NET_RX_DROP.
 */
int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype)
{
	struct bonding *bond = (struct bonding *)dev->priv;
	struct slave *slave = NULL;
	int ret = NET_RX_DROP;

	if (!(dev->flags & IFF_MASTER)) {
		goto out;
	}

	read_lock(&bond->lock);
	// skb->real_dev presumably identifies the slave device the frame
	// actually arrived on
	slave = bond_get_slave_by_dev((struct bonding *)dev->priv,
				      skb->real_dev);
	if (slave == NULL) {
		goto out_unlock;
	}

	bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len);

	ret = NET_RX_SUCCESS;

out_unlock:
	read_unlock(&bond->lock);
out:
	dev_kfree_skb(skb);

	return ret;
}

diff -Nru a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/drivers/net/bonding/bond_3ad.h	Thu Jun 19 23:46:53 2003
@@ -0,0 +1,298 @@
/*
 * Copyright(c) 1999 - 2003 Intel
Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * + * Changes: + * + * 2003/05/01 - Tsippy Mendelson and + * Amir Noam + * - Added support for lacp_rate module param. + * + * 2003/05/01 - Shmulik Hen + * - Renamed bond_3ad_link_status_changed() to + * bond_3ad_handle_link_change() for compatibility with TLB. 
 */

#ifndef __BOND_3AD_H__
#define __BOND_3AD_H__

/* NOTE(review): the #include targets below were lost in extraction
 * (everything between '<' and '>' was stripped); likely
 * <asm/byteorder.h>, <linux/skbuff.h>, <linux/netdevice.h> —
 * TODO confirm against the original patch before use.
 */
#include
#include
#include

// General definitions
#define BOND_ETH_P_LACPDU 0x8809
#define PKT_TYPE_LACPDU __constant_htons(BOND_ETH_P_LACPDU)
#define AD_TIMER_INTERVAL 100 /*msec*/

#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
#define AD_MULTICAST_LACPDU_ADDR {MULTICAST_LACPDU_ADDR}

#define AD_LACP_SLOW 0
#define AD_LACP_FAST 1

typedef struct mac_addr {
	u8 mac_addr_value[ETH_ALEN];
} mac_addr_t;

// modes for selecting the active aggregator
typedef enum {
	AD_BANDWIDTH = 0,
	AD_COUNT
} agg_selection_t;

// rx machine states(43.4.11 in the 802.3ad standard)
typedef enum {
	AD_RX_DUMMY,
	AD_RX_INITIALIZE,	// rx Machine
	AD_RX_PORT_DISABLED,	// rx Machine
	AD_RX_LACP_DISABLED,	// rx Machine
	AD_RX_EXPIRED,		// rx Machine
	AD_RX_DEFAULTED,	// rx Machine
	AD_RX_CURRENT		// rx Machine
} rx_states_t;

// periodic machine states(43.4.12 in the 802.3ad standard)
typedef enum {
	AD_PERIODIC_DUMMY,
	AD_NO_PERIODIC,		// periodic machine
	AD_FAST_PERIODIC,	// periodic machine
	AD_SLOW_PERIODIC,	// periodic machine
	AD_PERIODIC_TX		// periodic machine
} periodic_states_t;

// mux machine states(43.4.13 in the 802.3ad standard)
typedef enum {
	AD_MUX_DUMMY,
	AD_MUX_DETACHED,	// mux machine
	AD_MUX_WAITING,		// mux machine
	AD_MUX_ATTACHED,	// mux machine
	AD_MUX_COLLECTING_DISTRIBUTING	// mux machine
} mux_states_t;

// tx machine states(43.4.15 in the 802.3ad standard)
typedef enum {
	AD_TX_DUMMY,
	AD_TRANSMIT		// tx Machine
} tx_states_t;

// rx indication types
typedef enum {
	AD_TYPE_LACPDU = 1,	// type lacpdu
	AD_TYPE_MARKER		// type marker
} pdu_type_t;

// rx marker indication types
typedef enum {
	AD_MARKER_INFORMATION_SUBTYPE = 1,	// marker information subtype
	AD_MARKER_RESPONSE_SUBTYPE		// marker response subtype
} marker_subtype_t;

// timers types(43.4.9 in the 802.3ad standard)
typedef enum {
	AD_CURRENT_WHILE_TIMER,
	AD_ACTOR_CHURN_TIMER,
	AD_PERIODIC_TIMER,
	AD_PARTNER_CHURN_TIMER,
	AD_WAIT_WHILE_TIMER
} ad_timers_t;

#pragma pack(1)

// generic 802.3 frame header shared by the LACPDU and marker frames below
typedef struct ad_header {
	struct mac_addr destination_address;
	struct mac_addr source_address;
	u16 length_type;
} ad_header_t;

// Link Aggregation Control Protocol(LACP) data unit structure(43.4.2.2 in the 802.3ad standard)
typedef struct lacpdu {
	u8 subtype;			// = LACP(= 0x01)
	u8 version_number;
	u8 tlv_type_actor_info;		// = actor information(type/length/value)
	u8 actor_information_length;	// = 20
	u16 actor_system_priority;
	struct mac_addr actor_system;
	u16 actor_key;
	u16 actor_port_priority;
	u16 actor_port;
	u8 actor_state;
	u8 reserved_3_1[3];		// = 0
	u8 tlv_type_partner_info;	// = partner information
	u8 partner_information_length;	// = 20
	u16 partner_system_priority;
	struct mac_addr partner_system;
	u16 partner_key;
	u16 partner_port_priority;
	u16 partner_port;
	u8 partner_state;
	u8 reserved_3_2[3];		// = 0
	u8 tlv_type_collector_info;	// = collector information
	u8 collector_information_length;	// = 16
	u16 collector_max_delay;
	u8 reserved_12[12];
	u8 tlv_type_terminator;		// = terminator
	u8 terminator_length;		// = 0
	u8 reserved_50[50];		// = 0
} lacpdu_t;

typedef struct lacpdu_header {
	struct ad_header ad_header;
	struct lacpdu lacpdu;
} lacpdu_header_t;

// Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
typedef struct marker {
	u8 subtype;		// = 0x02 (marker PDU)
	u8 version_number;	// = 0x01
	u8 tlv_type;		// = 0x01 (marker information)
				// = 0x02 (marker response information)
	u8 marker_length;	// = 0x16
	u16 requester_port;	// The number assigned to the port by the requester
	struct mac_addr requester_system;	// The requester's system id
	u32 requester_transaction_id;	// The transaction id allocated by the requester
	u16 pad;		// = 0
	u8 tlv_type_terminator;	// = 0x00
	u8 terminator_length;	// = 0x00
	u8 reserved_90[90];	// = 0
} marker_t;

typedef struct marker_header {
	struct ad_header ad_header;
	struct marker marker;
} marker_header_t;

#pragma pack()

struct slave;
struct bonding;
struct ad_info;
struct port;

#ifdef __ia64__
#pragma pack(8)
#endif

// aggregator structure(43.4.5 in the 802.3ad standard)
typedef struct aggregator {
	struct mac_addr aggregator_mac_address;
	u16 aggregator_identifier;
	u16 is_individual;	// BOOLEAN
	u16 actor_admin_aggregator_key;
	u16 actor_oper_aggregator_key;
	struct mac_addr partner_system;
	u16 partner_system_priority;
	u16 partner_oper_aggregator_key;
	u16 receive_state;	// BOOLEAN
	u16 transmit_state;	// BOOLEAN
	struct port *lag_ports;
	// ****** PRIVATE PARAMETERS ******
	struct slave *slave;	// pointer to the bond slave that this aggregator belongs to
	u16 is_active;		// BOOLEAN. Indicates if this aggregator is active
	u16 num_of_ports;
} aggregator_t;

// port structure(43.4.6 in the 802.3ad standard)
typedef struct port {
	u16 actor_port_number;
	u16 actor_port_priority;
	struct mac_addr actor_system;	// This parameter is added here although it is not specified in the standard, just for simplification
	u16 actor_system_priority;	// This parameter is added here although it is not specified in the standard, just for simplification
	u16 actor_port_aggregator_identifier;
	u16 ntt;		// BOOLEAN
	u16 actor_admin_port_key;
	u16 actor_oper_port_key;
	u8 actor_admin_port_state;
	u8 actor_oper_port_state;
	struct mac_addr partner_admin_system;
	struct mac_addr partner_oper_system;
	u16 partner_admin_system_priority;
	u16 partner_oper_system_priority;
	u16 partner_admin_key;
	u16 partner_oper_key;
	u16 partner_admin_port_number;
	u16 partner_oper_port_number;
	u16 partner_admin_port_priority;
	u16 partner_oper_port_priority;
	u8 partner_admin_port_state;
	u8 partner_oper_port_state;
	u16 is_enabled;		// BOOLEAN
	// ****** PRIVATE PARAMETERS ******
	u16 sm_vars;		// all state machines variables for this port
	rx_states_t sm_rx_state;	// state machine rx state
	u16 sm_rx_timer_counter;	// state machine rx timer counter
	periodic_states_t sm_periodic_state;	// state machine periodic state
	u16 sm_periodic_timer_counter;	// state machine periodic timer counter
	mux_states_t sm_mux_state;	// state machine mux state
	u16 sm_mux_timer_counter;	// state machine mux timer counter
	tx_states_t sm_tx_state;	// state machine tx state
	u16 sm_tx_timer_counter;	// state machine tx timer counter(always on - enter to transmit state 3 times per second)
	struct slave *slave;	// pointer to the bond slave that this port belongs to
	struct aggregator *aggregator;	// pointer to an aggregator that this port related to
	struct port *next_port_in_aggregator;	// Next port on the linked list of the parent aggregator
	u32 transaction_id;	// continuous number for identification of Marker PDU's
	struct lacpdu lacpdu;	// the lacpdu that will be sent for this port
} port_t;

// system structure
typedef struct ad_system {
	u16 sys_priority;
	struct mac_addr sys_mac_addr;
} ad_system_t;

#ifdef __ia64__
#pragma pack()
#endif

// ================= AD Exported structures to the main bonding code ==================
#define BOND_AD_INFO(bond) ((bond)->ad_info)
#define SLAVE_AD_INFO(slave) ((slave)->ad_info)

struct ad_bond_info {
	ad_system_t system;	// 802.3ad system structure
	u32 agg_select_timer;	// Timer to select aggregator after all adapter's hand shakes
	u32 agg_select_mode;	// Mode of selection of active aggregator(bandwidth/count)
	int lacp_fast;		/* whether fast periodic tx should be
				 * requested
				 */
	struct timer_list ad_timer;
	struct packet_type ad_pkt_type;
};

struct ad_slave_info {
	struct aggregator aggregator;	// 802.3ad aggregator structure
	struct port port;	// 802.3ad port structure
	spinlock_t rx_machine_lock;	// To avoid race condition between callback and receive interrupt
	u16 id;
};

// ================= AD Exported functions to the main bonding code ==================
void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast);
int bond_3ad_bind_slave(struct slave *slave);
void bond_3ad_unbind_slave(struct slave *slave);
void bond_3ad_state_machine_handler(struct bonding *bond);
void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length);
void bond_3ad_adapter_speed_changed(struct slave *slave);
void bond_3ad_adapter_duplex_changed(struct slave *slave);
void bond_3ad_handle_link_change(struct slave *slave, char link);
int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info);
int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev);
int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype);
#endif //__BOND_3AD_H__

diff -Nru a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/drivers/net/bonding/bond_alb.c	Thu Jun 19 23:46:53 2003
@@ -0,0 +1,1569 @@
/*
 * Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * The full GNU General Public License is included in this distribution in the
 * file called LICENSE.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bonding.h" +#include "bond_alb.h" + + +#define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */ +#define BOND_TLB_REBALANCE_INTERVAL 10 /* in seconds, periodic re-balancing + * used for division - never set + * to zero !!! + */ +#define BOND_ALB_LP_INTERVAL 1 /* in seconds periodic send of + * learning packets to the switch + */ + +#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \ + * ALB_TIMER_TICKS_PER_SEC) + +#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \ + * ALB_TIMER_TICKS_PER_SEC) + +#define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table. + * Note that this value MUST NOT be smaller + * because the key hash table BYTE wide ! + */ + + +#define TLB_NULL_INDEX 0xffffffff +#define MAX_LP_RETRY 3 + +/* rlb defs */ +#define RLB_HASH_TABLE_SIZE 256 +#define RLB_NULL_INDEX 0xffffffff +#define RLB_UPDATE_DELAY 2*ALB_TIMER_TICKS_PER_SEC /* 2 seconds */ +#define RLB_ARP_BURST_SIZE 2 +#define RLB_UPDATE_RETRY 3 /* 3-ticks - must be smaller than the rlb + * rebalance interval (5 min). 
+ */ +/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is + * promiscuous after failover + */ +#define RLB_PROMISC_TIMEOUT 10*ALB_TIMER_TICKS_PER_SEC + +#pragma pack(1) +struct learning_pkt { + u8 mac_dst[ETH_ALEN]; + u8 mac_src[ETH_ALEN]; + u16 type; + u8 padding[ETH_ZLEN - (2*ETH_ALEN + 2)]; +}; + +struct arp_pkt { + u16 hw_addr_space; + u16 prot_addr_space; + u8 hw_addr_len; + u8 prot_addr_len; + u16 op_code; + u8 mac_src[ETH_ALEN]; /* sender hardware address */ + u32 ip_src; /* sender IP address */ + u8 mac_dst[ETH_ALEN]; /* target hardware address */ + u32 ip_dst; /* target IP address */ +}; +#pragma pack() + +/* Forward declaration */ +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); + +static inline u8 +_simple_hash(u8 *hash_start, int hash_size) +{ + int i; + u8 hash = 0; + + for (i=0; iload_history = 1 + entry->tx_bytes / + BOND_TLB_REBALANCE_INTERVAL; + entry->tx_bytes = 0; + } + entry->tx_slave = NULL; + entry->next = TLB_NULL_INDEX; + entry->prev = TLB_NULL_INDEX; +} + +static inline void +tlb_init_slave(struct slave *slave) +{ + struct tlb_slave_info *slave_info = &(SLAVE_TLB_INFO(slave)); + + slave_info->load = 0; + slave_info->head = TLB_NULL_INDEX; +} + +/* Caller must hold bond lock for read */ +static inline void +tlb_clear_slave(struct bonding *bond, struct slave *slave, u8 save_load) +{ + struct tlb_client_info *tx_hash_table = NULL; + u32 index, next_index; + + /* clear slave from tx_hashtbl */ + _lock_tx_hashtbl(bond); + tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; + + if (tx_hash_table) { + index = SLAVE_TLB_INFO(slave).head; + while (index != TLB_NULL_INDEX) { + next_index = tx_hash_table[index].next; + tlb_init_table_entry(bond, index, save_load); + index = next_index; + } + } + _unlock_tx_hashtbl(bond); + + tlb_init_slave(slave); +} + +/* Must be called before starting the monitor timer */ +static int +tlb_initialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = 
&(BOND_ALB_INFO(bond)); + int i; + size_t size; + +#if(TLB_HASH_TABLE_SIZE != 256) + /* Key to the hash table is byte wide. Check the size! */ + #error Hash Table size is wrong. +#endif + + spin_lock_init(&(bond_info->tx_hashtbl_lock)); + + _lock_tx_hashtbl(bond); + if (bond_info->tx_hashtbl != NULL) { + printk (KERN_ERR "%s: TLB hash table is not NULL\n", + bond->device->name); + _unlock_tx_hashtbl(bond); + return -1; + } + + size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); + bond_info->tx_hashtbl = kmalloc(size, GFP_KERNEL); + if (bond_info->tx_hashtbl == NULL) { + printk (KERN_ERR "%s: Failed to allocate TLB hash table\n", + bond->device->name); + _unlock_tx_hashtbl(bond); + return -1; + } + + memset(bond_info->tx_hashtbl, 0, size); + for (i=0; itx_hashtbl == NULL) { + _unlock_tx_hashtbl(bond); + return; + } + kfree(bond_info->tx_hashtbl); + bond_info->tx_hashtbl = NULL; + _unlock_tx_hashtbl(bond); +} + +/* Caller must hold bond lock for read */ +static struct slave* +tlb_get_least_loaded_slave(struct bonding *bond) +{ + struct slave *slave; + struct slave *least_loaded; + u32 curr_gap, max_gap; + + /* Find the first enabled slave */ + slave = bond_get_first_slave(bond); + while (slave) { + if (SLAVE_IS_OK(slave)) { + break; + } + slave = bond_get_next_slave(bond, slave); + } + + if (!slave) { + return NULL; + } + + least_loaded = slave; + max_gap = (slave->speed * 1000000) - + (SLAVE_TLB_INFO(slave).load * 8); + + /* Find the slave with the largest gap */ + slave = bond_get_next_slave(bond, slave); + while (slave) { + if (SLAVE_IS_OK(slave)) { + curr_gap = (slave->speed * 1000000) - + (SLAVE_TLB_INFO(slave).load * 8); + if (max_gap < curr_gap) { + least_loaded = slave; + max_gap = curr_gap; + } + } + slave = bond_get_next_slave(bond, slave); + } + + return least_loaded; +} + +/* Caller must hold bond lock for read */ +struct slave* +tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) +{ + struct alb_bond_info *bond_info = 
&(BOND_ALB_INFO(bond)); + struct tlb_client_info *hash_table = NULL; + struct slave *assigned_slave = NULL; + + _lock_tx_hashtbl(bond); + + hash_table = bond_info->tx_hashtbl; + if (hash_table == NULL) { + printk (KERN_ERR "%s: TLB hash table is NULL\n", + bond->device->name); + _unlock_tx_hashtbl(bond); + return NULL; + } + + assigned_slave = hash_table[hash_index].tx_slave; + if (!assigned_slave) { + assigned_slave = tlb_get_least_loaded_slave(bond); + + if (assigned_slave) { + struct tlb_slave_info *slave_info = + &(SLAVE_TLB_INFO(assigned_slave)); + u32 next_index = slave_info->head; + + hash_table[hash_index].tx_slave = assigned_slave; + hash_table[hash_index].next = next_index; + hash_table[hash_index].prev = TLB_NULL_INDEX; + + if (next_index != TLB_NULL_INDEX) { + hash_table[next_index].prev = hash_index; + } + + slave_info->head = hash_index; + slave_info->load += + hash_table[hash_index].load_history; + } + } + + if (assigned_slave) { + hash_table[hash_index].tx_bytes += skb_len; + } + + _unlock_tx_hashtbl(bond); + + return assigned_slave; +} + +/*********************** rlb specific functions ***************************/ +static inline void +_lock_rx_hashtbl(struct bonding *bond) +{ + spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + +static inline void +_unlock_rx_hashtbl(struct bonding *bond) +{ + spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + +/* when an ARP REPLY is received from a client update its info + * in the rx_hashtbl + */ +static void +rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) +{ + u32 hash_index; + struct rlb_client_info *client_info = NULL; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + hash_index = _simple_hash((u8*)&(arp->ip_src), 4); + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if ((client_info->assigned) && + (client_info->ip_src == arp->ip_dst) && + 
(client_info->ip_dst == arp->ip_src)) { + + /* update the clients MAC address */ + memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); + client_info->ntt = 1; + bond_info->rx_ntt = 1; + } + + _unlock_rx_hashtbl(bond); +} + +static int +rlb_arp_recv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type* ptype) +{ + struct bonding *bond = (struct bonding *)dev->priv; + int ret = NET_RX_DROP; + struct arp_pkt *arp = (struct arp_pkt *)skb->data; + + if (!(dev->flags & IFF_MASTER)) { + goto out; + } + + if (!arp) { + printk(KERN_ERR "Packet has no ARP data\n"); + goto out; + } + + if (skb->len < sizeof(struct arp_pkt)) { + printk(KERN_ERR "Packet is too small to be an ARP\n"); + goto out; + } + + if (arp->op_code == htons(ARPOP_REPLY)) { + /* update rx hash table for this ARP */ + rlb_update_entry_from_arp(bond, arp); + BOND_PRINT_DBG(("Server received an ARP Reply from client")); + } + + ret = NET_RX_SUCCESS; + +out: + dev_kfree_skb(skb); + + return ret; +} + +/* Caller must hold bond lock for read */ +static struct slave* +rlb_next_rx_slave(struct bonding *bond) +{ + struct slave *rx_slave = NULL, *slave = NULL; + unsigned int i = 0; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + slave = bond_info->next_rx_slave; + if (slave == NULL) { + slave = bond->next; + } + + /* this loop uses the circular linked list property of the + * slave's list to go through all slaves + */ + for (i = 0; i < bond->slave_cnt; i++, slave = slave->next) { + + if (SLAVE_IS_OK(slave)) { + if (!rx_slave) { + rx_slave = slave; + } + else if (slave->speed > rx_slave->speed) { + rx_slave = slave; + } + } + } + + if (rx_slave) { + bond_info->next_rx_slave = rx_slave->next; + } + + return rx_slave; +} + +/* teach the switch the mac of a disabled slave + * on the primary for fault tolerance + * + * Caller must hold bond->ptrlock for write or bond lock for write + */ +static void +rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) +{ + if 
(!bond->current_slave) { + return; + } + if (!bond->alb_info.primary_is_promisc) { + bond->alb_info.primary_is_promisc = 1; + dev_set_promiscuity(bond->current_slave->dev, 1); + } + bond->alb_info.rlb_promisc_timeout_counter = 0; + + alb_send_learning_packets(bond->current_slave, addr); +} + +/* slave being removed should not be active at this point + * + * Caller must hold bond lock for read + */ +static void +rlb_clear_slave(struct bonding *bond, struct slave *slave) +{ + struct rlb_client_info *rx_hash_table = NULL; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; + u32 index, next_index; + + /* clear slave from rx_hashtbl */ + _lock_rx_hashtbl(bond); + rx_hash_table = bond_info->rx_hashtbl; + + if (rx_hash_table == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + + index = bond_info->rx_hashtbl_head; + for (; index != RLB_NULL_INDEX; index = next_index) { + next_index = rx_hash_table[index].next; + + if (rx_hash_table[index].slave == slave) { + struct slave *assigned_slave = rlb_next_rx_slave(bond); + + if (assigned_slave) { + rx_hash_table[index].slave = assigned_slave; + if (memcmp(rx_hash_table[index].mac_dst, + mac_bcast, ETH_ALEN)) { + bond_info->rx_hashtbl[index].ntt = 1; + bond_info->rx_ntt = 1; + /* A slave has been removed from the + * table because it is either disabled + * or being released. 
We must retry the + * update to avoid clients from not + * being updated & disconnecting when + * there is stress + */ + bond_info->rlb_update_retry_counter = + RLB_UPDATE_RETRY; + } + } else { /* there is no active slave */ + rx_hash_table[index].slave = NULL; + } + } + } + + _unlock_rx_hashtbl(bond); + + write_lock(&bond->ptrlock); + if (slave != bond->current_slave) { + rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); + } + write_unlock(&bond->ptrlock); +} + +static void +rlb_update_client(struct rlb_client_info *client_info) +{ + int i = 0; + + if (client_info->slave == NULL) { + return; + } + + for (i=0; iip_dst, + client_info->slave->dev, + client_info->ip_src, + client_info->mac_dst, + client_info->slave->dev->dev_addr, + client_info->mac_dst); + } +} + +/* sends ARP REPLIES that update the clients that need updating */ +static void +rlb_update_rx_clients(struct bonding *bond) +{ + u32 hash_index; + struct rlb_client_info *client_info = NULL; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + if (client_info->ntt) { + rlb_update_client(client_info); + if (bond_info->rlb_update_retry_counter == 0) { + client_info->ntt = 0; + } + } + } + + /* do not update the entries again untill this counter is zero so that + * not to confuse the clients. 
 */
	/* (tail of rlb_update_rx_clients(), whose head is above) */
	bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;

	_unlock_rx_hashtbl(bond);
}

/* The slave was assigned a new mac address - update the clients */
static void
rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
{
	u32 hash_index;
	u8 ntt = 0;
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
	struct rlb_client_info* client_info = NULL;

	_lock_rx_hashtbl(bond);

	if (bond_info->rx_hashtbl == NULL) {
		_unlock_rx_hashtbl(bond);
		return;
	}

	// walk the linked hash entries; mark every client reached through
	// this slave that has a real (non-broadcast) mac for an update
	hash_index = bond_info->rx_hashtbl_head;
	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);

		if ((client_info->slave == slave) &&
		    memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
			client_info->ntt = 1;
			ntt = 1;
		}
	}

	// update the team's flag only after the whole iteration
	if (ntt) {
		bond_info->rx_ntt = 1;
		//fasten the change
		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
	}

	_unlock_rx_hashtbl(bond);
}

/* mark all clients using src_ip to be updated */
static void
rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip)
{
	u32 hash_index;
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
	struct rlb_client_info *client_info = NULL;

	_lock_rx_hashtbl(bond);

	if (bond_info->rx_hashtbl == NULL) {
		_unlock_rx_hashtbl(bond);
		return;
	}

	hash_index = bond_info->rx_hashtbl_head;
	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);

		if (!client_info->slave) {
			printk(KERN_ERR "Bonding: Error: found a client with no"
			       " channel in the client's hash table\n");
			continue;
		}
		/* update all clients using this src_ip, that are not assigned
		 * to the team's address (current_slave) and have a known
		 * unicast mac address.
+ */ + if ((client_info->ip_src == src_ip) && + memcmp(client_info->slave->dev->dev_addr, + bond->device->dev_addr, ETH_ALEN) && + memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { + client_info->ntt = 1; + bond_info->rx_ntt = 1; + } + } + + _unlock_rx_hashtbl(bond); +} + +/* Caller must hold both bond and ptr locks for read */ +struct slave* +rlb_choose_channel(struct bonding *bond, struct arp_pkt *arp) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info = NULL; + u32 hash_index = 0; + struct slave *assigned_slave = NULL; + u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return NULL; + } + + hash_index = _simple_hash((u8 *)&arp->ip_dst, 4); + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if (client_info->assigned == 1) { + if ((client_info->ip_src == arp->ip_src) && + (client_info->ip_dst == arp->ip_dst)) { + /* the entry is already assigned to this client */ + + if (memcmp(arp->mac_dst, mac_bcast, ETH_ALEN)) { + /* update mac address from arp */ + memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); + } + + assigned_slave = client_info->slave; + if (assigned_slave) { + _unlock_rx_hashtbl(bond); + return assigned_slave; + } + } else { + /* the entry is already assigned to some other client, + * move the old client to primary (current_slave) so + * that the new client can be assigned to this entry. + */ + if (bond->current_slave && + client_info->slave != bond->current_slave) { + client_info->slave = bond->current_slave; + rlb_update_client(client_info); + } + } + } + /* assign a new slave */ + assigned_slave = rlb_next_rx_slave(bond); + + if (assigned_slave) { + client_info->ip_src = arp->ip_src; + client_info->ip_dst = arp->ip_dst; + /* arp->mac_dst is broadcast for arp reqeusts. + * will be updated with clients actual unicast mac address + * upon receiving an arp reply. 
+ */ + memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); + client_info->slave = assigned_slave; + + if (memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { + client_info->ntt = 1; + bond->alb_info.rx_ntt = 1; + } + else { + client_info->ntt = 0; + } + + if (!client_info->assigned) { + u32 prev_tbl_head = bond_info->rx_hashtbl_head; + bond_info->rx_hashtbl_head = hash_index; + client_info->next = prev_tbl_head; + if (prev_tbl_head != RLB_NULL_INDEX) { + bond_info->rx_hashtbl[prev_tbl_head].prev = + hash_index; + } + client_info->assigned = 1; + } + } + + _unlock_rx_hashtbl(bond); + + return assigned_slave; +} + +/* chooses (and returns) transmit channel for arp reply + * does not choose channel for other arp types since they are + * sent on the current_slave + */ +static struct slave* +rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) +{ + struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw; + struct slave *tx_slave = NULL; + + if (arp->op_code == __constant_htons(ARPOP_REPLY)) { + /* the arp must be sent on the selected + * rx channel + */ + tx_slave = rlb_choose_channel(bond, arp); + if (tx_slave) { + memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN); + } + BOND_PRINT_DBG(("Server sent ARP Reply packet")); + } else if (arp->op_code == __constant_htons(ARPOP_REQUEST)) { + + /* Create an entry in the rx_hashtbl for this client as a + * place holder. + * When the arp reply is received the entry will be updated + * with the correct unicast address of the client. + */ + rlb_choose_channel(bond, arp); + + /* The ARP relpy packets must be delayed so that + * they can cancel out the influence of the ARP request. + */ + bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; + + /* arp requests are broadcast and are sent on the primary + * the arp request will collapse all clients on the subnet to + * the primary slave. We must register these clients to be + * updated with their assigned mac. 
+ */ + rlb_req_update_subnet_clients(bond, arp->ip_src); + BOND_PRINT_DBG(("Server sent ARP Request packet")); + } + + return tx_slave; +} + +/* Caller must hold bond lock for read */ +static void +rlb_rebalance(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *assigned_slave = NULL; + u32 hash_index; + struct rlb_client_info *client_info = NULL; + u8 ntt = 0; + + _lock_rx_hashtbl(bond); + + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + assigned_slave = rlb_next_rx_slave(bond); + if (assigned_slave && (client_info->slave != assigned_slave)){ + client_info->slave = assigned_slave; + client_info->ntt = 1; + ntt = 1; + } + } + + /* update the team's flag only after the whole iteration */ + if (ntt) { + bond_info->rx_ntt = 1; + } + _unlock_rx_hashtbl(bond); +} + +/* Caller must hold rx_hashtbl lock */ +static inline void +rlb_init_table_entry(struct rlb_client_info *entry) +{ + entry->next = RLB_NULL_INDEX; + entry->prev = RLB_NULL_INDEX; + entry->assigned = 0; + entry->ntt = 0; +} + +static int +rlb_initialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type); + int i; + size_t size; + + spin_lock_init(&(bond_info->rx_hashtbl_lock)); + + _lock_rx_hashtbl(bond); + if (bond_info->rx_hashtbl != NULL) { + printk (KERN_ERR "%s: RLB hash table is not NULL\n", + bond->device->name); + _unlock_rx_hashtbl(bond); + return -1; + } + + size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); + bond_info->rx_hashtbl = kmalloc(size, GFP_KERNEL); + if (bond_info->rx_hashtbl == NULL) { + printk (KERN_ERR "%s: Failed to allocate" + " RLB hash table\n", bond->device->name); + _unlock_rx_hashtbl(bond); + return -1; + } + + 
bond_info->rx_hashtbl_head = RLB_NULL_INDEX; + + for (i=0; i<RLB_HASH_TABLE_SIZE; i++) { + rlb_init_table_entry(bond_info->rx_hashtbl + i); + } + _unlock_rx_hashtbl(bond); + + /* register to receive ARPs */ + + /*initialize packet type*/ + pk_type->type = __constant_htons(ETH_P_ARP); + pk_type->dev = bond->device; + pk_type->func = rlb_arp_recv; + pk_type->data = (void*)1; /* understand shared skbs */ + + dev_add_pack(pk_type); + + return 0; +} + +static void +rlb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + dev_remove_pack(&(bond_info->rlb_pkt_type)); + + _lock_rx_hashtbl(bond); + if (bond_info->rx_hashtbl == NULL) { + _unlock_rx_hashtbl(bond); + return; + } + kfree(bond_info->rx_hashtbl); + bond_info->rx_hashtbl = NULL; + _unlock_rx_hashtbl(bond); +} + +/*********************** tlb/rlb shared functions *********************/ + +static void +alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +{ + struct sk_buff *skb = NULL; + struct learning_pkt pkt; + char *data = NULL; + int i; + unsigned int size = sizeof(struct learning_pkt); + + memset(&pkt, 0, size); + memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); + memcpy(pkt.mac_src, mac_addr, ETH_ALEN); + pkt.type = __constant_htons(ETH_P_LOOP); + + for (i=0; i < MAX_LP_RETRY; i++) { + skb = NULL; + skb = dev_alloc_skb(size); + if (!skb) { + return; + } + + data = skb_put(skb, size); + memcpy(data, &pkt, size); + skb->mac.raw = data; + skb->nh.raw = data + ETH_HLEN; + skb->protocol = pkt.type; + skb->priority = TC_PRIO_CONTROL; + skb->dev = slave->dev; + dev_queue_xmit(skb); + } + +} + +/* hw is a boolean parameter that determines whether we should try and + * set the hw address of the hw as well as the hw address of the net_device + */ +static int +alb_set_mac_addr(struct slave *slave, u8 addr[], int hw) +{ + struct net_device *dev = NULL; + struct sockaddr s_addr; + + dev = slave->dev; + + if (!hw) { + memcpy(dev->dev_addr, addr, ETH_ALEN); + return 0; + } + + /* for rlb each slave must have a unique hw mac addresses so
that */ + /* each slave will receive packets destined to a different mac */ + memcpy(s_addr.sa_data, addr, ETH_ALEN); + s_addr.sa_family = dev->type; + if (dev->set_mac_address(dev, &s_addr)) { + printk(KERN_DEBUG "bonding: Error: alb_set_mac_addr:" + " dev->set_mac_address of dev %s failed!" + " ALB mode requires that the base driver" + " support setting the hw address also when" + " the network device's interface is open\n", + dev->name); + return -EOPNOTSUPP; + } + return 0; +} + +/* Caller must hold bond lock for write or ptrlock for write*/ +static void +alb_swap_mac_addr(struct bonding *bond, + struct slave *slave1, + struct slave *slave2) +{ + u8 tmp_mac_addr[ETH_ALEN]; + struct slave *disabled_slave = NULL; + u8 slaves_state_differ; + + slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2)); + + memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); + alb_set_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled); + alb_set_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled); + + /* fasten the change in the switch */ + if (SLAVE_IS_OK(slave1)) { + alb_send_learning_packets(slave1, slave1->dev->dev_addr); + if (bond->alb_info.rlb_enabled) { + /* inform the clients that the mac address + * has changed + */ + rlb_req_update_slave_clients(bond, slave1); + } + } + else { + disabled_slave = slave1; + } + + if (SLAVE_IS_OK(slave2)) { + alb_send_learning_packets(slave2, slave2->dev->dev_addr); + if (bond->alb_info.rlb_enabled) { + /* inform the clients that the mac address + * has changed + */ + rlb_req_update_slave_clients(bond, slave2); + } + } + else { + disabled_slave = slave2; + } + + if (bond->alb_info.rlb_enabled && slaves_state_differ) { + /* A disabled slave was assigned an active mac addr */ + rlb_teach_disabled_mac_on_primary(bond, + disabled_slave->dev->dev_addr); + } +} + +/** + * alb_change_hw_addr_on_detach + * @bond: bonding we're working on + * @slave: the slave that was just detached + * + * We assume that @slave 
was already detached from the slave list. + * + * If @slave's permanent hw address is different both from its current + * address and from @bond's address, then somewhere in the bond there's + * a slave that has @slave's permanet address as its current address. + * We'll make sure that that slave no longer uses @slave's permanent address. + * + * Caller must hold bond lock + */ +static void +alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) +{ + struct slave *tmp_slave; + int perm_curr_diff; + int perm_bond_diff; + + perm_curr_diff = memcmp(slave->perm_hwaddr, + slave->dev->dev_addr, + ETH_ALEN); + perm_bond_diff = memcmp(slave->perm_hwaddr, + bond->device->dev_addr, + ETH_ALEN); + if (perm_curr_diff && perm_bond_diff) { + tmp_slave = bond_get_first_slave(bond); + while (tmp_slave) { + if (!memcmp(slave->perm_hwaddr, + tmp_slave->dev->dev_addr, + ETH_ALEN)) { + break; + } + tmp_slave = bond_get_next_slave(bond, tmp_slave); + } + + if (tmp_slave) { + alb_swap_mac_addr(bond, slave, tmp_slave); + } + } +} + +/** + * alb_handle_addr_collision_on_attach + * @bond: bonding we're working on + * @slave: the slave that was just attached + * + * checks uniqueness of slave's mac address and handles the case the + * new slave uses the bonds mac address. + * + * If the permanent hw address of @slave is @bond's hw address, we need to + * find a different hw address to give @slave, that isn't in use by any other + * slave in the bond. This address must be, of course, one of the premanent + * addresses of the other slaves. + * + * We go over the slave list, and for each slave there we compare its + * permanent hw address with the current address of all the other slaves. + * If no match was found, then we've found a slave with a permanent address + * that isn't used by any other slave in the bond, so we can assign it to + * @slave. + * + * assumption: this function is called before @slave is attached to the + * bond slave list. 
+ * + * caller must hold the bond lock for write since the mac addresses are compared + * and may be swapped. + */ +static int +alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) +{ + struct slave *tmp_slave1, *tmp_slave2; + + if (bond->slave_cnt == 0) { + /* this is the first slave */ + return 0; + } + + /* if slave's mac address differs from bond's mac address + * check uniqueness of slave's mac address against the other + * slaves in the bond. + */ + if (memcmp(slave->perm_hwaddr, bond->device->dev_addr, ETH_ALEN)) { + tmp_slave1 = bond_get_first_slave(bond); + for (; tmp_slave1; tmp_slave1 = bond_get_next_slave(bond, tmp_slave1)) { + if (!memcmp(tmp_slave1->dev->dev_addr, slave->dev->dev_addr, + ETH_ALEN)) { + break; + } + } + if (tmp_slave1) { + /* a slave was found that is using the mac address + * of the new slave + */ + printk(KERN_ERR "bonding: Warning: the hw address " + "of slave %s is not unique - cannot enslave it!" + , slave->dev->name); + return -EINVAL; + } + return 0; + } + + /* the slave's address is equal to the address of the bond + * search for a spare address in the bond for this slave. 
+ */ + tmp_slave1 = bond_get_first_slave(bond); + for (; tmp_slave1; tmp_slave1 = bond_get_next_slave(bond, tmp_slave1)) { + + tmp_slave2 = bond_get_first_slave(bond); + for (; tmp_slave2; tmp_slave2 = bond_get_next_slave(bond, tmp_slave2)) { + + if (!memcmp(tmp_slave1->perm_hwaddr, + tmp_slave2->dev->dev_addr, + ETH_ALEN)) { + + break; + } + } + + if (!tmp_slave2) { + /* no slave has tmp_slave1's perm addr + * as its curr addr + */ + break; + } + } + + if (tmp_slave1) { + alb_set_mac_addr(slave, tmp_slave1->perm_hwaddr, + bond->alb_info.rlb_enabled); + + printk(KERN_WARNING "bonding: Warning: the hw address " + "of slave %s is in use by the bond; " + "giving it the hw address of %s\n", + slave->dev->name, tmp_slave1->dev->name); + } else { + printk(KERN_CRIT "bonding: Error: the hw address " + "of slave %s is in use by the bond; " + "couldn't find a slave with a free hw " + "address to give it (this should not have " + "happened)\n", slave->dev->name); + return -EFAULT; + } + + return 0; +} + +/************************ exported alb funcions ************************/ + +int +bond_alb_initialize(struct bonding *bond, int rlb_enabled) +{ + int res; + + res = tlb_initialize(bond); + if (res) { + return res; + } + + if (rlb_enabled) { + bond->alb_info.rlb_enabled = 1; + /* initialize rlb */ + res = rlb_initialize(bond); + if (res) { + tlb_deinitialize(bond); + return res; + } + } + + return 0; +} + +void +bond_alb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + tlb_deinitialize(bond); + + if (bond_info->rlb_enabled) { + rlb_deinitialize(bond); + } +} + +int +bond_alb_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct bonding *bond = (struct bonding *) dev->priv; + struct ethhdr *eth_data = (struct ethhdr *)skb->data; + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *tx_slave = NULL; + char do_tx_balance = 1; + int hash_size = 0; + u32 hash_index = 0; + u8 *hash_start = NULL; + u8 
mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + /* make sure that the current_slave and the slaves list do + * not change during tx + */ + read_lock(&bond->lock); + + if (bond->slave_cnt == 0) { + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + read_lock(&bond->ptrlock); + + switch (ntohs(skb->protocol)) { + case ETH_P_IP: + if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) || + (skb->nh.iph->daddr == 0xffffffff)) { + do_tx_balance = 0; + break; + } + hash_start = (char*)&(skb->nh.iph->daddr); + hash_size = 4; + break; + + case ETH_P_IPV6: + if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) { + do_tx_balance = 0; + break; + } + + hash_start = (char*)&(skb->nh.ipv6h->daddr); + hash_size = 16; + break; + + case ETH_P_IPX: + if (skb->nh.ipxh->ipx_checksum != + __constant_htons(IPX_NO_CHECKSUM)) { + /* something is wrong with this packet */ + do_tx_balance = 0; + break; + } + + if (skb->nh.ipxh->ipx_type != + __constant_htons(IPX_TYPE_NCP)) { + /* The only protocol worth balancing in + * this family since it has an "ARP" like + * mechanism + */ + do_tx_balance = 0; + break; + } + + hash_start = (char*)eth_data->h_dest; + hash_size = ETH_ALEN; + break; + + case ETH_P_ARP: + do_tx_balance = 0; + if (bond_info->rlb_enabled) { + tx_slave = rlb_arp_xmit(skb, bond); + } + break; + + default: + do_tx_balance = 0; + break; + } + + if (do_tx_balance) { + hash_index = _simple_hash(hash_start, hash_size); + tx_slave = tlb_choose_channel(bond, hash_index, skb->len); + } + + if (!tx_slave) { + /* unbalanced or unassigned, send through primary */ + tx_slave = bond->current_slave; + bond_info->unbalanced_load += skb->len; + } + + if (tx_slave && SLAVE_IS_OK(tx_slave)) { + skb->dev = tx_slave->dev; + if (tx_slave != bond->current_slave) { + memcpy(eth_data->h_source, + tx_slave->dev->dev_addr, + ETH_ALEN); + } + 
dev_queue_xmit(skb); + } else { + /* no suitable interface, frame not sent */ + if (tx_slave) { + tlb_clear_slave(bond, tx_slave, 0); + } + dev_kfree_skb(skb); + } + + read_unlock(&bond->ptrlock); + read_unlock(&bond->lock); + return 0; +} + +void +bond_alb_monitor(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *slave = NULL; + + read_lock(&bond->lock); + + if ((bond->slave_cnt == 0) || !(bond->device->flags & IFF_UP)) { + bond_info->tx_rebalance_counter = 0; + bond_info->lp_counter = 0; + goto out; + } + + bond_info->tx_rebalance_counter++; + bond_info->lp_counter++; + + /* send learning packets */ + if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) { + /* change of current_slave involves swapping of mac addresses. + * in order to avoid this swapping from happening while + * sending the learning packets, the ptrlock must be held for + * read. + */ + read_lock(&bond->ptrlock); + slave = bond_get_first_slave(bond); + while (slave) { + alb_send_learning_packets(slave,slave->dev->dev_addr); + slave = bond_get_next_slave(bond, slave); + } + read_unlock(&bond->ptrlock); + + bond_info->lp_counter = 0; + } + + /* rebalance tx traffic */ + if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { + read_lock(&bond->ptrlock); + slave = bond_get_first_slave(bond); + while (slave) { + tlb_clear_slave(bond, slave, 1); + if (slave == bond->current_slave) { + SLAVE_TLB_INFO(slave).load = + bond_info->unbalanced_load / + BOND_TLB_REBALANCE_INTERVAL; + bond_info->unbalanced_load = 0; + } + slave = bond_get_next_slave(bond, slave); + } + read_unlock(&bond->ptrlock); + bond_info->tx_rebalance_counter = 0; + } + + /* handle rlb stuff */ + if (bond_info->rlb_enabled) { + /* the following code changes the promiscuity of the + * the current_slave. It needs to be locked with a + * write lock to protect from other code that also + * sets the promiscuity. 
+ */ + write_lock(&bond->ptrlock); + if (bond_info->primary_is_promisc && + (++bond_info->rlb_promisc_timeout_counter >= + RLB_PROMISC_TIMEOUT)) { + + bond_info->rlb_promisc_timeout_counter = 0; + + /* If the primary was set to promiscuous mode + * because a slave was disabled then + * it can now leave promiscuous mode. + */ + dev_set_promiscuity(bond->current_slave->dev, -1); + bond_info->primary_is_promisc = 0; + } + write_unlock(&bond->ptrlock); + + if (bond_info->rlb_rebalance == 1) { + bond_info->rlb_rebalance = 0; + rlb_rebalance(bond); + } + + /* check if clients need updating */ + if (bond_info->rx_ntt) { + if (bond_info->rlb_update_delay_counter) { + --bond_info->rlb_update_delay_counter; + } else { + rlb_update_rx_clients(bond); + if (bond_info->rlb_update_retry_counter) { + --bond_info->rlb_update_retry_counter; + } else { + bond_info->rx_ntt = 0; + } + } + } + } + +out: + read_unlock(&bond->lock); + + if (bond->device->flags & IFF_UP) { + /* re-arm the timer */ + mod_timer(&(bond_info->alb_timer), + jiffies + (HZ/ALB_TIMER_TICKS_PER_SEC)); + } +} + +/* assumption: called before the slave is attched to the bond + * and not locked by the bond lock + */ +int +bond_alb_init_slave(struct bonding *bond, struct slave *slave) +{ + int err = 0; + + err = alb_set_mac_addr(slave, slave->perm_hwaddr, + bond->alb_info.rlb_enabled); + if (err) { + return err; + } + + /* caller must hold the bond lock for write since the mac addresses + * are compared and may be swapped. 
+ */ + write_lock_bh(&bond->lock); + + err = alb_handle_addr_collision_on_attach(bond, slave); + + write_unlock_bh(&bond->lock); + + if (err) { + return err; + } + + tlb_init_slave(slave); + + /* order a rebalance ASAP */ + bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + + if (bond->alb_info.rlb_enabled) { + bond->alb_info.rlb_rebalance = 1; + } + + return 0; +} + +/* Caller must hold bond lock for write */ +void +bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) +{ + if (bond->slave_cnt > 1) { + alb_change_hw_addr_on_detach(bond, slave); + } + + tlb_clear_slave(bond, slave, 0); + + if (bond->alb_info.rlb_enabled) { + bond->alb_info.next_rx_slave = NULL; + rlb_clear_slave(bond, slave); + } +} + +/* Caller must hold bond lock for read */ +void +bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, + char link) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + if (link == BOND_LINK_DOWN) { + tlb_clear_slave(bond, slave, 0); + if (bond->alb_info.rlb_enabled) { + rlb_clear_slave(bond, slave); + } + } else if (link == BOND_LINK_UP) { + /* order a rebalance ASAP */ + bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + if (bond->alb_info.rlb_enabled) { + bond->alb_info.rlb_rebalance = 1; + /* If the updelay module parameter is smaller than the + * forwarding delay of the switch the rebalance will + * not work because the rebalance arp replies will + * not be forwarded to the clients.. + */ + } + } +} + +/** + * bond_alb_assign_current_slave - assign new current_slave + * @bond: our bonding struct + * @new_slave: new slave to assign + * + * Set the bond->current_slave to @new_slave and handle + * mac address swapping and promiscuity changes as needed. 
+ * + * Caller must hold bond ptrlock for write (or bond lock for write) + */ +void +bond_alb_assign_current_slave(struct bonding *bond, struct slave *new_slave) +{ + struct slave *swap_slave = bond->current_slave; + + if (bond->current_slave == new_slave) { + return; + } + + if (bond->current_slave && bond->alb_info.primary_is_promisc) { + dev_set_promiscuity(bond->current_slave->dev, -1); + bond->alb_info.primary_is_promisc = 0; + bond->alb_info.rlb_promisc_timeout_counter = 0; + } + + bond->current_slave = new_slave; + + if (!new_slave || (bond->slave_cnt == 0)) { + return; + } + + /* set the new current_slave to the bonds mac address + * i.e. swap mac addresses of old current_slave and new current_slave + */ + if (!swap_slave) { + /* find slave that is holding the bond's mac address */ + swap_slave = bond_get_first_slave(bond); + while (swap_slave) { + if (!memcmp(swap_slave->dev->dev_addr, + bond->device->dev_addr, ETH_ALEN)) { + break; + } + swap_slave = bond_get_next_slave(bond, swap_slave); + } + } + + /* current_slave must be set before calling alb_swap_mac_addr */ + if (swap_slave) { + /* swap mac address */ + alb_swap_mac_addr(bond, swap_slave, new_slave); + } else { + /* set the new_slave to the bond mac address */ + alb_set_mac_addr(new_slave, bond->device->dev_addr, + bond->alb_info.rlb_enabled); + /* fasten bond mac on new current slave */ + alb_send_learning_packets(new_slave, bond->device->dev_addr); + } +} + diff -Nru a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/net/bonding/bond_alb.h Thu Jun 19 23:46:53 2003 @@ -0,0 +1,127 @@ +/* + * Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + */ + +#ifndef __BOND_ALB_H__ +#define __BOND_ALB_H__ + +#include <linux/if_ether.h> + +struct bonding; +struct slave; + +#define BOND_ALB_INFO(bond) ((bond)->alb_info) +#define SLAVE_TLB_INFO(slave) ((slave)->tlb_info) + +struct tlb_client_info { + struct slave *tx_slave; /* A pointer to slave used for transmiting + * packets to a Client that the Hash function + * gave this entry index. + */ + u32 tx_bytes; /* Each Client acumulates the BytesTx that + * were tranmitted to it, and after each + * CallBack the LoadHistory is devided + * by the balance interval + */ + u32 load_history; /* This field contains the amount of Bytes + * that were transmitted to this client by + * the server on the previous balance + * interval in Bps. + */ + u32 next; /* The next Hash table entry index, assigned + * to use the same adapter for transmit. + */ + u32 prev; /* The previous Hash table entry index, + * assigned to use the same + */ +}; + +/* ------------------------------------------------------------------------- + * struct rlb_client_info contains all info related to a specific rx client + * connection.
This is the Clients Hash Table entry struct + * ------------------------------------------------------------------------- + */ +struct rlb_client_info { + u32 ip_src; /* the server IP address */ + u32 ip_dst; /* the client IP address */ + u8 mac_dst[ETH_ALEN]; /* the client MAC address */ + u32 next; /* The next Hash table entry index */ + u32 prev; /* The previous Hash table entry index */ + u8 assigned; /* checking whether this entry is assigned */ + u8 ntt; /* flag - need to transmit client info */ + struct slave *slave; /* the slave assigned to this client */ +}; + +struct tlb_slave_info { + u32 head; /* Index to the head of the bi-directional clients + * hash table entries list. The entries in the list + * are the entries that were assigned to use this + * slave for transmit. + */ + u32 load; /* Each slave sums the loadHistory of all clients + * assigned to it + */ +}; + +struct alb_bond_info { + struct timer_list alb_timer; + struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ + spinlock_t tx_hashtbl_lock; + u32 unbalanced_load; + int tx_rebalance_counter; + int lp_counter; + /* -------- rlb parameters -------- */ + int rlb_enabled; + struct packet_type rlb_pkt_type; + struct rlb_client_info *rx_hashtbl; /* Receive hash table */ + spinlock_t rx_hashtbl_lock; + u32 rx_hashtbl_head; + u8 rx_ntt; /* flag - need to transmit + * to all rx clients + */ + struct slave *next_rx_slave;/* next slave to be assigned + * to a new rx client for + */ + u32 rlb_interval_counter; + u8 primary_is_promisc; /* boolean */ + u32 rlb_promisc_timeout_counter;/* counts primary + * promiscuity time + */ + u32 rlb_update_delay_counter; + u32 rlb_update_retry_counter;/* counter of retries + * of client update + */ + u8 rlb_rebalance; /* flag - indicates that the + * rx traffic should be + * rebalanced + */ +}; + +int bond_alb_initialize(struct bonding *bond, int rlb_enabled); +void bond_alb_deinitialize(struct bonding *bond); +int bond_alb_init_slave(struct bonding *bond, 
struct slave *slave); +void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave); +void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link); +void bond_alb_assign_current_slave(struct bonding *bond, struct slave *new_slave); +int bond_alb_xmit(struct sk_buff *skb, struct net_device *dev); +void bond_alb_monitor(struct bonding *bond); + +#endif /* __BOND_ALB_H__ */ + diff -Nru a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/net/bonding/bond_main.c Thu Jun 19 23:46:52 2003 @@ -0,0 +1,4021 @@ +/* + * originally based on the dummy device. + * + * Copyright 1999, Thomas Davis, tadavis@lbl.gov. + * Licensed under the GPL. Based on dummy.c, and eql.c devices. + * + * bonding.c: an Ethernet Bonding driver + * + * This is useful to talk to a Cisco EtherChannel compatible equipment: + * Cisco 5500 + * Sun Trunking (Solaris) + * Alteon AceDirector Trunks + * Linux Bonding + * and probably many L2 switches ... + * + * How it works: + * ifconfig bond0 ipaddress netmask up + * will setup a network device, with an ip address. No mac address + * will be assigned at this time. The hw mac address will come from + * the first slave bonded to the channel. All slaves will then use + * this hw mac address. + * + * ifconfig bond0 down + * will release all slaves, marking them as down. + * + * ifenslave bond0 eth0 + * will attach eth0 to bond0 as a slave. eth0 hw mac address will either + * a: be used as initial mac address + * b: if a hw mac address already is there, eth0's hw mac address + * will then be set from bond0. + * + * v0.1 - first working version. + * v0.2 - changed stats to be calculated by summing slaves stats. + * + * Changes: + * Arnaldo Carvalho de Melo + * - fix leaks on failure at bond_init + * + * 2000/09/30 - Willy Tarreau + * - added trivial code to release a slave device. 
+ * - fixed security bug (CAP_NET_ADMIN not checked) + * - implemented MII link monitoring to disable dead links : + * All MII capable slaves are checked every milliseconds + * (100 ms seems good). This value can be changed by passing it to + * insmod. A value of zero disables the monitoring (default). + * - fixed an infinite loop in bond_xmit_roundrobin() when there's no + * good slave. + * - made the code hopefully SMP safe + * + * 2000/10/03 - Willy Tarreau + * - optimized slave lists based on relevant suggestions from Thomas Davis + * - implemented active-backup method to obtain HA with two switches: + * stay as long as possible on the same active interface, while we + * also monitor the backup one (MII link status) because we want to know + * if we are able to switch at any time. ( pass "mode=1" to insmod ) + * - lots of stress testings because we need it to be more robust than the + * wires ! :-> + * + * 2000/10/09 - Willy Tarreau + * - added up and down delays after link state change. + * - optimized the slaves chaining so that when we run forward, we never + * repass through the bond itself, but we can find it by searching + * backwards. Renders the deletion more difficult, but accelerates the + * scan. + * - smarter enslaving and releasing. + * - finer and more robust SMP locking + * + * 2000/10/17 - Willy Tarreau + * - fixed two potential SMP race conditions + * + * 2000/10/18 - Willy Tarreau + * - small fixes to the monitoring FSM in case of zero delays + * 2000/11/01 - Willy Tarreau + * - fixed first slave not automatically used in trunk mode. + * 2000/11/10 : spelling of "EtherChannel" corrected. + * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl(). + * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait(). + * + * 2001/1/3 - Chad N. Tindel + * - The bonding driver now simulates MII status monitoring, just like + * a normal network device. 
It will show that the link is down iff + * every slave in the bond shows that their links are down. If at least + * one slave is up, the bond's MII status will appear as up. + * + * 2001/2/7 - Chad N. Tindel + * - Applications can now query the bond from user space to get + * information which may be useful. They do this by calling + * the BOND_INFO_QUERY ioctl. Once the app knows how many slaves + * are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to + * get slave specific information (# link failures, etc). See + * for more details. The structs of interest + * are ifbond and ifslave. + * + * 2001/4/5 - Chad N. Tindel + * - Ported to 2.4 Kernel + * + * 2001/5/2 - Jeffrey E. Mast + * - When a device is detached from a bond, the slave device is no longer + * left thinking that is has a master. + * + * 2001/5/16 - Jeffrey E. Mast + * - memset did not appropriately initialized the bond rw_locks. Used + * rwlock_init to initialize to unlocked state to prevent deadlock when + * first attempting a lock + * - Called SET_MODULE_OWNER for bond device + * + * 2001/5/17 - Tim Anderson + * - 2 paths for releasing for slave release; 1 through ioctl + * and 2) through close. Both paths need to release the same way. + * - the free slave in bond release is changing slave status before + * the free. The netdev_set_master() is intended to change slave state + * so it should not be done as part of the release process. + * - Simple rule for slave state at release: only the active in A/B and + * only one in the trunked case. + * + * 2001/6/01 - Tim Anderson + * - Now call dev_close when releasing a slave so it doesn't screw up + * out routing table. + * + * 2001/6/01 - Chad N. Tindel + * - Added /proc support for getting bond and slave information. + * Information is in /proc/net//info. + * - Changed the locking when calling bond_close to prevent deadlock. 
+ * + * 2001/8/05 - Janice Girouard + * - correct problem where refcnt of slave is not incremented in bond_ioctl + * so the system hangs when halting. + * - correct locking problem when unable to malloc in bond_enslave. + * - adding bond_xmit_xor logic. + * - adding multiple bond device support. + * + * 2001/8/13 - Erik Habbinga + * - correct locking problem with rtnl_exlock_nowait + * + * 2001/8/23 - Janice Girouard + * - bzero initial dev_bonds, to correct oops + * - convert SIOCDEVPRIVATE to new MII ioctl calls + * + * 2001/9/13 - Takao Indoh + * - Add the BOND_CHANGE_ACTIVE ioctl implementation + * + * 2001/9/14 - Mark Huth + * - Change MII_LINK_READY to not check for end of auto-negotiation, + * but only for an up link. + * + * 2001/9/20 - Chad N. Tindel + * - Add the device field to bonding_t. Previously the net_device + * corresponding to a bond wasn't available from the bonding_t + * structure. + * + * 2001/9/25 - Janice Girouard + * - add arp_monitor for active backup mode + * + * 2001/10/23 - Takao Indoh + * - Various memory leak fixes + * + * 2001/11/5 - Mark Huth + * - Don't take rtnl lock in bond_mii_monitor as it deadlocks under + * certain hotswap conditions. + * Note: this same change may be required in bond_arp_monitor ??? + * - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr + * - Handle hot swap ethernet interface deregistration events to remove + * kernel oops following hot swap of enslaved interface + * + * 2002/1/2 - Chad N. Tindel + * - Restore original slave flags at release time. 
+ * + * 2002/02/18 - Erik Habbinga + * - bond_release(): calling kfree on our_slave after call to + * bond_restore_slave_flags, not before + * - bond_enslave(): saving slave flags into original_flags before + * call to netdev_set_master, so the IFF_SLAVE flag doesn't end + * up in original_flags + * + * 2002/04/05 - Mark Smith and + * Steve Mead + * - Port Gleb Natapov's multicast support patchs from 2.4.12 + * to 2.4.18 adding support for multicast. + * + * 2002/06/10 - Tony Cureington + * - corrected uninitialized pointer (ifr.ifr_data) in bond_check_dev_link; + * actually changed function to use MIIPHY, then MIIREG, and finally + * ETHTOOL to determine the link status + * - fixed bad ifr_data pointer assignments in bond_ioctl + * - corrected mode 1 being reported as active-backup in bond_get_info; + * also added text to distinguish type of load balancing (rr or xor) + * - change arp_ip_target module param from "1-12s" (array of 12 ptrs) + * to "s" (a single ptr) + * + * 2002/08/30 - Jay Vosburgh + * - Removed acquisition of xmit_lock in set_multicast_list; caused + * deadlock on SMP (lock is held by caller). + * - Revamped SIOCGMIIPHY, SIOCGMIIREG portion of bond_check_dev_link(). + * + * 2002/09/18 - Jay Vosburgh + * - Fixed up bond_check_dev_link() (and callers): removed some magic + * numbers, banished local MII_ defines, wrapped ioctl calls to + * prevent EFAULT errors + * + * 2002/9/30 - Jay Vosburgh + * - make sure the ip target matches the arp_target before saving the + * hw address. + * + * 2002/9/30 - Dan Eisner + * - make sure my_ip is set before taking down the link, since + * not all switches respond if the source ip is not set. + * + * 2002/10/8 - Janice Girouard + * - read in the local ip address when enslaving a device + * - add primary support + * - make sure 2*arp_interval has passed when a new device + * is brought on-line before taking it down. + * + * 2002/09/11 - Philippe De Muyter + * - Added bond_xmit_broadcast logic. 
+ * - Added bond_mode() support function. + * + * 2002/10/26 - Laurent Deniel + * - allow to register multicast addresses only on active slave + * (useful in active-backup mode) + * - add multicast module parameter + * - fix deletion of multicast groups after unloading module + * + * 2002/11/06 - Kameshwara Rayaprolu + * - Changes to prevent panic from closing the device twice; if we close + * the device in bond_release, we must set the original_flags to down + * so it won't be closed again by the network layer. + * + * 2002/11/07 - Tony Cureington + * - Fix arp_target_hw_addr memory leak + * - Created activebackup_arp_monitor function to handle arp monitoring + * in active backup mode - the bond_arp_monitor had several problems... + * such as allowing slaves to tx arps sequentially without any delay + * for a response + * - Renamed bond_arp_monitor to loadbalance_arp_monitor and re-wrote + * this function to just handle arp monitoring in load-balancing mode; + * it is a lot more compact now + * - Changes to ensure one and only one slave transmits in active-backup + * mode + * - Robustesize parameters; warn users about bad combinations of + * parameters; also if miimon is specified and a network driver does + * not support MII or ETHTOOL, inform the user of this + * - Changes to support link_failure_count when in arp monitoring mode + * - Fix up/down delay reported in /proc + * - Added version; log version; make version available from "modinfo -d" + * - Fixed problem in bond_check_dev_link - if the first IOCTL (SIOCGMIIPH) + * failed, the ETHTOOL ioctl never got a chance + * + * 2002/11/16 - Laurent Deniel + * - fix multicast handling in activebackup_arp_monitor + * - remove one unnecessary and confusing current_slave == slave test + * in activebackup_arp_monitor + * + * 2002/11/17 - Laurent Deniel + * - fix bond_slave_info_query when slave_id = num_slaves + * + * 2002/11/19 - Janice Girouard + * - correct ifr_data reference. 
Update ifr_data reference + * to mii_ioctl_data struct values to avoid confusion. + * + * 2002/11/22 - Bert Barbe + * - Add support for multiple arp_ip_target + * + * 2002/12/13 - Jay Vosburgh + * - Changed to allow text strings for mode and multicast, e.g., + * insmod bonding mode=active-backup. The numbers still work. + * One change: an invalid choice will cause module load failure, + * rather than the previous behavior of just picking one. + * - Minor cleanups; got rid of dup ctype stuff, atoi function + * + * 2003/02/07 - Jay Vosburgh + * - Added use_carrier module parameter that causes miimon to + * use netif_carrier_ok() test instead of MII/ETHTOOL ioctls. + * - Minor cleanups; consolidated ioctl calls to one function. + * + * 2003/02/07 - Tony Cureington + * - Fix bond_mii_monitor() logic error that could result in + * bonding round-robin mode ignoring links after failover/recovery + * + * 2003/03/17 - Jay Vosburgh + * - kmalloc fix (GPF_KERNEL to GPF_ATOMIC) reported by + * Shmulik dot Hen at intel.com. + * - Based on discussion on mailing list, changed use of + * update_slave_cnt(), created wrapper functions for adding/removing + * slaves, changed bond_xmit_xor() to check slave_cnt instead of + * checking slave and slave->dev (which only worked by accident). + * - Misc code cleanup: get arp_send() prototype from header file, + * add max_bonds to bonding.txt. + * + * 2003/03/18 - Tsippy Mendelson and + * Shmulik Hen + * - Make sure only bond_attach_slave() and bond_detach_slave() can + * manipulate the slave list, including slave_cnt, even when in + * bond_release_all(). + * - Fixed hang in bond_release() with traffic running: + * netdev_set_master() must not be called from within the bond lock. + * + * 2003/03/18 - Tsippy Mendelson and + * Shmulik Hen + * - Fixed hang in bond_enslave() with traffic running: + * netdev_set_master() must not be called from within the bond lock. 
+ * + * 2003/03/18 - Amir Noam + * - Added support for getting slave's speed and duplex via ethtool. + * Needed for 802.3ad and other future modes. + * + * 2003/03/18 - Tsippy Mendelson and + * Shmulik Hen + * - Enable support of modes that need to use the unique mac address of + * each slave. + * * bond_enslave(): Moved setting the slave's mac address, and + * openning it, from the application to the driver. This breaks + * backward comaptibility with old versions of ifenslave that open + * the slave before enalsving it !!!. + * * bond_release(): The driver also takes care of closing the slave + * and restoring its original mac address. + * - Removed the code that restores all base driver's flags. + * Flags are automatically restored once all undo stages are done + * properly. + * - Block possibility of enslaving before the master is up. This + * prevents putting the system in an unstable state. + * + * 2003/03/18 - Amir Noam , + * Tsippy Mendelson and + * Shmulik Hen + * - Added support for IEEE 802.3ad Dynamic link aggregation mode. + * + * 2003/05/01 - Amir Noam + * - Added ABI version control to restore compatibility between + * new/old ifenslave and new/old bonding. + * + * 2003/05/01 - Shmulik Hen + * - Fixed bug in bond_release_all(): save old value of current_slave + * before setting it to NULL. + * - Changed driver versioning scheme to include version number instead + * of release date (that is already in another field). There are 3 + * fields X.Y.Z where: + * X - Major version - big behavior changes + * Y - Minor version - addition of features + * Z - Extra version - minor changes and bug fixes + * The current version is 1.0.0 as a base line. + * + * 2003/05/01 - Tsippy Mendelson and + * Amir Noam + * - Added support for lacp_rate module param. + * - Code beautification and style changes (mainly in comments). 
+ * new version - 1.0.1 + * + * 2003/05/01 - Shmulik Hen + * - Based on discussion on mailing list, changed locking scheme + * to use lock/unlock or lock_bh/unlock_bh appropriately instead + * of lock_irqsave/unlock_irqrestore. The new scheme helps exposing + * hidden bugs and solves system hangs that occurred due to the fact + * that holding lock_irqsave doesn't prevent softirqs from running. + * This also increases total throughput since interrupts are not + * blocked on each transmitted packets or monitor timeout. + * new version - 2.0.0 + * + * 2003/05/01 - Shmulik Hen + * - Added support for Transmit load balancing mode. + * - Concentrate all assignments of current_slave to a single point + * so specific modes can take actions when the primary adapter is + * changed. + * - Take the updelay parameter into consideration during bond_enslave + * since some adapters loose their link during setting the device. + * - Renamed bond_3ad_link_status_changed() to + * bond_3ad_handle_link_change() for compatibility with TLB. + * new version - 2.1.0 + * + * 2003/05/01 - Tsippy Mendelson + * - Added support for Adaptive load balancing mode which is + * equivalent to Transmit load balancing + Receive load balancing. + * new version - 2.2.0 + * + * 2003/05/15 - Jay Vosburgh + * - Applied fix to activebackup_arp_monitor posted to bonding-devel + * by Tony Cureington . Fixes ARP + * monitor endless failover bug. Version to 2.2.10 + * + * 2003/05/20 - Amir Noam + * - Fixed bug in ABI version control - Don't commit to a specific + * ABI version if receiving unsupported ioctl commands. + * + * 2003/05/22 - Jay Vosburgh + * - In conjunction with fix for ifenslave -c, in + * bond_change_active(), changing to the already active slave + * is no longer an error (it successfully does nothing). 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "bonding.h" +#include "bond_3ad.h" +#include "bond_alb.h" + +#define DRV_VERSION "2.2.11" +#define DRV_RELDATE "May 29, 2003" +#define DRV_NAME "bonding" +#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" + +static const char *version = +DRV_NAME ".c:v" DRV_VERSION " (" DRV_RELDATE ")\n"; + +/* monitor all links that often (in milliseconds). <=0 disables monitoring */ +#ifndef BOND_LINK_MON_INTERV +#define BOND_LINK_MON_INTERV 0 +#endif + +#ifndef BOND_LINK_ARP_INTERV +#define BOND_LINK_ARP_INTERV 0 +#endif + +#ifndef MAX_ARP_IP_TARGETS +#define MAX_ARP_IP_TARGETS 16 +#endif + +struct bond_parm_tbl { + char *modename; + int mode; +}; + +static int arp_interval = BOND_LINK_ARP_INTERV; +static char *arp_ip_target[MAX_ARP_IP_TARGETS] = { NULL, }; +static unsigned long arp_target[MAX_ARP_IP_TARGETS] = { 0, } ; +static int arp_ip_count = 0; +static u32 my_ip = 0; +char *arp_target_hw_addr = NULL; + +static char *primary= NULL; + +static int app_abi_ver = 0; +static int orig_app_abi_ver = -1; /* This is used to save the first ABI version + * we receive from the application. Once set, + * it won't be changed, and the module will + * refuse to enslave/release interfaces if the + * command comes from an application using + * another ABI version. 
+ */ + +static int max_bonds = BOND_DEFAULT_MAX_BONDS; +static int miimon = BOND_LINK_MON_INTERV; +static int use_carrier = 1; +static int bond_mode = BOND_MODE_ROUNDROBIN; +static int updelay = 0; +static int downdelay = 0; + +static char *mode = NULL; + +static struct bond_parm_tbl bond_mode_tbl[] = { +{ "balance-rr", BOND_MODE_ROUNDROBIN}, +{ "active-backup", BOND_MODE_ACTIVEBACKUP}, +{ "balance-xor", BOND_MODE_XOR}, +{ "broadcast", BOND_MODE_BROADCAST}, +{ "802.3ad", BOND_MODE_8023AD}, +{ "balance-tlb", BOND_MODE_TLB}, +{ "balance-alb", BOND_MODE_ALB}, +{ NULL, -1}, +}; + +static int multicast_mode = BOND_MULTICAST_ALL; +static char *multicast = NULL; + +static struct bond_parm_tbl bond_mc_tbl[] = { +{ "disabled", BOND_MULTICAST_DISABLED}, +{ "active", BOND_MULTICAST_ACTIVE}, +{ "all", BOND_MULTICAST_ALL}, +{ NULL, -1}, +}; + +static int lacp_fast = 0; +static char *lacp_rate = NULL; + +static struct bond_parm_tbl bond_lacp_tbl[] = { +{ "slow", AD_LACP_SLOW}, +{ "fast", AD_LACP_FAST}, +{ NULL, -1}, +}; + +static int first_pass = 1; +static struct bonding *these_bonds = NULL; +static struct net_device *dev_bonds = NULL; + +MODULE_PARM(max_bonds, "i"); +MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); +MODULE_PARM(miimon, "i"); +MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); +MODULE_PARM(use_carrier, "i"); +MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; 09 for off, 1 for on (default)"); +MODULE_PARM(mode, "s"); +MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor"); +MODULE_PARM(arp_interval, "i"); +MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); +MODULE_PARM(arp_ip_target, "1-" __MODULE_STRING(MAX_ARP_IP_TARGETS) "s"); +MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); +MODULE_PARM(updelay, "i"); +MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); +MODULE_PARM(downdelay, "i"); 
+MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds"); +MODULE_PARM(primary, "s"); +MODULE_PARM_DESC(primary, "Primary network device to use"); +MODULE_PARM(multicast, "s"); +MODULE_PARM_DESC(multicast, "Mode for multicast support : 0 for none, 1 for active slave, 2 for all slaves (default)"); +MODULE_PARM(lacp_rate, "s"); +MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner (slow/fast)"); + +static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev); +static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev); +static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev); +static struct net_device_stats *bond_get_stats(struct net_device *dev); +static void bond_mii_monitor(struct net_device *dev); +static void loadbalance_arp_monitor(struct net_device *dev); +static void activebackup_arp_monitor(struct net_device *dev); +static int bond_event(struct notifier_block *this, unsigned long event, void *ptr); +static void bond_mc_list_destroy(struct bonding *bond); +static void bond_mc_add(bonding_t *bond, void *addr, int alen); +static void bond_mc_delete(bonding_t *bond, void *addr, int alen); +static int bond_mc_list_copy (struct dev_mc_list *src, struct bonding *dst, int gpf_flag); +static inline int dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2); +static void bond_set_promiscuity(bonding_t *bond, int inc); +static void bond_set_allmulti(bonding_t *bond, int inc); +static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list); +static void bond_mc_update(bonding_t *bond, slave_t *new, slave_t *old); +static int bond_enslave(struct net_device *master, struct net_device *slave); +static int bond_release(struct net_device *master, struct net_device *slave); +static int bond_release_all(struct net_device *master); +static int bond_sethwaddr(struct net_device *master, struct net_device *slave); + +/* + * bond_get_info 
is the interface into the /proc filesystem. This is + * a different interface than the BOND_INFO_QUERY ioctl. That is done + * through the generic networking ioctl interface, and bond_info_query + * is the internal function which provides that information. + */ +static int bond_get_info(char *buf, char **start, off_t offset, int length); + +/* Caller must hold bond->ptrlock for write */ +static inline struct slave* +bond_assign_current_slave(struct bonding *bond,struct slave *newslave) +{ + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_assign_current_slave(bond, newslave); + } else { + bond->current_slave = newslave; + } + + return bond->current_slave; +} + +/* #define BONDING_DEBUG 1 */ + +/* several macros */ + +static void arp_send_all(slave_t *slave) +{ + int i; + + for (i = 0; (idev, + my_ip, arp_target_hw_addr, slave->dev->dev_addr, + arp_target_hw_addr); + } +} + + +static const char * +bond_mode_name(void) +{ + switch (bond_mode) { + case BOND_MODE_ROUNDROBIN : + return "load balancing (round-robin)"; + case BOND_MODE_ACTIVEBACKUP : + return "fault-tolerance (active-backup)"; + case BOND_MODE_XOR : + return "load balancing (xor)"; + case BOND_MODE_BROADCAST : + return "fault-tolerance (broadcast)"; + case BOND_MODE_8023AD: + return "IEEE 802.3ad Dynamic link aggregation"; + case BOND_MODE_TLB: + return "transmit load balancing"; + case BOND_MODE_ALB: + return "adaptive load balancing"; + default: + return "unknown"; + } +} + +static const char * +multicast_mode_name(void) +{ + switch(multicast_mode) { + case BOND_MULTICAST_DISABLED : + return "disabled"; + case BOND_MULTICAST_ACTIVE : + return "active slave only"; + case BOND_MULTICAST_ALL : + return "all slaves"; + default : + return "unknown"; + } +} + +void bond_set_slave_inactive_flags(slave_t *slave) +{ + slave->state = BOND_STATE_BACKUP; + slave->dev->flags |= IFF_NOARP; +} + +void bond_set_slave_active_flags(slave_t *slave) +{ + slave->state = BOND_STATE_ACTIVE; + 
slave->dev->flags &= ~IFF_NOARP; +} + +/* + * This function counts and verifies the the number of attached + * slaves, checking the count against the expected value (given that incr + * is either 1 or -1, for add or removal of a slave). Only + * bond_xmit_xor() uses the slave_cnt value, but this is still a good + * consistency check. + */ +static inline void +update_slave_cnt(bonding_t *bond, int incr) +{ + slave_t *slave = NULL; + int expect = bond->slave_cnt + incr; + + bond->slave_cnt = 0; + for (slave = bond->prev; slave != (slave_t*)bond; + slave = slave->prev) { + bond->slave_cnt++; + } + + if (expect != bond->slave_cnt) + BUG(); +} + +/* + * Set MAC. Differs from eth_mac_addr in that we allow changes while + * netif_running(). + */ +static int +bond_set_mac_address(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + +/* + * This function detaches the slave from the list . + * WARNING: no check is made to verify if the slave effectively + * belongs to . It returns in case it's needed. + * Nothing is freed on return, structures are just unchained. + * If the bond->current_slave pointer was pointing to , + * it's replaced with bond->next, or NULL if not applicable. + * + * bond->lock held for writing by caller. + */ +static slave_t * +bond_detach_slave(bonding_t *bond, slave_t *slave) +{ + if ((bond == NULL) || (slave == NULL) || + ((void *)bond == (void *)slave)) { + printk(KERN_ERR + "bond_detach_slave(): trying to detach " + "slave %p from bond %p\n", bond, slave); + return slave; + } + + if (bond->next == slave) { /* is the slave at the head ? */ + if (bond->prev == slave) { /* is the slave alone ? 
*/ + bond->prev = bond->next = (slave_t *)bond; + } else { /* not alone */ + bond->next = slave->next; + slave->next->prev = (slave_t *)bond; + bond->prev->next = slave->next; + } + } else { + slave->prev->next = slave->next; + if (bond->prev == slave) { /* is this slave the last one ? */ + bond->prev = slave->prev; + } else { + slave->next->prev = slave->prev; + } + } + + update_slave_cnt(bond, -1); + + /* no need to hold ptrlock since bond lock is + * already held for writing + */ + if (slave == bond->current_slave) { + if ( bond->next != (slave_t *)bond) { /* found one slave */ + bond_assign_current_slave(bond, bond->next); + } else { + bond_assign_current_slave(bond, NULL); + } + } + + return slave; +} + +/* + * This function attaches the slave to the list . + * + * bond->lock held for writing by caller. + */ +static void +bond_attach_slave(struct bonding *bond, struct slave *new_slave) +{ + /* + * queue to the end of the slaves list, make the first element its + * successor, the last one its predecessor, and make it the bond's + * predecessor. + * + * Just to clarify, so future bonding driver hackers don't go through + * the same confusion stage I did trying to figure this out, the + * slaves are stored in a double linked circular list, sortof. + * In the ->next direction, the last slave points to the first slave, + * bypassing bond; only the slaves are in the ->next direction. + * In the ->prev direction, however, the first slave points to bond + * and bond points to the last slave. + * + * It looks like a circle with a little bubble hanging off one side + * in the ->prev direction only. + * + * When going through the list once, its best to start at bond->prev + * and go in the ->prev direction, testing for bond. Doing this + * in the ->next direction doesn't work. Trust me, I know this now. 
+ * :) -mts 2002.03.14 + */ + new_slave->prev = bond->prev; + new_slave->prev->next = new_slave; + bond->prev = new_slave; + new_slave->next = bond->next; + + update_slave_cnt(bond, 1); +} + + +/* + * Less bad way to call ioctl from within the kernel; this needs to be + * done some other way to get the call out of interrupt context. + * Needs "ioctl" variable to be supplied by calling context. + */ +#define IOCTL(dev, arg, cmd) ({ \ + int ret; \ + mm_segment_t fs = get_fs(); \ + set_fs(get_ds()); \ + ret = ioctl(dev, arg, cmd); \ + set_fs(fs); \ + ret; }) + +/* + * Get link speed and duplex from the slave's base driver + * using ethtool. If for some reason the call fails or the + * values are invalid, fake speed and duplex to 100/Full + * and return error. + */ +static int bond_update_speed_duplex(struct slave *slave) +{ + struct net_device *dev = slave->dev; + static int (* ioctl)(struct net_device *, struct ifreq *, int); + struct ifreq ifr; + struct ethtool_cmd etool; + + ioctl = dev->do_ioctl; + if (ioctl) { + etool.cmd = ETHTOOL_GSET; + ifr.ifr_data = (char*)&etool; + if (IOCTL(dev, &ifr, SIOCETHTOOL) == 0) { + slave->speed = etool.speed; + slave->duplex = etool.duplex; + } else { + goto err_out; + } + } else { + goto err_out; + } + + switch (slave->speed) { + case SPEED_10: + case SPEED_100: + case SPEED_1000: + break; + default: + goto err_out; + } + + switch (slave->duplex) { + case DUPLEX_FULL: + case DUPLEX_HALF: + break; + default: + goto err_out; + } + + return 0; + +err_out: + /* Fake speed and duplex */ + slave->speed = SPEED_100; + slave->duplex = DUPLEX_FULL; + return -1; +} + +/* + * if supports MII link status reporting, check its link status. + * + * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), + * depening upon the setting of the use_carrier parameter. + * + * Return either BMSR_LSTATUS, meaning that the link is up (or we + * can't tell and just pretend it is), or 0, meaning that the link is + * down. 
+ * + * If reporting is non-zero, instead of faking link up, return -1 if + * both ETHTOOL and MII ioctls fail (meaning the device does not + * support them). If use_carrier is set, return whatever it says. + * It'd be nice if there was a good way to tell if a driver supports + * netif_carrier, but there really isn't. + */ +static int +bond_check_dev_link(struct net_device *dev, int reporting) +{ + static int (* ioctl)(struct net_device *, struct ifreq *, int); + struct ifreq ifr; + struct mii_ioctl_data *mii; + struct ethtool_value etool; + + if (use_carrier) { + return netif_carrier_ok(dev) ? BMSR_LSTATUS : 0; + } + + ioctl = dev->do_ioctl; + if (ioctl) { + /* TODO: set pointer to correct ioctl on a per team member */ + /* bases to make this more efficient. that is, once */ + /* we determine the correct ioctl, we will always */ + /* call it and not the others for that team */ + /* member. */ + + /* + * We cannot assume that SIOCGMIIPHY will also read a + * register; not all network drivers (e.g., e100) + * support that. + */ + + /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ + mii = (struct mii_ioctl_data *)&ifr.ifr_data; + if (IOCTL(dev, &ifr, SIOCGMIIPHY) == 0) { + mii->reg_num = MII_BMSR; + if (IOCTL(dev, &ifr, SIOCGMIIREG) == 0) { + return mii->val_out & BMSR_LSTATUS; + } + } + + /* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */ + /* for a period of time so we attempt to get link status */ + /* from it last if the above MII ioctls fail... */ + etool.cmd = ETHTOOL_GLINK; + ifr.ifr_data = (char*)&etool; + if (IOCTL(dev, &ifr, SIOCETHTOOL) == 0) { + if (etool.data == 1) { + return BMSR_LSTATUS; + } else { +#ifdef BONDING_DEBUG + printk(KERN_INFO + ":: SIOCETHTOOL shows link down \n"); +#endif + return 0; + } + } + + } + + /* + * If reporting, report that either there's no dev->do_ioctl, + * or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we + * cannot report link status). If not reporting, pretend + * we're ok. 
+ */ + return reporting ? -1 : BMSR_LSTATUS; +} + +static u16 bond_check_mii_link(bonding_t *bond) +{ + int has_active_interface = 0; + + read_lock_bh(&bond->lock); + read_lock(&bond->ptrlock); + has_active_interface = (bond->current_slave != NULL); + read_unlock(&bond->ptrlock); + read_unlock_bh(&bond->lock); + + return (has_active_interface ? BMSR_LSTATUS : 0); +} + +/* register to receive lacpdus on a bond */ +static void bond_register_lacpdu(struct bonding *bond) +{ + struct packet_type* pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); + + /* initialize packet type */ + pk_type->type = PKT_TYPE_LACPDU; + pk_type->dev = bond->device; + pk_type->func = bond_3ad_lacpdu_recv; + pk_type->data = (void*)1; /* understand shared skbs */ + + dev_add_pack(pk_type); +} + +/* unregister to receive lacpdus on a bond */ +static void bond_unregister_lacpdu(struct bonding *bond) +{ + dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); +} + +static int bond_open(struct net_device *dev) +{ + struct bonding *bond = (struct bonding *)(dev->priv); + struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer; + struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer; + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); + + /* bond_alb_initialize must be called before the timer + * is started. + */ + if (bond_alb_initialize(bond, (bond_mode == BOND_MODE_ALB))) { + /* something went wrong - fail the open operation */ + return -1; + } + + init_timer(alb_timer); + alb_timer->expires = jiffies + 1; + alb_timer->data = (unsigned long)bond; + alb_timer->function = (void *)&bond_alb_monitor; + add_timer(alb_timer); + } + + MOD_INC_USE_COUNT; + + if (miimon > 0) { /* link check interval, in milliseconds. 
*/ + init_timer(timer); + timer->expires = jiffies + (miimon * HZ / 1000); + timer->data = (unsigned long)dev; + timer->function = (void *)&bond_mii_monitor; + add_timer(timer); + } + + if (arp_interval> 0) { /* arp interval, in milliseconds. */ + init_timer(arp_timer); + arp_timer->expires = jiffies + (arp_interval * HZ / 1000); + arp_timer->data = (unsigned long)dev; + if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + arp_timer->function = (void *)&activebackup_arp_monitor; + } else { + arp_timer->function = (void *)&loadbalance_arp_monitor; + } + add_timer(arp_timer); + } + + if (bond_mode == BOND_MODE_8023AD) { + struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); + init_timer(ad_timer); + ad_timer->expires = jiffies + (AD_TIMER_INTERVAL * HZ / 1000); + ad_timer->data = (unsigned long)bond; + ad_timer->function = (void *)&bond_3ad_state_machine_handler; + add_timer(ad_timer); + + /* register to receive LACPDUs */ + bond_register_lacpdu(bond); + } + + return 0; +} + +static int bond_close(struct net_device *master) +{ + bonding_t *bond = (struct bonding *) master->priv; + + write_lock_bh(&bond->lock); + + if (miimon > 0) { /* link check interval, in milliseconds. */ + del_timer(&bond->mii_timer); + } + if (arp_interval> 0) { /* arp interval, in milliseconds. 
*/ + del_timer(&bond->arp_timer); + if (arp_target_hw_addr != NULL) { + kfree(arp_target_hw_addr); + arp_target_hw_addr = NULL; + } + } + + if (bond_mode == BOND_MODE_8023AD) { + del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); + + /* Unregister the receive of LACPDUs */ + bond_unregister_lacpdu(bond); + } + + bond_mc_list_destroy (bond); + + write_unlock_bh(&bond->lock); + + /* Release the bonded slaves */ + bond_release_all(master); + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); + + bond_alb_deinitialize(bond); + } + + MOD_DEC_USE_COUNT; + return 0; +} + +/* + * flush all members of flush->mc_list from device dev->mc_list + */ +static void bond_mc_list_flush(struct net_device *dev, struct net_device *flush) +{ + struct dev_mc_list *dmi; + + for (dmi = flush->mc_list; dmi != NULL; dmi = dmi->next) + dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); + + if (bond_mode == BOND_MODE_8023AD) { + /* del lacpdu mc addr from mc list */ + u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; + + dev_mc_delete(dev, lacpdu_multicast, ETH_ALEN, 0); + } +} + +/* + * Totally destroys the mc_list in bond + */ +static void bond_mc_list_destroy(struct bonding *bond) +{ + struct dev_mc_list *dmi; + + dmi = bond->mc_list; + while (dmi) { + bond->mc_list = dmi->next; + kfree(dmi); + dmi = bond->mc_list; + } +} + +/* + * Add a Multicast address to every slave in the bonding group + */ +static void bond_mc_add(bonding_t *bond, void *addr, int alen) +{ + slave_t *slave; + switch (multicast_mode) { + case BOND_MULTICAST_ACTIVE : + /* write lock already acquired */ + if (bond->current_slave != NULL) + dev_mc_add(bond->current_slave->dev, addr, alen, 0); + break; + case BOND_MULTICAST_ALL : + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) + dev_mc_add(slave->dev, addr, alen, 0); + break; + case BOND_MULTICAST_DISABLED : + break; + } +} + +/* + * Remove a multicast address from every 
slave in the bonding group + */ +static void bond_mc_delete(bonding_t *bond, void *addr, int alen) +{ + slave_t *slave; + switch (multicast_mode) { + case BOND_MULTICAST_ACTIVE : + /* write lock already acquired */ + if (bond->current_slave != NULL) + dev_mc_delete(bond->current_slave->dev, addr, alen, 0); + break; + case BOND_MULTICAST_ALL : + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) + dev_mc_delete(slave->dev, addr, alen, 0); + break; + case BOND_MULTICAST_DISABLED : + break; + } +} + +/* + * Copy all the Multicast addresses from src to the bonding device dst + */ +static int bond_mc_list_copy (struct dev_mc_list *src, struct bonding *dst, + int gpf_flag) +{ + struct dev_mc_list *dmi, *new_dmi; + + for (dmi = src; dmi != NULL; dmi = dmi->next) { + new_dmi = kmalloc(sizeof(struct dev_mc_list), gpf_flag); + + if (new_dmi == NULL) { + return -ENOMEM; + } + + new_dmi->next = dst->mc_list; + dst->mc_list = new_dmi; + + new_dmi->dmi_addrlen = dmi->dmi_addrlen; + memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); + new_dmi->dmi_users = dmi->dmi_users; + new_dmi->dmi_gusers = dmi->dmi_gusers; + } + return 0; +} + +/* + * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise + */ +static inline int dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) +{ + return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && + dmi1->dmi_addrlen == dmi2->dmi_addrlen; +} + +/* + * Push the promiscuity flag down to all slaves + */ +static void bond_set_promiscuity(bonding_t *bond, int inc) +{ + slave_t *slave; + switch (multicast_mode) { + case BOND_MULTICAST_ACTIVE : + /* write lock already acquired */ + if (bond->current_slave != NULL) + dev_set_promiscuity(bond->current_slave->dev, inc); + break; + case BOND_MULTICAST_ALL : + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) + dev_set_promiscuity(slave->dev, inc); + break; + case BOND_MULTICAST_DISABLED : + break; + } +} + +/* + * Push the allmulti 
flag down to all slaves + */ +static void bond_set_allmulti(bonding_t *bond, int inc) +{ + slave_t *slave; + switch (multicast_mode) { + case BOND_MULTICAST_ACTIVE : + /* write lock already acquired */ + if (bond->current_slave != NULL) + dev_set_allmulti(bond->current_slave->dev, inc); + break; + case BOND_MULTICAST_ALL : + for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) + dev_set_allmulti(slave->dev, inc); + break; + case BOND_MULTICAST_DISABLED : + break; + } +} + +/* + * returns dmi entry if found, NULL otherwise + */ +static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, + struct dev_mc_list *mc_list) +{ + struct dev_mc_list *idmi; + + for (idmi = mc_list; idmi != NULL; idmi = idmi->next) { + if (dmi_same(dmi, idmi)) { + return idmi; + } + } + return NULL; +} + +static void set_multicast_list(struct net_device *master) +{ + bonding_t *bond = master->priv; + struct dev_mc_list *dmi; + + if (multicast_mode == BOND_MULTICAST_DISABLED) + return; + /* + * Lock the private data for the master + */ + write_lock_bh(&bond->lock); + + /* set promiscuity flag to slaves */ + if ( (master->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC) ) + bond_set_promiscuity(bond, 1); + + if ( !(master->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC) ) + bond_set_promiscuity(bond, -1); + + /* set allmulti flag to slaves */ + if ( (master->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI) ) + bond_set_allmulti(bond, 1); + + if ( !(master->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI) ) + bond_set_allmulti(bond, -1); + + bond->flags = master->flags; + + /* looking for addresses to add to slaves' mc list */ + for (dmi = master->mc_list; dmi != NULL; dmi = dmi->next) { + if (bond_mc_list_find_dmi(dmi, bond->mc_list) == NULL) + bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); + } + + /* looking for addresses to delete from slaves' list */ + for (dmi = bond->mc_list; dmi != NULL; dmi = dmi->next) { + if 
(bond_mc_list_find_dmi(dmi, master->mc_list) == NULL) + bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); + } + + + /* save master's multicast list */ + bond_mc_list_destroy (bond); + bond_mc_list_copy (master->mc_list, bond, GFP_ATOMIC); + + write_unlock_bh(&bond->lock); +} + +/* + * Update the mc list and multicast-related flags for the new and + * old active slaves (if any) according to the multicast mode + */ +static void bond_mc_update(bonding_t *bond, slave_t *new, slave_t *old) +{ + struct dev_mc_list *dmi; + + switch(multicast_mode) { + case BOND_MULTICAST_ACTIVE : + if (bond->device->flags & IFF_PROMISC) { + if (old != NULL && new != old) + dev_set_promiscuity(old->dev, -1); + dev_set_promiscuity(new->dev, 1); + } + if (bond->device->flags & IFF_ALLMULTI) { + if (old != NULL && new != old) + dev_set_allmulti(old->dev, -1); + dev_set_allmulti(new->dev, 1); + } + /* first remove all mc addresses from old slave if any, + and _then_ add them to new active slave */ + if (old != NULL && new != old) { + for (dmi = bond->device->mc_list; dmi != NULL; dmi = dmi->next) + dev_mc_delete(old->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); + } + for (dmi = bond->device->mc_list; dmi != NULL; dmi = dmi->next) + dev_mc_add(new->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); + break; + case BOND_MULTICAST_ALL : + /* nothing to do: mc list is already up-to-date on all slaves */ + break; + case BOND_MULTICAST_DISABLED : + break; + } +} + +/* enslave device to bond device */ +static int bond_enslave(struct net_device *master_dev, + struct net_device *slave_dev) +{ + bonding_t *bond = NULL; + slave_t *new_slave = NULL; + unsigned long rflags = 0; + int err = 0; + struct dev_mc_list *dmi; + struct in_ifaddr **ifap; + struct in_ifaddr *ifa; + int link_reporting; + struct sockaddr addr; + + if (master_dev == NULL || slave_dev == NULL) { + return -ENODEV; + } + bond = (struct bonding *) master_dev->priv; + + if (slave_dev->do_ioctl == NULL) { + printk(KERN_DEBUG + "Warning : no link 
monitoring support for %s\n", + slave_dev->name); + } + + + /* bond must be initialized by bond_open() before enslaving */ + if (!(master_dev->flags & IFF_UP)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, master_dev is not up\n"); +#endif + return -EPERM; + } + + /* already enslaved */ + if (master_dev->flags & IFF_SLAVE || slave_dev->flags & IFF_SLAVE) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, Device was already enslaved\n"); +#endif + return -EBUSY; + } + + if (app_abi_ver >= 1) { + /* The application is using an ABI, which requires the + * slave interface to be closed. + */ + if ((slave_dev->flags & IFF_UP)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, slave_dev is up\n"); +#endif + return -EPERM; + } + + if (slave_dev->set_mac_address == NULL) { + printk(KERN_CRIT + "The slave device you specified does not support" + " setting the MAC address.\n"); + printk(KERN_CRIT + "Your kernel likely does not support slave" + " devices.\n"); + + return -EOPNOTSUPP; + } + } else { + /* The application is not using an ABI, which requires the + * slave interface to be open. 
+ */ + if (!(slave_dev->flags & IFF_UP)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error, slave_dev is not running\n"); +#endif + return -EINVAL; + } + + if ((bond_mode == BOND_MODE_8023AD) || + (bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + printk(KERN_ERR + "bonding: Error: to use %s mode, you must " + "upgrade ifenslave.\n", bond_mode_name()); + return -EOPNOTSUPP; + } + } + + if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + memset(new_slave, 0, sizeof(slave_t)); + + /* save slave's original flags before calling + * netdev_set_master and dev_open + */ + new_slave->original_flags = slave_dev->flags; + + if (app_abi_ver >= 1) { + /* save slave's original ("permanent") mac address for + * modes that needs it, and for restoring it upon release, + * and then set it to the master's address + */ + memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); + + if (bond->slave_cnt > 0) { + /* set slave to master's mac address + * The application already set the master's + * mac address to that of the first slave + */ + memcpy(addr.sa_data, master_dev->dev_addr, ETH_ALEN); + addr.sa_family = slave_dev->type; + err = slave_dev->set_mac_address(slave_dev, &addr); + if (err) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error %d calling set_mac_address\n", err); +#endif + goto err_free; + } + } + + /* open the slave since the application closed it */ + err = dev_open(slave_dev); + if (err) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Openning slave %s failed\n", slave_dev->name); +#endif + goto err_restore_mac; + } + } + + err = netdev_set_master(slave_dev, master_dev); + if (err) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Error %d calling netdev_set_master\n", err); +#endif + if (app_abi_ver < 1) { + goto err_free; + } else { + goto err_close; + } + } + + new_slave->dev = slave_dev; + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + /* bond_alb_init_slave() must be called before all 
other stages since + * it might fail and we do not want to have to undo everything + */ + err = bond_alb_init_slave(bond, new_slave); + if (err) { + goto err_unset_master; + } + } + + if (multicast_mode == BOND_MULTICAST_ALL) { + /* set promiscuity level to new slave */ + if (master_dev->flags & IFF_PROMISC) + dev_set_promiscuity(slave_dev, 1); + + /* set allmulti level to new slave */ + if (master_dev->flags & IFF_ALLMULTI) + dev_set_allmulti(slave_dev, 1); + + /* upload master's mc_list to new slave */ + for (dmi = master_dev->mc_list; dmi != NULL; dmi = dmi->next) + dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); + } + + if (bond_mode == BOND_MODE_8023AD) { + /* add lacpdu mc addr to mc list */ + u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; + + dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); + } + + write_lock_bh(&bond->lock); + + bond_attach_slave(bond, new_slave); + new_slave->delay = 0; + new_slave->link_failure_count = 0; + + if (miimon > 0 && !use_carrier) { + link_reporting = bond_check_dev_link(slave_dev, 1); + + if ((link_reporting == -1) && (arp_interval == 0)) { + /* + * miimon is set but a bonded network driver + * does not support ETHTOOL/MII and + * arp_interval is not set. Note: if + * use_carrier is enabled, we will never go + * here (because netif_carrier is always + * supported); thus, we don't need to change + * the messages for netif_carrier. + */ + printk(KERN_ERR + "bond_enslave(): MII and ETHTOOL support not " + "available for interface %s, and " + "arp_interval/arp_ip_target module parameters " + "not specified, thus bonding will not detect " + "link failures! 
see bonding.txt for details.\n", + slave_dev->name); + } else if (link_reporting == -1) { + /* unable get link status using mii/ethtool */ + printk(KERN_WARNING + "bond_enslave: can't get link status from " + "interface %s; the network driver associated " + "with this interface does not support " + "MII or ETHTOOL link status reporting, thus " + "miimon has no effect on this interface.\n", + slave_dev->name); + } + } + + /* check for initial state */ + if ((miimon <= 0) || + (bond_check_dev_link(slave_dev, 0) == BMSR_LSTATUS)) { + if (updelay) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Initial state of slave_dev is " + "BOND_LINK_BACK\n"); +#endif + new_slave->link = BOND_LINK_BACK; + new_slave->delay = updelay; + } + else { +#ifdef BONDING_DEBUG + printk(KERN_DEBUG "Initial state of slave_dev is " + "BOND_LINK_UP\n"); +#endif + new_slave->link = BOND_LINK_UP; + } + new_slave->jiffies = jiffies; + } + else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "Initial state of slave_dev is " + "BOND_LINK_DOWN\n"); +#endif + new_slave->link = BOND_LINK_DOWN; + } + + if (bond_update_speed_duplex(new_slave) && + (new_slave->link != BOND_LINK_DOWN)) { + + printk(KERN_WARNING + "bond_enslave(): failed to get speed/duplex from %s, " + "speed forced to 100Mbps, duplex forced to Full.\n", + new_slave->dev->name); + if (bond_mode == BOND_MODE_8023AD) { + printk(KERN_WARNING + "Operation of 802.3ad mode requires ETHTOOL support " + "in base driver for proper aggregator selection.\n"); + } + } + + /* if we're in active-backup mode, we need one and only one active + * interface. The backup interfaces will have their NOARP flag set + * because we need them to be completely deaf and not to respond to + * any ARP request on the network to avoid fooling a switch. Thus, + * since we guarantee that current_slave always point to the last + * usable interface, we just have to verify this interface's flag. 
+ */ + if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + if (((bond->current_slave == NULL) + || (bond->current_slave->dev->flags & IFF_NOARP)) + && (new_slave->link != BOND_LINK_DOWN)) { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This is the first active slave\n"); +#endif + /* first slave or no active slave yet, and this link + is OK, so make this interface the active one */ + bond_assign_current_slave(bond, new_slave); + bond_set_slave_active_flags(new_slave); + bond_mc_update(bond, new_slave, NULL); + } + else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This is just a backup slave\n"); +#endif + bond_set_slave_inactive_flags(new_slave); + } + read_lock_irqsave(&(((struct in_device *)slave_dev->ip_ptr)->lock), rflags); + ifap= &(((struct in_device *)slave_dev->ip_ptr)->ifa_list); + ifa = *ifap; + my_ip = ifa->ifa_address; + read_unlock_irqrestore(&(((struct in_device *)slave_dev->ip_ptr)->lock), rflags); + + /* if there is a primary slave, remember it */ + if (primary != NULL) { + if (strcmp(primary, new_slave->dev->name) == 0) { + bond->primary_slave = new_slave; + } + } + } else if (bond_mode == BOND_MODE_8023AD) { + /* in 802.3ad mode, the internal mechanism + * will activate the slaves in the selected + * aggregator + */ + bond_set_slave_inactive_flags(new_slave); + /* if this is the first slave */ + if (new_slave == bond->next) { + SLAVE_AD_INFO(new_slave).id = 1; + /* Initialize AD with the number of times that the AD timer is called in 1 second + * can be called only after the mac address of the bond is set + */ + bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, + lacp_fast); + } else { + SLAVE_AD_INFO(new_slave).id = + SLAVE_AD_INFO(new_slave->prev).id + 1; + } + + bond_3ad_bind_slave(new_slave); + } else if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + new_slave->state = BOND_STATE_ACTIVE; + if ((bond->current_slave == NULL) && (new_slave->link != BOND_LINK_DOWN)) { + /* first slave or no active slave yet, and this link + * is OK, so 
make this interface the active one + */ + bond_assign_current_slave(bond, new_slave); + } + + /* if there is a primary slave, remember it */ + if (primary != NULL) { + if (strcmp(primary, new_slave->dev->name) == 0) { + bond->primary_slave = new_slave; + } + } + } else { +#ifdef BONDING_DEBUG + printk(KERN_CRIT "This slave is always active in trunk mode\n"); +#endif + /* always active in trunk mode */ + new_slave->state = BOND_STATE_ACTIVE; + if (bond->current_slave == NULL) + bond_assign_current_slave(bond, new_slave); + } + + write_unlock_bh(&bond->lock); + + if (app_abi_ver < 1) { + /* + * !!! This is to support old versions of ifenslave. + * We can remove this in 2.5 because our ifenslave takes + * care of this for us. + * We check to see if the master has a mac address yet. + * If not, we'll give it the mac address of our slave device. + */ + int ndx = 0; + + for (ndx = 0; ndx < slave_dev->addr_len; ndx++) { +#ifdef BONDING_DEBUG + printk(KERN_DEBUG + "Checking ndx=%d of master_dev->dev_addr\n", ndx); +#endif + if (master_dev->dev_addr[ndx] != 0) { +#ifdef BONDING_DEBUG + printk(KERN_DEBUG + "Found non-zero byte at ndx=%d\n", ndx); +#endif + break; + } + } + if (ndx == slave_dev->addr_len) { + /* + * We got all the way through the address and it was + * all 0's. + */ +#ifdef BONDING_DEBUG + printk(KERN_DEBUG "%s doesn't have a MAC address yet. ", + master_dev->name); + printk(KERN_DEBUG "Going to give assign it from %s.\n", + slave_dev->name); +#endif + bond_sethwaddr(master_dev, slave_dev); + } + } + + printk (KERN_INFO "%s: enslaving %s as a%s interface with a%s link.\n", + master_dev->name, slave_dev->name, + new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", + new_slave->link != BOND_LINK_DOWN ? 
"n up" : " down"); + + /* enslave is successful */ + return 0; + +/* Undo stages on error */ +err_unset_master: + netdev_set_master(slave_dev, NULL); + +err_close: + dev_close(slave_dev); + +err_restore_mac: + memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + slave_dev->set_mac_address(slave_dev, &addr); + +err_free: + kfree(new_slave); + return err; +} + +/* + * This function changes the active slave to slave . + * It returns -EINVAL in the following cases. + * - is not found in the list. + * - There is not active slave now. + * - is already active. + * - The link state of is not BOND_LINK_UP. + * - is not running. + * In these cases, this fuction does nothing. + * In the other cases, currnt_slave pointer is changed and 0 is returned. + */ +static int bond_change_active(struct net_device *master_dev, struct net_device *slave_dev) +{ + bonding_t *bond; + slave_t *slave; + slave_t *oldactive = NULL; + slave_t *newactive = NULL; + int ret = 0; + + if (master_dev == NULL || slave_dev == NULL) { + return -ENODEV; + } + + bond = (struct bonding *) master_dev->priv; + write_lock_bh(&bond->lock); + slave = (slave_t *)bond; + oldactive = bond->current_slave; + + while ((slave = slave->prev) != (slave_t *)bond) { + if(slave_dev == slave->dev) { + newactive = slave; + break; + } + } + + /* + * Changing to the current active: do nothing; return success. 
+ */ + if (newactive && (newactive == oldactive)) { + write_unlock_bh(&bond->lock); + return 0; + } + + if ((newactive != NULL)&& + (oldactive != NULL)&& + (newactive->link == BOND_LINK_UP)&& + IS_UP(newactive->dev)) { + bond_set_slave_inactive_flags(oldactive); + bond_set_slave_active_flags(newactive); + bond_mc_update(bond, newactive, oldactive); + bond_assign_current_slave(bond, newactive); + printk("%s : activate %s(old : %s)\n", + master_dev->name, newactive->dev->name, + oldactive->dev->name); + } else { + ret = -EINVAL; + } + write_unlock_bh(&bond->lock); + return ret; +} + +/* Choose a new valid interface from the pool, set it active + * and make it the current slave. If no valid interface is + * found, the oldest slave in BACK state is choosen and + * activated. If none is found, it's considered as no + * interfaces left so the current slave is set to NULL. + * The result is a pointer to the current slave. + * + * Since this function sends messages tails through printk, the caller + * must have started something like `printk(KERN_INFO "xxxx ");'. + * + * Warning: Caller must hold ptrlock for writing. + */ +slave_t *change_active_interface(bonding_t *bond) +{ + slave_t *newslave, *oldslave; + slave_t *bestslave = NULL; + int mintime; + + newslave = oldslave = bond->current_slave; + + if (newslave == NULL) { /* there were no active slaves left */ + if (bond->next != (slave_t *)bond) { /* found one slave */ + newslave = bond_assign_current_slave(bond, bond->next); + } else { + + printk (" but could not find any %s interface.\n", + (bond_mode == BOND_MODE_ACTIVEBACKUP) ? 
"backup":"other"); + bond_assign_current_slave(bond, NULL); + return NULL; /* still no slave, return NULL */ + } + } else if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + /* make sure oldslave doesn't send arps - this could + * cause a ping-pong effect between interfaces since they + * would be able to tx arps - in active backup only one + * slave should be able to tx arps, and that should be + * the current_slave; the only exception is when all + * slaves have gone down, then only one non-current slave can + * send arps at a time; clearing oldslaves' mc list is handled + * later in this function. + */ + bond_set_slave_inactive_flags(oldslave); + } + + mintime = updelay; + + /* first try the primary link; if arping, a link must tx/rx traffic + * before it can be considered the current_slave - also, we would skip + * slaves between the current_slave and primary_slave that may be up + * and able to arp + */ + if ((bond->primary_slave != NULL) && (arp_interval == 0)) { + if (IS_UP(bond->primary_slave->dev)) + newslave = bond->primary_slave; + } + + do { + if (IS_UP(newslave->dev)) { + if (newslave->link == BOND_LINK_UP) { + /* this one is immediately usable */ + if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + bond_set_slave_active_flags(newslave); + bond_mc_update(bond, newslave, oldslave); + printk (" and making interface %s the active one.\n", + newslave->dev->name); + } + else { + printk (" and setting pointer to interface %s.\n", + newslave->dev->name); + } + + bond_assign_current_slave(bond, newslave); + return newslave; + } + else if (newslave->link == BOND_LINK_BACK) { + /* link up, but waiting for stabilization */ + if (newslave->delay < mintime) { + mintime = newslave->delay; + bestslave = newslave; + } + } + } + } while ((newslave = newslave->next) != oldslave); + + /* no usable backup found, we'll see if we at least got a link that was + coming back for a long time, and could possibly already be usable. + */ + + if (bestslave != NULL) { + /* early take-over. 
*/ + printk (" and making interface %s the active one %d ms earlier.\n", + bestslave->dev->name, + (updelay - bestslave->delay)*miimon); + + bestslave->delay = 0; + bestslave->link = BOND_LINK_UP; + bestslave->jiffies = jiffies; + bond_set_slave_active_flags(bestslave); + bond_mc_update(bond, bestslave, oldslave); + bond_assign_current_slave(bond, bestslave); + return bestslave; + } + + if ((bond_mode == BOND_MODE_ACTIVEBACKUP) && + (multicast_mode == BOND_MULTICAST_ACTIVE) && + (oldslave != NULL)) { + /* flush bonds (master's) mc_list from oldslave since it wasn't + * updated (and deleted) above + */ + bond_mc_list_flush(oldslave->dev, bond->device); + if (bond->device->flags & IFF_PROMISC) { + dev_set_promiscuity(oldslave->dev, -1); + } + if (bond->device->flags & IFF_ALLMULTI) { + dev_set_allmulti(oldslave->dev, -1); + } + } + + printk (" but could not find any %s interface.\n", + (bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other"); + + /* absolutely nothing found. let's return NULL */ + bond_assign_current_slave(bond, NULL); + return NULL; +} + +/* + * Try to release the slave device from the bond device + * It is legal to access current_slave without a lock because all the function + * is write-locked. + * + * The rules for slave state should be: + * for Active/Backup: + * Active stays on all backups go down + * for Bonded connections: + * The first up interface should be left on and all others downed. 
+ */ +static int bond_release(struct net_device *master, struct net_device *slave) +{ + bonding_t *bond; + slave_t *our_slave, *old_current; + struct sockaddr addr; + + if (master == NULL || slave == NULL) { + return -ENODEV; + } + + bond = (struct bonding *) master->priv; + + /* master already enslaved, or slave not enslaved, + or no slave for this master */ + if ((master->flags & IFF_SLAVE) || !(slave->flags & IFF_SLAVE)) { + printk (KERN_DEBUG "%s: cannot release %s.\n", master->name, slave->name); + return -EINVAL; + } + + write_lock_bh(&bond->lock); + bond->current_arp_slave = NULL; + our_slave = (slave_t *)bond; + old_current = bond->current_slave; + while ((our_slave = our_slave->prev) != (slave_t *)bond) { + if (our_slave->dev == slave) { + int mac_addr_differ = memcmp(bond->device->dev_addr, + our_slave->perm_hwaddr, + ETH_ALEN); + if (!mac_addr_differ && (bond->slave_cnt > 1)) { + printk(KERN_WARNING "WARNING: the permanent HWaddr of %s " + "- %02X:%02X:%02X:%02X:%02X:%02X - " + "is still in use by %s. Set the HWaddr " + "of %s to a different address " + "to avoid conflicts.\n", + slave->name, + slave->dev_addr[0], + slave->dev_addr[1], + slave->dev_addr[2], + slave->dev_addr[3], + slave->dev_addr[4], + slave->dev_addr[5], + bond->device->name, + slave->name); + } + + /* Inform AD package of unbinding of slave. */ + if (bond_mode == BOND_MODE_8023AD) { + /* must be called before the slave is + * detached from the list + */ + bond_3ad_unbind_slave(our_slave); + } + + /* release the slave from its bond */ + bond_detach_slave(bond, our_slave); + + printk (KERN_INFO "%s: releasing %s interface %s", + master->name, + (our_slave->state == BOND_STATE_ACTIVE) ? 
"active" : "backup", + slave->name); + + if (our_slave == old_current) { + /* find a new interface and be verbose */ + change_active_interface(bond); + } else { + printk(".\n"); + } + + if (bond->current_slave == NULL) { + printk(KERN_INFO + "%s: now running without any active interface !\n", + master->name); + } + + if (bond->primary_slave == our_slave) { + bond->primary_slave = NULL; + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + /* must be called only after the slave has been + * detached from the list and the current_slave + * has been replaced (if our_slave == old_current) + */ + bond_alb_deinit_slave(bond, our_slave); + } + + break; + } + + } + write_unlock_bh(&bond->lock); + + if (our_slave == (slave_t *)bond) { + /* if we get here, it's because the device was not found */ + printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name); + return -EINVAL; + } + + /* undo settings and restore original values */ + + if (multicast_mode == BOND_MULTICAST_ALL) { + /* flush master's mc_list from slave */ + bond_mc_list_flush (slave, master); + + /* unset promiscuity level from slave */ + if (master->flags & IFF_PROMISC) + dev_set_promiscuity(slave, -1); + + /* unset allmulti level from slave */ + if (master->flags & IFF_ALLMULTI) + dev_set_allmulti(slave, -1); + } + + netdev_set_master(slave, NULL); + + /* close slave before restoring its mac address */ + dev_close(slave); + + if (app_abi_ver >= 1) { + /* restore original ("permanent") mac address */ + memcpy(addr.sa_data, our_slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave->type; + slave->set_mac_address(slave, &addr); + } + + /* restore the original state of the + * IFF_NOARP flag that might have been + * set by bond_set_slave_inactive_flags() + */ + if ((our_slave->original_flags & IFF_NOARP) == 0) { + slave->flags &= ~IFF_NOARP; + } + + kfree(our_slave); + + /* if the last slave was removed, zero the mac address + * of the master so it will be set by the 
application + * to the mac address of the first slave + */ + if (bond->next == (slave_t*)bond) { + memset(master->dev_addr, 0, master->addr_len); + } + + return 0; /* deletion OK */ +} + +/* + * This function releases all slaves. + */ +static int bond_release_all(struct net_device *master) +{ + bonding_t *bond; + slave_t *our_slave, *old_current; + struct net_device *slave_dev; + struct sockaddr addr; + int err = 0; + + if (master == NULL) { + return -ENODEV; + } + + if (master->flags & IFF_SLAVE) { + return -EINVAL; + } + + bond = (struct bonding *) master->priv; + + write_lock_bh(&bond->lock); + if (bond->next == (struct slave *) bond) { + err = -EINVAL; + goto out; + } + + old_current = bond->current_slave; + bond_assign_current_slave(bond, NULL); + bond->current_arp_slave = NULL; + bond->primary_slave = NULL; + + while ((our_slave = bond->prev) != (slave_t *)bond) { + /* Inform AD package of unbinding of slave + * before slave is detached from the list. + */ + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_unbind_slave(our_slave); + } + + slave_dev = our_slave->dev; + bond_detach_slave(bond, our_slave); + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + /* must be called only after the slave + * has been detached from the list + */ + bond_alb_deinit_slave(bond, our_slave); + } + + /* now that the slave is detached, unlock and perform + * all the undo steps that should not be called from + * within a lock. 
+ */ + write_unlock_bh(&bond->lock); + + if (multicast_mode == BOND_MULTICAST_ALL + || (multicast_mode == BOND_MULTICAST_ACTIVE + && old_current == our_slave)) { + + /* flush master's mc_list from slave */ + bond_mc_list_flush (slave_dev, master); + + /* unset promiscuity level from slave */ + if (master->flags & IFF_PROMISC) + dev_set_promiscuity(slave_dev, -1); + + /* unset allmulti level from slave */ + if (master->flags & IFF_ALLMULTI) + dev_set_allmulti(slave_dev, -1); + } + + netdev_set_master(slave_dev, NULL); + + /* close slave before restoring its mac address */ + dev_close(slave_dev); + + if (app_abi_ver >= 1) { + /* restore original ("permanent") mac address*/ + memcpy(addr.sa_data, our_slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + slave_dev->set_mac_address(slave_dev, &addr); + } + + /* restore the original state of the IFF_NOARP flag that might have + * been set by bond_set_slave_inactive_flags() + */ + if ((our_slave->original_flags & IFF_NOARP) == 0) { + slave_dev->flags &= ~IFF_NOARP; + } + + kfree(our_slave); + + /* re-acquire the lock before getting the next slave */ + write_lock_bh(&bond->lock); + } + + /* zero the mac address of the master so it will be + * set by the application to the mac address of the + * first slave + */ + memset(master->dev_addr, 0, master->addr_len); + + printk (KERN_INFO "%s: released all slaves\n", master->name); + +out: + write_unlock_bh(&bond->lock); + + return err; +} + +/* this function is called regularly to monitor each slave's link. */ +static void bond_mii_monitor(struct net_device *master) +{ + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave, *bestslave, *oldcurrent; + int slave_died = 0; + + read_lock(&bond->lock); + + /* we will try to read the link status of each of our slaves, and + * set their IFF_RUNNING flag appropriately. For each slave not + * supporting MII status, we won't do anything so that a user-space + * program could monitor the link itself if needed. 
+ */ + + bestslave = NULL; + slave = (slave_t *)bond; + + read_lock(&bond->ptrlock); + oldcurrent = bond->current_slave; + read_unlock(&bond->ptrlock); + + while ((slave = slave->prev) != (slave_t *)bond) { + /* use updelay+1 to match an UP slave even when updelay is 0 */ + int mindelay = updelay + 1; + struct net_device *dev = slave->dev; + int link_state; + u16 old_speed = slave->speed; + u8 old_duplex = slave->duplex; + + link_state = bond_check_dev_link(dev, 0); + + switch (slave->link) { + case BOND_LINK_UP: /* the link was up */ + if (link_state == BMSR_LSTATUS) { + /* link stays up, tell that this one + is immediately available */ + if (IS_UP(dev) && (mindelay > -2)) { + /* -2 is the best case : + this slave was already up */ + mindelay = -2; + bestslave = slave; + } + break; + } + else { /* link going down */ + slave->link = BOND_LINK_FAIL; + slave->delay = downdelay; + if (slave->link_failure_count < UINT_MAX) { + slave->link_failure_count++; + } + if (downdelay > 0) { + printk (KERN_INFO + "%s: link status down for %sinterface " + "%s, disabling it in %d ms.\n", + master->name, + IS_UP(dev) + ? ((bond_mode == BOND_MODE_ACTIVEBACKUP) + ? ((slave == oldcurrent) + ? "active " : "backup ") + : "") + : "idle ", + dev->name, + downdelay * miimon); + } + } + /* no break ! 
fall through the BOND_LINK_FAIL test to + ensure proper action to be taken + */ + case BOND_LINK_FAIL: /* the link has just gone down */ + if (link_state != BMSR_LSTATUS) { + /* link stays down */ + if (slave->delay <= 0) { + /* link down for too long time */ + slave->link = BOND_LINK_DOWN; + /* in active/backup mode, we must + * completely disable this interface + */ + if ((bond_mode == BOND_MODE_ACTIVEBACKUP) || + (bond_mode == BOND_MODE_8023AD)) { + bond_set_slave_inactive_flags(slave); + } + printk(KERN_INFO + "%s: link status definitely down " + "for interface %s, disabling it", + master->name, + dev->name); + + /* notify ad that the link status has changed */ + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); + } + + write_lock(&bond->ptrlock); + if (slave == bond->current_slave) { + /* find a new interface and be verbose */ + change_active_interface(bond); + } else { + printk(".\n"); + } + write_unlock(&bond->ptrlock); + slave_died = 1; + } else { + slave->delay--; + } + } else { + /* link up again */ + slave->link = BOND_LINK_UP; + slave->jiffies = jiffies; + printk(KERN_INFO + "%s: link status up again after %d ms " + "for interface %s.\n", + master->name, + (downdelay - slave->delay) * miimon, + dev->name); + + if (IS_UP(dev) && (mindelay > -1)) { + /* -1 is a good case : this slave went + down only for a short time */ + mindelay = -1; + bestslave = slave; + } + } + break; + case BOND_LINK_DOWN: /* the link was down */ + if (link_state != BMSR_LSTATUS) { + /* the link stays down, nothing more to do */ + break; + } else { /* link going up */ + slave->link = BOND_LINK_BACK; + slave->delay = updelay; + + if (updelay > 0) { + /* if updelay == 0, no need to + advertise about a 0 ms delay */ + printk (KERN_INFO + "%s: link status up for interface" + " %s, enabling it in %d ms.\n", + 
master->name, + dev->name, + updelay * miimon); + } + } + /* no break ! fall through the BOND_LINK_BACK state in + case there's something to do. + */ + case BOND_LINK_BACK: /* the link has just come back */ + if (link_state != BMSR_LSTATUS) { + /* link down again */ + slave->link = BOND_LINK_DOWN; + printk(KERN_INFO + "%s: link status down again after %d ms " + "for interface %s.\n", + master->name, + (updelay - slave->delay) * miimon, + dev->name); + } else { + /* link stays up */ + if (slave->delay == 0) { + /* now the link has been up for long time enough */ + slave->link = BOND_LINK_UP; + slave->jiffies = jiffies; + + if (bond_mode == BOND_MODE_8023AD) { + /* prevent it from being the active one */ + slave->state = BOND_STATE_BACKUP; + } + else if (bond_mode != BOND_MODE_ACTIVEBACKUP) { + /* make it immediately active */ + slave->state = BOND_STATE_ACTIVE; + } else if (slave != bond->primary_slave) { + /* prevent it from being the active one */ + slave->state = BOND_STATE_BACKUP; + } + + printk(KERN_INFO + "%s: link status definitely up " + "for interface %s.\n", + master->name, + dev->name); + + /* notify ad that the link status has changed */ + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_handle_link_change(slave, BOND_LINK_UP); + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); + } + + write_lock(&bond->ptrlock); + if ( (bond->primary_slave != NULL) + && (slave == bond->primary_slave) ) + change_active_interface(bond); + write_unlock(&bond->ptrlock); + } + else + slave->delay--; + + /* we'll also look for the mostly eligible slave */ + if (bond->primary_slave == NULL) { + if (IS_UP(dev) && (slave->delay < mindelay)) { + mindelay = slave->delay; + bestslave = slave; + } + } else if ( (IS_UP(bond->primary_slave->dev)) || + ( (!IS_UP(bond->primary_slave->dev)) && + (IS_UP(dev) && (slave->delay < mindelay)) ) ) { + mindelay = slave->delay; + bestslave = slave; + } + } + 
break; + } /* end of switch */ + + bond_update_speed_duplex(slave); + + if (bond_mode == BOND_MODE_8023AD) { + if (old_speed != slave->speed) { + bond_3ad_adapter_speed_changed(slave); + } + if (old_duplex != slave->duplex) { + bond_3ad_adapter_duplex_changed(slave); + } + } + + } /* end of while */ + + /* + * if there's no active interface and we discovered that one + * of the slaves could be activated earlier, so we do it. + */ + read_lock(&bond->ptrlock); + oldcurrent = bond->current_slave; + read_unlock(&bond->ptrlock); + + /* no active interface at the moment or need to bring up the primary */ + if (oldcurrent == NULL) { /* no active interface at the moment */ + if (bestslave != NULL) { /* last chance to find one ? */ + if (bestslave->link == BOND_LINK_UP) { + printk (KERN_INFO + "%s: making interface %s the new active one.\n", + master->name, bestslave->dev->name); + } else { + printk (KERN_INFO + "%s: making interface %s the new " + "active one %d ms earlier.\n", + master->name, bestslave->dev->name, + (updelay - bestslave->delay) * miimon); + + bestslave->delay = 0; + bestslave->link = BOND_LINK_UP; + bestslave->jiffies = jiffies; + + /* notify ad that the link status has changed */ + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_handle_link_change(bestslave, BOND_LINK_UP); + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_handle_link_change(bond, bestslave, BOND_LINK_UP); + } + } + + if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + bond_set_slave_active_flags(bestslave); + bond_mc_update(bond, bestslave, NULL); + } else if (bond_mode != BOND_MODE_8023AD) { + bestslave->state = BOND_STATE_ACTIVE; + } + write_lock(&bond->ptrlock); + bond_assign_current_slave(bond, bestslave); + write_unlock(&bond->ptrlock); + } else if (slave_died) { + /* print this message only once a slave has just died */ + printk(KERN_INFO + "%s: now running without any active interface !\n", + master->name); + } + } + + read_unlock(&bond->lock); + /* 
re-arm the timer */ + mod_timer(&bond->mii_timer, jiffies + (miimon * HZ / 1000)); +} + +/* + * this function is called regularly to monitor each slave's link + * ensuring that traffic is being sent and received when arp monitoring + * is used in load-balancing mode. if the adapter has been dormant, then an + * arp is transmitted to generate traffic. see activebackup_arp_monitor for + * arp monitoring in active backup mode. + */ +static void loadbalance_arp_monitor(struct net_device *master) +{ + bonding_t *bond; + slave_t *slave; + int the_delta_in_ticks = arp_interval * HZ / 1000; + int next_timer = jiffies + (arp_interval * HZ / 1000); + + bond = (struct bonding *) master->priv; + if (master->priv == NULL) { + mod_timer(&bond->arp_timer, next_timer); + return; + } + + /* TODO: investigate why rtnl_shlock_nowait and rtnl_exlock_nowait + * are called below and add comment why they are required... + */ + if ((!IS_UP(master)) || rtnl_shlock_nowait()) { + mod_timer(&bond->arp_timer, next_timer); + return; + } + + if (rtnl_exlock_nowait()) { + rtnl_shunlock(); + mod_timer(&bond->arp_timer, next_timer); + return; + } + + read_lock(&bond->lock); + + /* see if any of the previous devices are up now (i.e. they have + * xmt and rcv traffic). the current_slave does not come into + * the picture unless it is null. also, slave->jiffies is not needed + * here because we send an arp on each slave and give a slave as + * long as it needs to get the tx/rx within the delta. + * TODO: what about up/down delay in arp mode? it wasn't here before + * so it can wait + */ + slave = (slave_t *)bond; + while ((slave = slave->prev) != (slave_t *)bond) { + + if (slave->link != BOND_LINK_UP) { + + if (((jiffies - slave->dev->trans_start) <= + the_delta_in_ticks) && + ((jiffies - slave->dev->last_rx) <= + the_delta_in_ticks)) { + + slave->link = BOND_LINK_UP; + slave->state = BOND_STATE_ACTIVE; + + /* primary_slave has no meaning in round-robin + * mode. 
the window of a slave being up and + * current_slave being null after enslaving + * is closed. + */ + write_lock(&bond->ptrlock); + if (bond->current_slave == NULL) { + printk(KERN_INFO + "%s: link status definitely up " + "for interface %s, ", + master->name, + slave->dev->name); + change_active_interface(bond); + } else { + printk(KERN_INFO + "%s: interface %s is now up\n", + master->name, + slave->dev->name); + } + write_unlock(&bond->ptrlock); + } + } else { + /* slave->link == BOND_LINK_UP */ + + /* not all switches will respond to an arp request + * when the source ip is 0, so don't take the link down + * if we don't know our ip yet + */ + if (((jiffies - slave->dev->trans_start) >= + (2*the_delta_in_ticks)) || + (((jiffies - slave->dev->last_rx) >= + (2*the_delta_in_ticks)) && my_ip !=0)) { + slave->link = BOND_LINK_DOWN; + slave->state = BOND_STATE_BACKUP; + if (slave->link_failure_count < UINT_MAX) { + slave->link_failure_count++; + } + printk(KERN_INFO + "%s: interface %s is now down.\n", + master->name, + slave->dev->name); + + write_lock(&bond->ptrlock); + if (slave == bond->current_slave) { + change_active_interface(bond); + } + write_unlock(&bond->ptrlock); + } + } + + /* note: if switch is in round-robin mode, all links + * must tx arp to ensure all links rx an arp - otherwise + * links may oscillate or not come up at all; if switch is + * in something like xor mode, there is nothing we can + * do - all replies will be rx'ed on same link causing slaves + * to be unstable during low/no traffic periods + */ + if (IS_UP(slave->dev)) { + arp_send_all(slave); + } + } + + read_unlock(&bond->lock); + rtnl_exunlock(); + rtnl_shunlock(); + + /* re-arm the timer */ + mod_timer(&bond->arp_timer, next_timer); +} + +/* + * When using arp monitoring in active-backup mode, this function is + * called to determine if any backup slaves have went down or a new + * current slave needs to be found. 
+ * The backup slaves never generate traffic, they are considered up by merely + * receiving traffic. If the current slave goes down, each backup slave will + * be given the opportunity to tx/rx an arp before being taken down - this + * prevents all slaves from being taken down due to the current slave not + * sending any traffic for the backups to receive. The arps are not necessarily + * necessary, any tx and rx traffic will keep the current slave up. While any + * rx traffic will keep the backup slaves up, the current slave is responsible + * for generating traffic to keep them up regardless of any other traffic they + * may have received. + * see loadbalance_arp_monitor for arp monitoring in load balancing mode + */ +static void activebackup_arp_monitor(struct net_device *master) +{ + bonding_t *bond; + slave_t *slave; + int the_delta_in_ticks = arp_interval * HZ / 1000; + int next_timer = jiffies + (arp_interval * HZ / 1000); + + bond = (struct bonding *) master->priv; + if (master->priv == NULL) { + mod_timer(&bond->arp_timer, next_timer); + return; + } + + if (!IS_UP(master)) { + mod_timer(&bond->arp_timer, next_timer); + return; + } + + read_lock(&bond->lock); + + /* determine if any slave has come up or any backup slave has + * gone down + * TODO: what about up/down delay in arp mode? 
it wasn't here before + * so it can wait + */ + slave = (slave_t *)bond; + while ((slave = slave->prev) != (slave_t *)bond) { + + if (slave->link != BOND_LINK_UP) { + if ((jiffies - slave->dev->last_rx) <= + the_delta_in_ticks) { + + slave->link = BOND_LINK_UP; + write_lock(&bond->ptrlock); + if ((bond->current_slave == NULL) && + ((jiffies - slave->dev->trans_start) <= + the_delta_in_ticks)) { + bond_assign_current_slave(bond, slave); + bond_set_slave_active_flags(slave); + bond_mc_update(bond, slave, NULL); + bond->current_arp_slave = NULL; + } else if (bond->current_slave != slave) { + /* this slave has just come up but we + * already have a current slave; this + * can also happen if bond_enslave adds + * a new slave that is up while we are + * searching for a new slave + */ + bond_set_slave_inactive_flags(slave); + bond->current_arp_slave = NULL; + } + + if (slave == bond->current_slave) { + printk(KERN_INFO + "%s: %s is up and now the " + "active interface\n", + master->name, + slave->dev->name); + } else { + printk(KERN_INFO + "%s: backup interface %s is " + "now up\n", + master->name, + slave->dev->name); + } + + write_unlock(&bond->ptrlock); + } + } else { + read_lock(&bond->ptrlock); + if ((slave != bond->current_slave) && + (bond->current_arp_slave == NULL) && + (((jiffies - slave->dev->last_rx) >= + 3*the_delta_in_ticks) && (my_ip != 0))) { + /* a backup slave has gone down; three times + * the delta allows the current slave to be + * taken out before the backup slave. 
+ * note: a non-null current_arp_slave indicates + * the current_slave went down and we are + * searching for a new one; under this + * condition we only take the current_slave + * down - this gives each slave a chance to + * tx/rx traffic before being taken out + */ + read_unlock(&bond->ptrlock); + slave->link = BOND_LINK_DOWN; + if (slave->link_failure_count < UINT_MAX) { + slave->link_failure_count++; + } + bond_set_slave_inactive_flags(slave); + printk(KERN_INFO + "%s: backup interface %s is now down\n", + master->name, + slave->dev->name); + } else { + read_unlock(&bond->ptrlock); + } + } + } + + read_lock(&bond->ptrlock); + slave = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (slave != NULL) { + + /* if we have sent traffic in the past 2*arp_intervals but + * haven't xmit and rx traffic in that time interval, select + * a different slave. slave->jiffies is only updated when + * a slave first becomes the current_slave - not necessarily + * after every arp; this ensures the slave has a full 2*delta + * before being taken out. 
if a primary is being used, check + * if it is up and needs to take over as the current_slave + */ + if ((((jiffies - slave->dev->trans_start) >= + (2*the_delta_in_ticks)) || + (((jiffies - slave->dev->last_rx) >= + (2*the_delta_in_ticks)) && (my_ip != 0))) && + ((jiffies - slave->jiffies) >= 2*the_delta_in_ticks)) { + + slave->link = BOND_LINK_DOWN; + if (slave->link_failure_count < UINT_MAX) { + slave->link_failure_count++; + } + printk(KERN_INFO "%s: link status down for " + "active interface %s, disabling it", + master->name, + slave->dev->name); + write_lock(&bond->ptrlock); + slave = change_active_interface(bond); + write_unlock(&bond->ptrlock); + bond->current_arp_slave = slave; + if (slave != NULL) { + slave->jiffies = jiffies; + } + + } else if ((bond->primary_slave != NULL) && + (bond->primary_slave != slave) && + (bond->primary_slave->link == BOND_LINK_UP)) { + /* at this point, slave is the current_slave */ + printk(KERN_INFO + "%s: changing from interface %s to primary " + "interface %s\n", + master->name, + slave->dev->name, + bond->primary_slave->dev->name); + + /* primary is up so switch to it */ + bond_set_slave_inactive_flags(slave); + bond_mc_update(bond, bond->primary_slave, slave); + write_lock(&bond->ptrlock); + bond_assign_current_slave(bond, bond->primary_slave); + write_unlock(&bond->ptrlock); + slave = bond->primary_slave; + bond_set_slave_active_flags(slave); + slave->jiffies = jiffies; + } else { + bond->current_arp_slave = NULL; + } + + /* the current slave must tx an arp to ensure backup slaves + * rx traffic + */ + if ((slave != NULL) && (my_ip != 0)) { + arp_send_all(slave); + } + } + + /* if we don't have a current_slave, search for the next available + * backup slave from the current_arp_slave and make it the candidate + * for becoming the current_slave + */ + if (slave == NULL) { + + if ((bond->current_arp_slave == NULL) || + (bond->current_arp_slave == (slave_t *)bond)) { + bond->current_arp_slave = bond->prev; + } + + if 
(bond->current_arp_slave != (slave_t *)bond) { + bond_set_slave_inactive_flags(bond->current_arp_slave); + slave = bond->current_arp_slave->next; + + /* search for next candidate */ + do { + if (IS_UP(slave->dev)) { + slave->link = BOND_LINK_BACK; + bond_set_slave_active_flags(slave); + arp_send_all(slave); + slave->jiffies = jiffies; + bond->current_arp_slave = slave; + break; + } + + /* if the link state is up at this point, we + * mark it down - this can happen if we have + * simultaneous link failures and + * change_active_interface doesn't make this + * one the current slave so it is still marked + * up when it is actually down + */ + if (slave->link == BOND_LINK_UP) { + slave->link = BOND_LINK_DOWN; + if (slave->link_failure_count < + UINT_MAX) { + slave->link_failure_count++; + } + + bond_set_slave_inactive_flags(slave); + printk(KERN_INFO + "%s: backup interface " + "%s is now down.\n", + master->name, + slave->dev->name); + } + } while ((slave = slave->next) != + bond->current_arp_slave->next); + } + } + + read_unlock(&bond->lock); + mod_timer(&bond->arp_timer, next_timer); +} + +static int bond_sethwaddr(struct net_device *master, struct net_device *slave) +{ +#ifdef BONDING_DEBUG + printk(KERN_CRIT "bond_sethwaddr: master=%x\n", (unsigned int)master); + printk(KERN_CRIT "bond_sethwaddr: slave=%x\n", (unsigned int)slave); + printk(KERN_CRIT "bond_sethwaddr: slave->addr_len=%d\n", slave->addr_len); +#endif + memcpy(master->dev_addr, slave->dev_addr, slave->addr_len); + return 0; +} + +static int bond_info_query(struct net_device *master, struct ifbond *info) +{ + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave; + + info->bond_mode = bond_mode; + info->num_slaves = 0; + info->miimon = miimon; + + read_lock_bh(&bond->lock); + for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { + info->num_slaves++; + } + read_unlock_bh(&bond->lock); + + return 0; +} + +static int bond_slave_info_query(struct net_device *master, + 
struct ifslave *info) +{ + bonding_t *bond = (struct bonding *) master->priv; + slave_t *slave; + int cur_ndx = 0; + + if (info->slave_id < 0) { + return -ENODEV; + } + + read_lock_bh(&bond->lock); + for (slave = bond->prev; + slave != (slave_t *)bond && cur_ndx < info->slave_id; + slave = slave->prev) { + cur_ndx++; + } + read_unlock_bh(&bond->lock); + + if (slave != (slave_t *)bond) { + strcpy(info->slave_name, slave->dev->name); + info->link = slave->link; + info->state = slave->state; + info->link_failure_count = slave->link_failure_count; + } else { + return -ENODEV; + } + + return 0; +} + +static int bond_ethtool_ioctl(struct net_device *master_dev, struct ifreq *ifr) +{ + void *addr = ifr->ifr_data; + uint32_t cmd; + + if (get_user(cmd, (uint32_t *) addr)) + return -EFAULT; + + switch (cmd) { + + case ETHTOOL_GDRVINFO: + { + struct ethtool_drvinfo info; + char *endptr; + + if (copy_from_user(&info, addr, sizeof(info))) + return -EFAULT; + + if (strcmp(info.driver, "ifenslave") == 0) { + int new_abi_ver; + + new_abi_ver = simple_strtoul(info.fw_version, + &endptr, 0); + if (*endptr) { + printk(KERN_ERR + "bonding: Error: got invalid ABI" + " version from application\n"); + + return -EINVAL; + } + + if (orig_app_abi_ver == -1) { + orig_app_abi_ver = new_abi_ver; + } + + app_abi_ver = new_abi_ver; + } + + strncpy(info.driver, DRV_NAME, 32); + strncpy(info.version, DRV_VERSION, 32); + snprintf(info.fw_version, 32, "%d", BOND_ABI_VERSION); + + if (copy_to_user(addr, &info, sizeof(info))) + return -EFAULT; + + return 0; + } + break; + default: + return -EOPNOTSUPP; + } +} + +static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd) +{ + struct net_device *slave_dev = NULL; + struct ifbond *u_binfo = NULL, k_binfo; + struct ifslave *u_sinfo = NULL, k_sinfo; + struct mii_ioctl_data *mii = NULL; + int prev_abi_ver = orig_app_abi_ver; + int ret = 0; + +#ifdef BONDING_DEBUG + printk(KERN_INFO "bond_ioctl: master=%s, cmd=%d\n", + master_dev->name, 
cmd); +#endif + + switch (cmd) { + case SIOCETHTOOL: + return bond_ethtool_ioctl(master_dev, ifr); + + case SIOCGMIIPHY: + mii = (struct mii_ioctl_data *)&ifr->ifr_data; + if (mii == NULL) { + return -EINVAL; + } + mii->phy_id = 0; + /* Fall Through */ + case SIOCGMIIREG: + /* + * We do this again just in case we were called by SIOCGMIIREG + * instead of SIOCGMIIPHY. + */ + mii = (struct mii_ioctl_data *)&ifr->ifr_data; + if (mii == NULL) { + return -EINVAL; + } + if (mii->reg_num == 1) { + mii->val_out = bond_check_mii_link( + (struct bonding *)master_dev->priv); + } + return 0; + case BOND_INFO_QUERY_OLD: + case SIOCBONDINFOQUERY: + u_binfo = (struct ifbond *)ifr->ifr_data; + if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { + return -EFAULT; + } + ret = bond_info_query(master_dev, &k_binfo); + if (ret == 0) { + if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { + return -EFAULT; + } + } + return ret; + case BOND_SLAVE_INFO_QUERY_OLD: + case SIOCBONDSLAVEINFOQUERY: + u_sinfo = (struct ifslave *)ifr->ifr_data; + if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { + return -EFAULT; + } + ret = bond_slave_info_query(master_dev, &k_sinfo); + if (ret == 0) { + if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { + return -EFAULT; + } + } + return ret; + } + + if (!capable(CAP_NET_ADMIN)) { + return -EPERM; + } + + if (orig_app_abi_ver == -1) { + /* no orig_app_abi_ver was provided yet, so we'll use the + * current one from now on, even if it's 0 + */ + orig_app_abi_ver = app_abi_ver; + + } else if (orig_app_abi_ver != app_abi_ver) { + printk(KERN_ERR + "bonding: Error: already using ifenslave ABI " + "version %d; to upgrade ifenslave to version %d, " + "you must first reload bonding.\n", + orig_app_abi_ver, app_abi_ver); + return -EINVAL; + } + + slave_dev = dev_get_by_name(ifr->ifr_slave); + +#ifdef BONDING_DEBUG + printk(KERN_INFO "slave_dev=%x: \n", (unsigned int)slave_dev); + printk(KERN_INFO "slave_dev->name=%s: \n", slave_dev->name); +#endif 
+ + if (slave_dev == NULL) { + ret = -ENODEV; + } else { + switch (cmd) { + case BOND_ENSLAVE_OLD: + case SIOCBONDENSLAVE: + ret = bond_enslave(master_dev, slave_dev); + break; + case BOND_RELEASE_OLD: + case SIOCBONDRELEASE: + ret = bond_release(master_dev, slave_dev); + break; + case BOND_SETHWADDR_OLD: + case SIOCBONDSETHWADDR: + ret = bond_sethwaddr(master_dev, slave_dev); + break; + case BOND_CHANGE_ACTIVE_OLD: + case SIOCBONDCHANGEACTIVE: + if ((bond_mode == BOND_MODE_ACTIVEBACKUP) || + (bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + ret = bond_change_active(master_dev, slave_dev); + } + else { + ret = -EINVAL; + } + break; + default: + ret = -EOPNOTSUPP; + } + dev_put(slave_dev); + } + + if (ret < 0) { + /* The ioctl failed, so there's no point in changing the + * orig_app_abi_ver. We'll restore it's value just in case + * we've changed it earlier in this function. + */ + orig_app_abi_ver = prev_abi_ver; + } + + return ret; +} + +#ifdef CONFIG_NET_FASTROUTE +static int bond_accept_fastpath(struct net_device *dev, struct dst_entry *dst) +{ + return -1; +} +#endif + +/* + * in broadcast mode, we send everything to all usable interfaces. 
+ */ +static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev) +{ + slave_t *slave, *start_at; + struct bonding *bond = (struct bonding *) dev->priv; + struct net_device *device_we_should_send_to = 0; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + read_lock(&bond->lock); + + read_lock(&bond->ptrlock); + slave = start_at = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (slave == NULL) { /* we're at the root, get the first slave */ + /* no suitable interface, frame not sent */ + read_unlock(&bond->lock); + dev_kfree_skb(skb); + return 0; + } + + do { + if (IS_UP(slave->dev) + && (slave->link == BOND_LINK_UP) + && (slave->state == BOND_STATE_ACTIVE)) { + if (device_we_should_send_to) { + struct sk_buff *skb2; + if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { + printk(KERN_ERR "bond_xmit_broadcast: skb_clone() failed\n"); + continue; + } + + skb2->dev = device_we_should_send_to; + skb2->priority = 1; + dev_queue_xmit(skb2); + } + device_we_should_send_to = slave->dev; + } + } while ((slave = slave->next) != start_at); + + if (device_we_should_send_to) { + skb->dev = device_we_should_send_to; + skb->priority = 1; + dev_queue_xmit(skb); + } else + dev_kfree_skb(skb); + + /* frame sent to all suitable interfaces */ + read_unlock(&bond->lock); + return 0; +} + +static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev) +{ + slave_t *slave, *start_at; + struct bonding *bond = (struct bonding *) dev->priv; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + read_lock(&bond->lock); + + read_lock(&bond->ptrlock); + slave = start_at = bond->current_slave; + read_unlock(&bond->ptrlock); + + if (slave == NULL) { /* we're at the root, get the first slave */ + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + do { + if (IS_UP(slave->dev) + && (slave->link == BOND_LINK_UP) + && (slave->state == 
BOND_STATE_ACTIVE)) { + + skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); + + write_lock(&bond->ptrlock); + bond_assign_current_slave(bond, slave->next); + write_unlock(&bond->ptrlock); + + read_unlock(&bond->lock); + return 0; + } + } while ((slave = slave->next) != start_at); + + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; +} + +/* + * in XOR mode, we determine the output device by performing xor on + * the source and destination hw adresses. If this device is not + * enabled, find the next slave following this xor slave. + */ +static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev) +{ + slave_t *slave, *start_at; + struct bonding *bond = (struct bonding *) dev->priv; + struct ethhdr *data = (struct ethhdr *)skb->data; + int slave_no; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + read_lock(&bond->lock); + slave = bond->prev; + + /* we're at the root, get the first slave */ + if (bond->slave_cnt == 0) { + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; + } + + slave_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % bond->slave_cnt; + + while ( (slave_no > 0) && (slave != (slave_t *)bond) ) { + slave = slave->prev; + slave_no--; + } + start_at = slave; + + do { + if (IS_UP(slave->dev) + && (slave->link == BOND_LINK_UP) + && (slave->state == BOND_STATE_ACTIVE)) { + + skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); + + read_unlock(&bond->lock); + return 0; + } + } while ((slave = slave->next) != start_at); + + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; +} + +/* + * in active-backup mode, we know that bond->current_slave is always valid if + * the bond has a usable interface. 
+ */ +static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) +{ + struct bonding *bond = (struct bonding *) dev->priv; + int ret; + + if (!IS_UP(dev)) { /* bond down */ + dev_kfree_skb(skb); + return 0; + } + + /* if we are sending arp packets, try to at least + identify our own ip address */ + if ( (arp_interval > 0) && (my_ip == 0) && + (skb->protocol == __constant_htons(ETH_P_ARP) ) ) { + char *the_ip = (((char *)skb->data)) + + sizeof(struct ethhdr) + + sizeof(struct arphdr) + + ETH_ALEN; + memcpy(&my_ip, the_ip, 4); + } + + /* if we are sending arp packets and don't know + * the target hw address, save it so we don't need + * to use a broadcast address. + * don't do this if in active backup mode because the slaves must + * receive packets to stay up, and the only ones they receive are + * broadcasts. + */ + if ( (bond_mode != BOND_MODE_ACTIVEBACKUP) && + (arp_ip_count == 1) && + (arp_interval > 0) && (arp_target_hw_addr == NULL) && + (skb->protocol == __constant_htons(ETH_P_IP) ) ) { + struct ethhdr *eth_hdr = + (struct ethhdr *) (((char *)skb->data)); + struct iphdr *ip_hdr = (struct iphdr *)(eth_hdr + 1); + + if (arp_target[0] == ip_hdr->daddr) { + arp_target_hw_addr = kmalloc(ETH_ALEN, GFP_KERNEL); + if (arp_target_hw_addr != NULL) + memcpy(arp_target_hw_addr, eth_hdr->h_dest, ETH_ALEN); + } + } + + read_lock(&bond->lock); + + read_lock(&bond->ptrlock); + if (bond->current_slave != NULL) { /* one usable interface */ + skb->dev = bond->current_slave->dev; + read_unlock(&bond->ptrlock); + skb->priority = 1; + ret = dev_queue_xmit(skb); + read_unlock(&bond->lock); + return 0; + } + else { + read_unlock(&bond->ptrlock); + } + + /* no suitable interface, frame not sent */ +#ifdef BONDING_DEBUG + printk(KERN_INFO "There was no suitable interface, so we don't transmit\n"); +#endif + dev_kfree_skb(skb); + read_unlock(&bond->lock); + return 0; +} + +static struct net_device_stats *bond_get_stats(struct net_device *dev) +{ + bonding_t *bond = 
dev->priv; + struct net_device_stats *stats = bond->stats, *sstats; + slave_t *slave; + + memset(bond->stats, 0, sizeof(struct net_device_stats)); + + read_lock_bh(&bond->lock); + + for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { + sstats = slave->dev->get_stats(slave->dev); + + stats->rx_packets += sstats->rx_packets; + stats->rx_bytes += sstats->rx_bytes; + stats->rx_errors += sstats->rx_errors; + stats->rx_dropped += sstats->rx_dropped; + + stats->tx_packets += sstats->tx_packets; + stats->tx_bytes += sstats->tx_bytes; + stats->tx_errors += sstats->tx_errors; + stats->tx_dropped += sstats->tx_dropped; + + stats->multicast += sstats->multicast; + stats->collisions += sstats->collisions; + + stats->rx_length_errors += sstats->rx_length_errors; + stats->rx_over_errors += sstats->rx_over_errors; + stats->rx_crc_errors += sstats->rx_crc_errors; + stats->rx_frame_errors += sstats->rx_frame_errors; + stats->rx_fifo_errors += sstats->rx_fifo_errors; + stats->rx_missed_errors += sstats->rx_missed_errors; + + stats->tx_aborted_errors += sstats->tx_aborted_errors; + stats->tx_carrier_errors += sstats->tx_carrier_errors; + stats->tx_fifo_errors += sstats->tx_fifo_errors; + stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; + stats->tx_window_errors += sstats->tx_window_errors; + + } + + read_unlock_bh(&bond->lock); + return stats; +} + +static int bond_get_info(char *buf, char **start, off_t offset, int length) +{ + bonding_t *bond = these_bonds; + int len = 0; + off_t begin = 0; + u16 link; + slave_t *slave = NULL; + + len += sprintf(buf + len, "%s\n", version); + + while (bond != NULL) { + /* + * This function locks the mutex, so we can't lock it until + * afterwards + */ + link = bond_check_mii_link(bond); + + len += sprintf(buf + len, "Bonding Mode: %s\n", + bond_mode_name()); + + if ((bond_mode == BOND_MODE_ACTIVEBACKUP) || + (bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + read_lock_bh(&bond->lock); + 
read_lock(&bond->ptrlock); + if (bond->current_slave != NULL) { + len += sprintf(buf + len, + "Currently Active Slave: %s\n", + bond->current_slave->dev->name); + } + read_unlock(&bond->ptrlock); + read_unlock_bh(&bond->lock); + } + + len += sprintf(buf + len, "MII Status: "); + len += sprintf(buf + len, + link == BMSR_LSTATUS ? "up\n" : "down\n"); + len += sprintf(buf + len, "MII Polling Interval (ms): %d\n", + miimon); + len += sprintf(buf + len, "Up Delay (ms): %d\n", + updelay * miimon); + len += sprintf(buf + len, "Down Delay (ms): %d\n", + downdelay * miimon); + len += sprintf(buf + len, "Multicast Mode: %s\n", + multicast_mode_name()); + + read_lock_bh(&bond->lock); + + if (bond_mode == BOND_MODE_8023AD) { + struct ad_info ad_info; + + len += sprintf(buf + len, "\n802.3ad info\n"); + + if (bond_3ad_get_active_agg_info(bond, &ad_info)) { + len += sprintf(buf + len, "bond %s has no active aggregator\n", bond->device->name); + } else { + len += sprintf(buf + len, "Active Aggregator Info:\n"); + + len += sprintf(buf + len, "\tAggregator ID: %d\n", ad_info.aggregator_id); + len += sprintf(buf + len, "\tNumber of ports: %d\n", ad_info.ports); + len += sprintf(buf + len, "\tActor Key: %d\n", ad_info.actor_key); + len += sprintf(buf + len, "\tPartner Key: %d\n", ad_info.partner_key); + len += sprintf(buf + len, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n", + ad_info.partner_system[0], + ad_info.partner_system[1], + ad_info.partner_system[2], + ad_info.partner_system[3], + ad_info.partner_system[4], + ad_info.partner_system[5]); + } + } + + for (slave = bond->prev; slave != (slave_t *)bond; + slave = slave->prev) { + len += sprintf(buf + len, "\nSlave Interface: %s\n", slave->dev->name); + + len += sprintf(buf + len, "MII Status: "); + + len += sprintf(buf + len, + slave->link == BOND_LINK_UP ? 
+ "up\n" : "down\n"); + len += sprintf(buf + len, "Link Failure Count: %d\n", + slave->link_failure_count); + + if (app_abi_ver >= 1) { + len += sprintf(buf + len, + "Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n", + slave->perm_hwaddr[0], + slave->perm_hwaddr[1], + slave->perm_hwaddr[2], + slave->perm_hwaddr[3], + slave->perm_hwaddr[4], + slave->perm_hwaddr[5]); + } + + if (bond_mode == BOND_MODE_8023AD) { + struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; + + if (agg) { + len += sprintf(buf + len, "Aggregator ID: %d\n", + agg->aggregator_identifier); + } else { + len += sprintf(buf + len, "Aggregator ID: N/A\n"); + } + } + } + read_unlock_bh(&bond->lock); + + /* + * Figure out the calcs for the /proc/net interface + */ + *start = buf + (offset - begin); + len -= (offset - begin); + if (len > length) { + len = length; + } + if (len < 0) { + len = 0; + } + + + bond = bond->next_bond; + } + return len; +} + +static int bond_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct bonding *this_bond = (struct bonding *)these_bonds; + struct bonding *last_bond; + struct net_device *event_dev = (struct net_device *)ptr; + + /* while there are bonds configured */ + while (this_bond != NULL) { + if (this_bond == event_dev->priv ) { + switch (event) { + case NETDEV_UNREGISTER: + /* + * remove this bond from a linked list of + * bonds + */ + if (this_bond == these_bonds) { + these_bonds = this_bond->next_bond; + } else { + for (last_bond = these_bonds; + last_bond != NULL; + last_bond = last_bond->next_bond) { + if (last_bond->next_bond == + this_bond) { + last_bond->next_bond = + this_bond->next_bond; + } + } + } + return NOTIFY_DONE; + + default: + return NOTIFY_DONE; + } + } else if (this_bond->device == event_dev->master) { + switch (event) { + case NETDEV_UNREGISTER: + bond_release(this_bond->device, event_dev); + break; + } + return NOTIFY_DONE; + } + this_bond = this_bond->next_bond; + } + return NOTIFY_DONE; +} + +static 
struct notifier_block bond_netdev_notifier = { + notifier_call: bond_event, +}; + +static int __init bond_init(struct net_device *dev) +{ + bonding_t *bond, *this_bond, *last_bond; + int count; + +#ifdef BONDING_DEBUG + printk (KERN_INFO "Begin bond_init for %s\n", dev->name); +#endif + bond = kmalloc(sizeof(struct bonding), GFP_KERNEL); + if (bond == NULL) { + return -ENOMEM; + } + memset(bond, 0, sizeof(struct bonding)); + + /* initialize rwlocks */ + rwlock_init(&bond->lock); + rwlock_init(&bond->ptrlock); + + bond->stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); + if (bond->stats == NULL) { + kfree(bond); + return -ENOMEM; + } + memset(bond->stats, 0, sizeof(struct net_device_stats)); + + bond->next = bond->prev = (slave_t *)bond; + bond->current_slave = NULL; + bond->current_arp_slave = NULL; + bond->device = dev; + dev->priv = bond; + + /* Initialize the device structure. */ + switch (bond_mode) { + case BOND_MODE_ACTIVEBACKUP: + dev->hard_start_xmit = bond_xmit_activebackup; + break; + case BOND_MODE_ROUNDROBIN: + dev->hard_start_xmit = bond_xmit_roundrobin; + break; + case BOND_MODE_XOR: + dev->hard_start_xmit = bond_xmit_xor; + break; + case BOND_MODE_BROADCAST: + dev->hard_start_xmit = bond_xmit_broadcast; + break; + case BOND_MODE_8023AD: + dev->hard_start_xmit = bond_3ad_xmit_xor; + break; + case BOND_MODE_TLB: + case BOND_MODE_ALB: + dev->hard_start_xmit = bond_alb_xmit; + break; + default: + printk(KERN_ERR "Unknown bonding mode %d\n", bond_mode); + kfree(bond->stats); + kfree(bond); + return -EINVAL; + } + + dev->get_stats = bond_get_stats; + dev->open = bond_open; + dev->stop = bond_close; + dev->set_multicast_list = set_multicast_list; + dev->do_ioctl = bond_ioctl; + + /* + * Fill in the fields of the device structure with ethernet-generic + * values. 
+ */ + + ether_setup(dev); + + dev->set_mac_address = bond_set_mac_address; + dev->tx_queue_len = 0; + dev->flags |= IFF_MASTER|IFF_MULTICAST; +#ifdef CONFIG_NET_FASTROUTE + dev->accept_fastpath = bond_accept_fastpath; +#endif + + printk(KERN_INFO "%s registered with", dev->name); + if (miimon > 0) { + printk(" MII link monitoring set to %d ms", miimon); + updelay /= miimon; + downdelay /= miimon; + } else { + printk("out MII link monitoring"); + } + printk(", in %s mode.\n", bond_mode_name()); + + printk(KERN_INFO "%s registered with", dev->name); + if (arp_interval > 0) { + printk(" ARP monitoring set to %d ms with %d target(s):", + arp_interval, arp_ip_count); + for (count=0 ; countbond_proc_dir = proc_mkdir(dev->name, proc_net); + if (bond->bond_proc_dir == NULL) { + printk(KERN_ERR "%s: Cannot init /proc/net/%s/\n", + dev->name, dev->name); + kfree(bond->stats); + kfree(bond); + return -ENOMEM; + } + bond->bond_proc_info_file = + create_proc_info_entry("info", 0, bond->bond_proc_dir, + bond_get_info); + if (bond->bond_proc_info_file == NULL) { + printk(KERN_ERR "%s: Cannot init /proc/net/%s/info\n", + dev->name, dev->name); + remove_proc_entry(dev->name, proc_net); + kfree(bond->stats); + kfree(bond); + return -ENOMEM; + } +#endif /* CONFIG_PROC_FS */ + + if (first_pass == 1) { + these_bonds = bond; + register_netdevice_notifier(&bond_netdev_notifier); + first_pass = 0; + } else { + last_bond = these_bonds; + this_bond = these_bonds->next_bond; + while (this_bond != NULL) { + last_bond = this_bond; + this_bond = this_bond->next_bond; + } + last_bond->next_bond = bond; + } + + return 0; +} + +/* +static int __init bond_probe(struct net_device *dev) +{ + bond_init(dev); + return 0; +} + */ + +/* + * Convert string input module parms. Accept either the + * number of the mode or its string name. 
+ */ +static inline int +bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl) +{ + int i; + + for (i = 0; tbl[i].modename != NULL; i++) { + if ((isdigit(*mode_arg) && + tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) || + (0 == strncmp(mode_arg, tbl[i].modename, + strlen(tbl[i].modename)))) { + return tbl[i].mode; + } + } + + return -1; +} + + +static int __init bonding_init(void) +{ + int no; + int err; + + /* Find a name for this unit */ + static struct net_device *dev_bond = NULL; + + printk(KERN_INFO "%s", version); + + /* + * Convert string parameters. + */ + if (mode) { + bond_mode = bond_parse_parm(mode, bond_mode_tbl); + if (bond_mode == -1) { + printk(KERN_WARNING + "bonding_init(): Invalid bonding mode \"%s\"\n", + mode == NULL ? "NULL" : mode); + return -EINVAL; + } + } + + if (multicast) { + multicast_mode = bond_parse_parm(multicast, bond_mc_tbl); + if (multicast_mode == -1) { + printk(KERN_WARNING + "bonding_init(): Invalid multicast mode \"%s\"\n", + multicast == NULL ? "NULL" : multicast); + return -EINVAL; + } + } + + if (lacp_rate) { + if (bond_mode != BOND_MODE_8023AD) { + printk(KERN_WARNING + "lacp_rate param is irrelevant in mode %s\n", + bond_mode_name()); + } else { + lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); + if (lacp_fast == -1) { + printk(KERN_WARNING + "bonding_init(): Invalid lacp rate " + "\"%s\"\n", + lacp_rate == NULL ? 
"NULL" : lacp_rate); + + return -EINVAL; + } + } + } + + if (max_bonds < 1 || max_bonds > INT_MAX) { + printk(KERN_WARNING + "bonding_init(): max_bonds (%d) not in range %d-%d, " + "so it was reset to BOND_DEFAULT_MAX_BONDS (%d)", + max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); + max_bonds = BOND_DEFAULT_MAX_BONDS; + } + dev_bond = dev_bonds = kmalloc(max_bonds*sizeof(struct net_device), + GFP_KERNEL); + if (dev_bond == NULL) { + return -ENOMEM; + } + memset(dev_bonds, 0, max_bonds*sizeof(struct net_device)); + + if (miimon < 0) { + printk(KERN_WARNING + "bonding_init(): miimon module parameter (%d), " + "not in range 0-%d, so it was reset to %d\n", + miimon, INT_MAX, BOND_LINK_MON_INTERV); + miimon = BOND_LINK_MON_INTERV; + } + + if (updelay < 0) { + printk(KERN_WARNING + "bonding_init(): updelay module parameter (%d), " + "not in range 0-%d, so it was reset to 0\n", + updelay, INT_MAX); + updelay = 0; + } + + if (downdelay < 0) { + printk(KERN_WARNING + "bonding_init(): downdelay module parameter (%d), " + "not in range 0-%d, so it was reset to 0\n", + downdelay, INT_MAX); + downdelay = 0; + } + + /* reset values for 802.3ad */ + if (bond_mode == BOND_MODE_8023AD) { + if (arp_interval != 0) { + printk(KERN_WARNING "bonding_init(): ARP monitoring" + "can't be used simultaneously with 802.3ad, " + "disabling ARP monitoring\n"); + arp_interval = 0; + } + + if (miimon == 0) { + printk(KERN_ERR + "bonding_init(): miimon must be specified, " + "otherwise bonding will not detect link failure, " + "speed and duplex which are essential " + "for 802.3ad operation\n"); + printk(KERN_ERR "Forcing miimon to 100msec\n"); + miimon = 100; + } + + if (multicast_mode != BOND_MULTICAST_ALL) { + printk(KERN_ERR + "bonding_init(): Multicast mode must " + "be set to ALL for 802.3ad\n"); + printk(KERN_ERR "Forcing Multicast mode to ALL\n"); + multicast_mode = BOND_MULTICAST_ALL; + } + } + + /* reset values for TLB/ALB */ + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == 
BOND_MODE_ALB)) { + if (miimon == 0) { + printk(KERN_ERR + "bonding_init(): miimon must be specified, " + "otherwise bonding will not detect link failure " + "and link speed which are essential " + "for TLB/ALB load balancing\n"); + printk(KERN_ERR "Forcing miimon to 100msec\n"); + miimon = 100; + } + + if (multicast_mode != BOND_MULTICAST_ACTIVE) { + printk(KERN_ERR + "bonding_init(): Multicast mode must " + "be set to ACTIVE for TLB/ALB\n"); + printk(KERN_ERR "Forcing Multicast mode to ACTIVE\n"); + multicast_mode = BOND_MULTICAST_ACTIVE; + } + } + + if (bond_mode == BOND_MODE_ALB) { + printk(KERN_INFO + "In ALB mode you might experience client disconnections" + " upon reconnection of a link if the bonding module" + " updelay parameter (%d msec) is incompatible with the" + " forwarding delay time of the switch\n", updelay); + } + + if (miimon == 0) { + if ((updelay != 0) || (downdelay != 0)) { + /* just warn the user the up/down delay will have + * no effect since miimon is zero... + */ + printk(KERN_WARNING + "bonding_init(): miimon module parameter not " + "set and updelay (%d) or downdelay (%d) module " + "parameter is set; updelay and downdelay have " + "no effect unless miimon is set\n", + updelay, downdelay); + } + } else { + /* don't allow arp monitoring */ + if (arp_interval != 0) { + printk(KERN_WARNING + "bonding_init(): miimon (%d) and arp_interval " + "(%d) can't be used simultaneously, " + "disabling ARP monitoring\n", + miimon, arp_interval); + arp_interval = 0; + } + + if ((updelay % miimon) != 0) { + /* updelay will be rounded in bond_init() when it + * is divided by miimon, we just inform user here + */ + printk(KERN_WARNING + "bonding_init(): updelay (%d) is not a multiple " + "of miimon (%d), updelay rounded to %d ms\n", + updelay, miimon, (updelay / miimon) * miimon); + } + + if ((downdelay % miimon) != 0) { + /* downdelay will be rounded in bond_init() when it + * is divided by miimon, we just inform user here + */ + printk(KERN_WARNING + 
"bonding_init(): downdelay (%d) is not a " + "multiple of miimon (%d), downdelay rounded " + "to %d ms\n", + downdelay, miimon, + (downdelay / miimon) * miimon); + } + } + + if (arp_interval < 0) { + printk(KERN_WARNING + "bonding_init(): arp_interval module parameter (%d), " + "not in range 0-%d, so it was reset to %d\n", + arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); + arp_interval = BOND_LINK_ARP_INTERV; + } + + for (arp_ip_count=0 ; + (arp_ip_count < MAX_ARP_IP_TARGETS) && arp_ip_target[arp_ip_count]; + arp_ip_count++ ) { + /* not complete check, but should be good enough to + catch mistakes */ + if (!isdigit(arp_ip_target[arp_ip_count][0])) { + printk(KERN_WARNING + "bonding_init(): bad arp_ip_target module " + "parameter (%s), ARP monitoring will not be " + "performed\n", + arp_ip_target[arp_ip_count]); + arp_interval = 0; + } else { + u32 ip = in_aton(arp_ip_target[arp_ip_count]); + *(u32 *)(arp_ip_target[arp_ip_count]) = ip; + } + } + + + if ( (arp_interval > 0) && (arp_ip_count==0)) { + /* don't allow arping if no arp_ip_target given... */ + printk(KERN_WARNING + "bonding_init(): arp_interval module parameter " + "(%d) specified without providing an arp_ip_target " + "parameter, arp_interval was reset to 0\n", + arp_interval); + arp_interval = 0; + } + + if ((miimon == 0) && (arp_interval == 0)) { + /* miimon and arp_interval not set, we need one so things + * work as expected, see bonding.txt for details + */ + printk(KERN_ERR + "bonding_init(): either miimon or " + "arp_interval and arp_ip_target module parameters " + "must be specified, otherwise bonding will not detect " + "link failures! 
see bonding.txt for details.\n"); + } + + if ((primary != NULL) && (bond_mode != BOND_MODE_ACTIVEBACKUP) && + (bond_mode != BOND_MODE_TLB) && + (bond_mode != BOND_MODE_ALB)){ + /* currently, using a primary only makes sense + * in active backup, TLB or ALB modes + */ + printk(KERN_WARNING + "bonding_init(): %s primary device specified but has " + "no effect in %s mode\n", + primary, bond_mode_name()); + primary = NULL; + } + + + for (no = 0; no < max_bonds; no++) { + dev_bond->init = bond_init; + + err = dev_alloc_name(dev_bond,"bond%d"); + if (err < 0) { + kfree(dev_bonds); + return err; + } + SET_MODULE_OWNER(dev_bond); + if (register_netdev(dev_bond) != 0) { + kfree(dev_bonds); + return -EIO; + } + dev_bond++; + } + return 0; +} + +static void __exit bonding_exit(void) +{ + struct net_device *dev_bond = dev_bonds; + struct bonding *bond; + int no; + + unregister_netdevice_notifier(&bond_netdev_notifier); + + for (no = 0; no < max_bonds; no++) { + +#ifdef CONFIG_PROC_FS + bond = (struct bonding *) dev_bond->priv; + remove_proc_entry("info", bond->bond_proc_dir); + remove_proc_entry(dev_bond->name, proc_net); +#endif + unregister_netdev(dev_bond); + kfree(bond->stats); + kfree(dev_bond->priv); + + dev_bond->priv = NULL; + dev_bond++; + } + kfree(dev_bonds); +} + +module_init(bonding_init); +module_exit(bonding_exit); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -Nru a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/net/bonding/bonding.h Thu Jun 19 23:46:53 2003 @@ -0,0 +1,181 @@ +/* + * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. + * + * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes + * NCM: Network and Communications Management, Inc. 
+ * + * BUT, I'm the one who modified it for ethernet, so: + * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov + * + * This software may be used and distributed according to the terms + * of the GNU Public License, incorporated herein by reference. + * + * + * 2003/03/18 - Amir Noam , + * Tsippy Mendelson and + * Shmulik Hen + * - Added support for IEEE 802.3ad Dynamic link aggregation mode. + * + * 2003/05/01 - Tsippy Mendelson and + * Amir Noam + * - Code beautification and style changes (mainly in comments). + * + * 2003/05/01 - Shmulik Hen + * - Added support for Transmit load balancing mode. + */ + +#ifndef _LINUX_BONDING_H +#define _LINUX_BONDING_H + +#include +#include +#include "bond_3ad.h" +#include "bond_alb.h" + +#ifdef BONDING_DEBUG + +// use this like so: BOND_PRINT_DBG(("foo = %d, bar = %d", foo, bar)); +#define BOND_PRINT_DBG(X) \ +do { \ + printk(KERN_DEBUG "%s (%d)", __FUNCTION__, __LINE__); \ + printk X; \ + printk("\n"); \ +} while(0) + +#else +#define BOND_PRINT_DBG(X) +#endif /* BONDING_DEBUG */ + +#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \ + (netif_running(dev) && netif_carrier_ok(dev))) + +/* Checks whether the dev is ready for transmit. We do not check netif_running + * since a device can be stopped by the driver for short periods of time for + * maintainance. dev_queue_xmit() handles this by queing the packet until the + * the dev is running again. 
Keeping packets ordering requires sticking the + * same dev as much as possible + */ +#define SLAVE_IS_OK(slave) \ + ((((slave)->dev->flags & (IFF_UP)) == (IFF_UP)) && \ + netif_carrier_ok((slave)->dev) && \ + ((slave)->link == BOND_LINK_UP) && \ + ((slave)->state == BOND_STATE_ACTIVE)) + + +typedef struct slave { + struct slave *next; + struct slave *prev; + struct net_device *dev; + short delay; + unsigned long jiffies; + char link; /* one of BOND_LINK_XXXX */ + char state; /* one of BOND_STATE_XXXX */ + unsigned short original_flags; + u32 link_failure_count; + u16 speed; + u8 duplex; + u8 perm_hwaddr[ETH_ALEN]; + struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */ + struct tlb_slave_info tlb_info; +} slave_t; + +/* + * Here are the locking policies for the two bonding locks: + * + * 1) Get bond->lock when reading/writing slave list. + * 2) Get bond->ptrlock when reading/writing bond->current_slave. + * (It is unnecessary when the write-lock is put with bond->lock.) + * 3) When we lock with bond->ptrlock, we must lock with bond->lock + * beforehand. 
+ */ +typedef struct bonding { + slave_t *next; + slave_t *prev; + slave_t *current_slave; + slave_t *primary_slave; + slave_t *current_arp_slave; + __s32 slave_cnt; + rwlock_t lock; + rwlock_t ptrlock; + struct timer_list mii_timer; + struct timer_list arp_timer; + struct net_device_stats *stats; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *bond_proc_dir; + struct proc_dir_entry *bond_proc_info_file; +#endif /* CONFIG_PROC_FS */ + struct bonding *next_bond; + struct net_device *device; + struct dev_mc_list *mc_list; + unsigned short flags; + struct ad_bond_info ad_info; + struct alb_bond_info alb_info; +} bonding_t; + +/* Forward declarations */ +void bond_set_slave_active_flags(slave_t *slave); +void bond_set_slave_inactive_flags(slave_t *slave); + +/** + * These functions can be used for iterating the slave list + * (which is circular) + * Caller must hold bond lock for read + */ +extern inline struct slave* +bond_get_first_slave(struct bonding *bond) +{ + /* if there are no slaves return NULL */ + if (bond->next == (slave_t *)bond) { + return NULL; + } + return bond->next; +} + +/** + * Caller must hold bond lock for read + */ +extern inline struct slave* +bond_get_next_slave(struct bonding *bond, struct slave *slave) +{ + /* If we have reached the last slave return NULL */ + if (slave->next == bond->next) { + return NULL; + } + return slave->next; +} + +/** + * Returns NULL if the net_device does not belong to any of the bond's slaves + * + * Caller must hold bond lock for read + */ +extern inline struct slave* +bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev) +{ + struct slave *our_slave = bond->next; + + /* check if the list of slaves is empty */ + if (our_slave == (slave_t *)bond) { + return NULL; + } + + for (; our_slave; our_slave = bond_get_next_slave(bond, our_slave)) { + if (our_slave->dev == slave_dev) { + break; + } + } + return our_slave; +} + +extern inline struct bonding* +bond_get_bond_by_slave(struct slave *slave) +{ 
+ if (!slave || !slave->dev->master) { + return NULL; + } + + return (struct bonding *)(slave->dev->master->priv); +} + +#endif /* _LINUX_BONDING_H */ + diff -Nru a/drivers/net/bonding.c b/drivers/net/bonding.c --- a/drivers/net/bonding.c Thu Jun 19 23:46:52 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,3302 +0,0 @@ -/* - * originally based on the dummy device. - * - * Copyright 1999, Thomas Davis, tadavis@lbl.gov. - * Licensed under the GPL. Based on dummy.c, and eql.c devices. - * - * bonding.c: an Ethernet Bonding driver - * - * This is useful to talk to a Cisco EtherChannel compatible equipment: - * Cisco 5500 - * Sun Trunking (Solaris) - * Alteon AceDirector Trunks - * Linux Bonding - * and probably many L2 switches ... - * - * How it works: - * ifconfig bond0 ipaddress netmask up - * will setup a network device, with an ip address. No mac address - * will be assigned at this time. The hw mac address will come from - * the first slave bonded to the channel. All slaves will then use - * this hw mac address. - * - * ifconfig bond0 down - * will release all slaves, marking them as down. - * - * ifenslave bond0 eth0 - * will attach eth0 to bond0 as a slave. eth0 hw mac address will either - * a: be used as initial mac address - * b: if a hw mac address already is there, eth0's hw mac address - * will then be set from bond0. - * - * v0.1 - first working version. - * v0.2 - changed stats to be calculated by summing slaves stats. - * - * Changes: - * Arnaldo Carvalho de Melo - * - fix leaks on failure at bond_init - * - * 2000/09/30 - Willy Tarreau - * - added trivial code to release a slave device. - * - fixed security bug (CAP_NET_ADMIN not checked) - * - implemented MII link monitoring to disable dead links : - * All MII capable slaves are checked every milliseconds - * (100 ms seems good). This value can be changed by passing it to - * insmod. A value of zero disables the monitoring (default). 
- * - fixed an infinite loop in bond_xmit_roundrobin() when there's no - * good slave. - * - made the code hopefully SMP safe - * - * 2000/10/03 - Willy Tarreau - * - optimized slave lists based on relevant suggestions from Thomas Davis - * - implemented active-backup method to obtain HA with two switches: - * stay as long as possible on the same active interface, while we - * also monitor the backup one (MII link status) because we want to know - * if we are able to switch at any time. ( pass "mode=1" to insmod ) - * - lots of stress testings because we need it to be more robust than the - * wires ! :-> - * - * 2000/10/09 - Willy Tarreau - * - added up and down delays after link state change. - * - optimized the slaves chaining so that when we run forward, we never - * repass through the bond itself, but we can find it by searching - * backwards. Renders the deletion more difficult, but accelerates the - * scan. - * - smarter enslaving and releasing. - * - finer and more robust SMP locking - * - * 2000/10/17 - Willy Tarreau - * - fixed two potential SMP race conditions - * - * 2000/10/18 - Willy Tarreau - * - small fixes to the monitoring FSM in case of zero delays - * 2000/11/01 - Willy Tarreau - * - fixed first slave not automatically used in trunk mode. - * 2000/11/10 : spelling of "EtherChannel" corrected. - * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl(). - * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait(). - * - * 2001/1/3 - Chad N. Tindel - * - The bonding driver now simulates MII status monitoring, just like - * a normal network device. It will show that the link is down iff - * every slave in the bond shows that their links are down. If at least - * one slave is up, the bond's MII status will appear as up. - * - * 2001/2/7 - Chad N. Tindel - * - Applications can now query the bond from user space to get - * information which may be useful. They do this by calling - * the BOND_INFO_QUERY ioctl. 
Once the app knows how many slaves - * are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to - * get slave specific information (# link failures, etc). See - * for more details. The structs of interest - * are ifbond and ifslave. - * - * 2001/4/5 - Chad N. Tindel - * - Ported to 2.4 Kernel - * - * 2001/5/2 - Jeffrey E. Mast - * - When a device is detached from a bond, the slave device is no longer - * left thinking that is has a master. - * - * 2001/5/16 - Jeffrey E. Mast - * - memset did not appropriately initialized the bond rw_locks. Used - * rwlock_init to initialize to unlocked state to prevent deadlock when - * first attempting a lock - * - Called SET_MODULE_OWNER for bond device - * - * 2001/5/17 - Tim Anderson - * - 2 paths for releasing for slave release; 1 through ioctl - * and 2) through close. Both paths need to release the same way. - * - the free slave in bond release is changing slave status before - * the free. The netdev_set_master() is intended to change slave state - * so it should not be done as part of the release process. - * - Simple rule for slave state at release: only the active in A/B and - * only one in the trunked case. - * - * 2001/6/01 - Tim Anderson - * - Now call dev_close when releasing a slave so it doesn't screw up - * out routing table. - * - * 2001/6/01 - Chad N. Tindel - * - Added /proc support for getting bond and slave information. - * Information is in /proc/net//info. - * - Changed the locking when calling bond_close to prevent deadlock. - * - * 2001/8/05 - Janice Girouard - * - correct problem where refcnt of slave is not incremented in bond_ioctl - * so the system hangs when halting. - * - correct locking problem when unable to malloc in bond_enslave. - * - adding bond_xmit_xor logic. - * - adding multiple bond device support. 
- * - * 2001/8/13 - Erik Habbinga - * - correct locking problem with rtnl_exlock_nowait - * - * 2001/8/23 - Janice Girouard - * - bzero initial dev_bonds, to correct oops - * - convert SIOCDEVPRIVATE to new MII ioctl calls - * - * 2001/9/13 - Takao Indoh - * - Add the BOND_CHANGE_ACTIVE ioctl implementation - * - * 2001/9/14 - Mark Huth - * - Change MII_LINK_READY to not check for end of auto-negotiation, - * but only for an up link. - * - * 2001/9/20 - Chad N. Tindel - * - Add the device field to bonding_t. Previously the net_device - * corresponding to a bond wasn't available from the bonding_t - * structure. - * - * 2001/9/25 - Janice Girouard - * - add arp_monitor for active backup mode - * - * 2001/10/23 - Takao Indoh - * - Various memory leak fixes - * - * 2001/11/5 - Mark Huth - * - Don't take rtnl lock in bond_mii_monitor as it deadlocks under - * certain hotswap conditions. - * Note: this same change may be required in bond_arp_monitor ??? - * - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr - * - Handle hot swap ethernet interface deregistration events to remove - * kernel oops following hot swap of enslaved interface - * - * 2002/1/2 - Chad N. Tindel - * - Restore original slave flags at release time. - * - * 2002/02/18 - Erik Habbinga - * - bond_release(): calling kfree on our_slave after call to - * bond_restore_slave_flags, not before - * - bond_enslave(): saving slave flags into original_flags before - * call to netdev_set_master, so the IFF_SLAVE flag doesn't end - * up in original_flags - * - * 2002/04/05 - Mark Smith and - * Steve Mead - * - Port Gleb Natapov's multicast support patchs from 2.4.12 - * to 2.4.18 adding support for multicast. 
- * - * 2002/06/10 - Tony Cureington - * - corrected uninitialized pointer (ifr.ifr_data) in bond_check_dev_link; - * actually changed function to use MIIPHY, then MIIREG, and finally - * ETHTOOL to determine the link status - * - fixed bad ifr_data pointer assignments in bond_ioctl - * - corrected mode 1 being reported as active-backup in bond_get_info; - * also added text to distinguish type of load balancing (rr or xor) - * - change arp_ip_target module param from "1-12s" (array of 12 ptrs) - * to "s" (a single ptr) - * - * 2002/08/30 - Jay Vosburgh - * - Removed acquisition of xmit_lock in set_multicast_list; caused - * deadlock on SMP (lock is held by caller). - * - Revamped SIOCGMIIPHY, SIOCGMIIREG portion of bond_check_dev_link(). - * - * 2002/09/18 - Jay Vosburgh - * - Fixed up bond_check_dev_link() (and callers): removed some magic - * numbers, banished local MII_ defines, wrapped ioctl calls to - * prevent EFAULT errors - * - * 2002/9/30 - Jay Vosburgh - * - make sure the ip target matches the arp_target before saving the - * hw address. - * - * 2002/9/30 - Dan Eisner - * - make sure my_ip is set before taking down the link, since - * not all switches respond if the source ip is not set. - * - * 2002/10/8 - Janice Girouard - * - read in the local ip address when enslaving a device - * - add primary support - * - make sure 2*arp_interval has passed when a new device - * is brought on-line before taking it down. - * - * 2002/09/11 - Philippe De Muyter - * - Added bond_xmit_broadcast logic. - * - Added bond_mode() support function. 
- * - * 2002/10/26 - Laurent Deniel - * - allow to register multicast addresses only on active slave - * (useful in active-backup mode) - * - add multicast module parameter - * - fix deletion of multicast groups after unloading module - * - * 2002/11/06 - Kameshwara Rayaprolu - * - Changes to prevent panic from closing the device twice; if we close - * the device in bond_release, we must set the original_flags to down - * so it won't be closed again by the network layer. - * - * 2002/11/07 - Tony Cureington - * - Fix arp_target_hw_addr memory leak - * - Created activebackup_arp_monitor function to handle arp monitoring - * in active backup mode - the bond_arp_monitor had several problems... - * such as allowing slaves to tx arps sequentially without any delay - * for a response - * - Renamed bond_arp_monitor to loadbalance_arp_monitor and re-wrote - * this function to just handle arp monitoring in load-balancing mode; - * it is a lot more compact now - * - Changes to ensure one and only one slave transmits in active-backup - * mode - * - Robustesize parameters; warn users about bad combinations of - * parameters; also if miimon is specified and a network driver does - * not support MII or ETHTOOL, inform the user of this - * - Changes to support link_failure_count when in arp monitoring mode - * - Fix up/down delay reported in /proc - * - Added version; log version; make version available from "modinfo -d" - * - Fixed problem in bond_check_dev_link - if the first IOCTL (SIOCGMIIPH) - * failed, the ETHTOOL ioctl never got a chance - * - * 2002/11/16 - Laurent Deniel - * - fix multicast handling in activebackup_arp_monitor - * - remove one unnecessary and confusing current_slave == slave test - * in activebackup_arp_monitor - * - * 2002/11/17 - Laurent Deniel - * - fix bond_slave_info_query when slave_id = num_slaves - * - * 2002/11/19 - Janice Girouard - * - correct ifr_data reference. Update ifr_data reference - * to mii_ioctl_data struct values to avoid confusion. 
- * - * 2002/11/22 - Bert Barbe - * - Add support for multiple arp_ip_target - * - * 2002/12/13 - Jay Vosburgh - * - Changed to allow text strings for mode and multicast, e.g., - * insmod bonding mode=active-backup. The numbers still work. - * One change: an invalid choice will cause module load failure, - * rather than the previous behavior of just picking one. - * - Minor cleanups; got rid of dup ctype stuff, atoi function - * - * 2003/02/07 - Jay Vosburgh - * - Added use_carrier module parameter that causes miimon to - * use netif_carrier_ok() test instead of MII/ETHTOOL ioctls. - * - Minor cleanups; consolidated ioctl calls to one function. - * - * 2003/02/07 - Tony Cureington - * - Fix bond_mii_monitor() logic error that could result in - * bonding round-robin mode ignoring links after failover/recovery - * - * 2003/03/17 - Jay Vosburgh - * - kmalloc fix (GPF_KERNEL to GPF_ATOMIC) reported by - * Shmulik dot Hen at intel.com. - * - Based on discussion on mailing list, changed use of - * update_slave_cnt(), created wrapper functions for adding/removing - * slaves, changed bond_xmit_xor() to check slave_cnt instead of - * checking slave and slave->dev (which only worked by accident). - * - Misc code cleanup: get arp_send() prototype from header file, - * add max_bonds to bonding.txt. - * - * 2003/03/18 - Tsippy Mendelson and - * Shmulik Hen - * - Make sure only bond_attach_slave() and bond_detach_slave() can - * manipulate the slave list, including slave_cnt, even when in - * bond_release_all(). - * - Fixed hang in bond_release() while traffic is running. - * netdev_set_master() must not be called from within the bond lock. - * - * 2003/03/18 - Tsippy Mendelson and - * Shmulik Hen - * - Fixed hang in bond_enslave(): netdev_set_master() must not be - * called from within the bond lock while traffic is running. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#define DRV_VERSION "2.4.20-20030320" -#define DRV_RELDATE "March 20, 2003" -#define DRV_NAME "bonding" -#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" - -static const char *version = -DRV_NAME ".c:v" DRV_VERSION " (" DRV_RELDATE ")\n"; - -/* monitor all links that often (in milliseconds). <=0 disables monitoring */ -#ifndef BOND_LINK_MON_INTERV -#define BOND_LINK_MON_INTERV 0 -#endif - -#ifndef BOND_LINK_ARP_INTERV -#define BOND_LINK_ARP_INTERV 0 -#endif - -#ifndef MAX_ARP_IP_TARGETS -#define MAX_ARP_IP_TARGETS 16 -#endif - -static int arp_interval = BOND_LINK_ARP_INTERV; -static char *arp_ip_target[MAX_ARP_IP_TARGETS] = { NULL, }; -static unsigned long arp_target[MAX_ARP_IP_TARGETS] = { 0, } ; -static int arp_ip_count = 0; -static u32 my_ip = 0; -char *arp_target_hw_addr = NULL; - -static char *primary= NULL; - -static int max_bonds = BOND_DEFAULT_MAX_BONDS; -static int miimon = BOND_LINK_MON_INTERV; -static int use_carrier = 1; -static int bond_mode = BOND_MODE_ROUNDROBIN; -static int updelay = 0; -static int downdelay = 0; - -static char *mode = NULL; - -static struct bond_parm_tbl bond_mode_tbl[] = { -{ "balance-rr", BOND_MODE_ROUNDROBIN}, -{ "active-backup", BOND_MODE_ACTIVEBACKUP}, -{ "balance-xor", BOND_MODE_XOR}, -{ "broadcast", BOND_MODE_BROADCAST}, -{ NULL, -1}, -}; - -static int multicast_mode = BOND_MULTICAST_ALL; -static char *multicast = NULL; - -static struct bond_parm_tbl bond_mc_tbl[] = { -{ "disabled", BOND_MULTICAST_DISABLED}, -{ "active", BOND_MULTICAST_ACTIVE}, -{ "all", BOND_MULTICAST_ALL}, -{ NULL, -1}, -}; - -static int first_pass = 1; -static struct bonding *these_bonds = 
NULL; -static struct net_device *dev_bonds = NULL; - -MODULE_PARM(max_bonds, "i"); -MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); -MODULE_PARM(miimon, "i"); -MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); -MODULE_PARM(use_carrier, "i"); -MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; 09 for off, 1 for on (default)"); -MODULE_PARM(mode, "s"); -MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor"); -MODULE_PARM(arp_interval, "i"); -MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); -MODULE_PARM(arp_ip_target, "1-" __MODULE_STRING(MAX_ARP_IP_TARGETS) "s"); -MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); -MODULE_PARM(updelay, "i"); -MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); -MODULE_PARM(downdelay, "i"); -MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds"); -MODULE_PARM(primary, "s"); -MODULE_PARM_DESC(primary, "Primary network device to use"); -MODULE_PARM(multicast, "s"); -MODULE_PARM_DESC(multicast, "Mode for multicast support : 0 for none, 1 for active slave, 2 for all slaves (default)"); - -static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev); -static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev); -static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev); -static struct net_device_stats *bond_get_stats(struct net_device *dev); -static void bond_mii_monitor(struct net_device *dev); -static void loadbalance_arp_monitor(struct net_device *dev); -static void activebackup_arp_monitor(struct net_device *dev); -static int bond_event(struct notifier_block *this, unsigned long event, void *ptr); -static void bond_restore_slave_flags(slave_t *slave); -static void bond_mc_list_destroy(struct bonding *bond); -static void bond_mc_add(bonding_t *bond, void *addr, int alen); -static void bond_mc_delete(bonding_t 
*bond, void *addr, int alen); -static int bond_mc_list_copy (struct dev_mc_list *src, struct bonding *dst, int gpf_flag); -static inline int dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2); -static void bond_set_promiscuity(bonding_t *bond, int inc); -static void bond_set_allmulti(bonding_t *bond, int inc); -static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list); -static void bond_mc_update(bonding_t *bond, slave_t *new, slave_t *old); -static void bond_set_slave_inactive_flags(slave_t *slave); -static void bond_set_slave_active_flags(slave_t *slave); -static int bond_enslave(struct net_device *master, struct net_device *slave); -static int bond_release(struct net_device *master, struct net_device *slave); -static int bond_release_all(struct net_device *master); -static int bond_sethwaddr(struct net_device *master, struct net_device *slave); - -/* - * bond_get_info is the interface into the /proc filesystem. This is - * a different interface than the BOND_INFO_QUERY ioctl. That is done - * through the generic networking ioctl interface, and bond_info_query - * is the internal function which provides that information. 
- */ -static int bond_get_info(char *buf, char **start, off_t offset, int length); - -/* #define BONDING_DEBUG 1 */ - -/* several macros */ - -#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \ - (netif_running(dev) && netif_carrier_ok(dev))) - -static void arp_send_all(slave_t *slave) -{ - int i; - - for (i = 0; (idev, - my_ip, arp_target_hw_addr, slave->dev->dev_addr, - arp_target_hw_addr); - } -} - - -static const char * -bond_mode_name(void) -{ - switch (bond_mode) { - case BOND_MODE_ROUNDROBIN : - return "load balancing (round-robin)"; - case BOND_MODE_ACTIVEBACKUP : - return "fault-tolerance (active-backup)"; - case BOND_MODE_XOR : - return "load balancing (xor)"; - case BOND_MODE_BROADCAST : - return "fault-tolerance (broadcast)"; - default : - return "unknown"; - } -} - -static const char * -multicast_mode_name(void) -{ - switch(multicast_mode) { - case BOND_MULTICAST_DISABLED : - return "disabled"; - case BOND_MULTICAST_ACTIVE : - return "active slave only"; - case BOND_MULTICAST_ALL : - return "all slaves"; - default : - return "unknown"; - } -} - -static void bond_restore_slave_flags(slave_t *slave) -{ - slave->dev->flags = slave->original_flags; -} - -static void bond_set_slave_inactive_flags(slave_t *slave) -{ - slave->state = BOND_STATE_BACKUP; - slave->dev->flags |= IFF_NOARP; -} - -static void bond_set_slave_active_flags(slave_t *slave) -{ - slave->state = BOND_STATE_ACTIVE; - slave->dev->flags &= ~IFF_NOARP; -} - -/* - * This function counts and verifies the the number of attached - * slaves, checking the count against the expected value (given that incr - * is either 1 or -1, for add or removal of a slave). Only - * bond_xmit_xor() uses the slave_cnt value, but this is still a good - * consistency check. 
- */ -static inline void -update_slave_cnt(bonding_t *bond, int incr) -{ - slave_t *slave = NULL; - int expect = bond->slave_cnt + incr; - - bond->slave_cnt = 0; - for (slave = bond->prev; slave != (slave_t*)bond; - slave = slave->prev) { - bond->slave_cnt++; - } - - if (expect != bond->slave_cnt) - BUG(); -} - -/* - * This function detaches the slave from the list . - * WARNING: no check is made to verify if the slave effectively - * belongs to . It returns in case it's needed. - * Nothing is freed on return, structures are just unchained. - * If the bond->current_slave pointer was pointing to , - * it's replaced with slave->next, or if not applicable. - * - * bond->lock held by caller. - */ -static slave_t * -bond_detach_slave(bonding_t *bond, slave_t *slave) -{ - if ((bond == NULL) || (slave == NULL) || - ((void *)bond == (void *)slave)) { - printk(KERN_ERR - "bond_detach_slave(): trying to detach " - "slave %p from bond %p\n", bond, slave); - return slave; - } - - if (bond->next == slave) { /* is the slave at the head ? */ - if (bond->prev == slave) { /* is the slave alone ? */ - write_lock(&bond->ptrlock); - bond->current_slave = NULL; /* no slave anymore */ - write_unlock(&bond->ptrlock); - bond->prev = bond->next = (slave_t *)bond; - } else { /* not alone */ - bond->next = slave->next; - slave->next->prev = (slave_t *)bond; - bond->prev->next = slave->next; - - write_lock(&bond->ptrlock); - if (bond->current_slave == slave) { - bond->current_slave = slave->next; - } - write_unlock(&bond->ptrlock); - } - } else { - slave->prev->next = slave->next; - if (bond->prev == slave) { /* is this slave the last one ? 
*/ - bond->prev = slave->prev; - } else { - slave->next->prev = slave->prev; - } - - write_lock(&bond->ptrlock); - if (bond->current_slave == slave) { - bond->current_slave = slave->next; - } - write_unlock(&bond->ptrlock); - } - - update_slave_cnt(bond, -1); - - return slave; -} - -static void -bond_attach_slave(struct bonding *bond, struct slave *new_slave) -{ - /* - * queue to the end of the slaves list, make the first element its - * successor, the last one its predecessor, and make it the bond's - * predecessor. - * - * Just to clarify, so future bonding driver hackers don't go through - * the same confusion stage I did trying to figure this out, the - * slaves are stored in a double linked circular list, sortof. - * In the ->next direction, the last slave points to the first slave, - * bypassing bond; only the slaves are in the ->next direction. - * In the ->prev direction, however, the first slave points to bond - * and bond points to the last slave. - * - * It looks like a circle with a little bubble hanging off one side - * in the ->prev direction only. - * - * When going through the list once, its best to start at bond->prev - * and go in the ->prev direction, testing for bond. Doing this - * in the ->next direction doesn't work. Trust me, I know this now. - * :) -mts 2002.03.14 - */ - new_slave->prev = bond->prev; - new_slave->prev->next = new_slave; - bond->prev = new_slave; - new_slave->next = bond->next; - - update_slave_cnt(bond, 1); -} - - -/* - * Less bad way to call ioctl from within the kernel; this needs to be - * done some other way to get the call out of interrupt context. - * Needs "ioctl" variable to be supplied by calling context. - */ -#define IOCTL(dev, arg, cmd) ({ \ - int ret; \ - mm_segment_t fs = get_fs(); \ - set_fs(get_ds()); \ - ret = ioctl(dev, arg, cmd); \ - set_fs(fs); \ - ret; }) - -/* - * if supports MII link status reporting, check its link status. 
- * - * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), - * depening upon the setting of the use_carrier parameter. - * - * Return either BMSR_LSTATUS, meaning that the link is up (or we - * can't tell and just pretend it is), or 0, meaning that the link is - * down. - * - * If reporting is non-zero, instead of faking link up, return -1 if - * both ETHTOOL and MII ioctls fail (meaning the device does not - * support them). If use_carrier is set, return whatever it says. - * It'd be nice if there was a good way to tell if a driver supports - * netif_carrier, but there really isn't. - */ -static int -bond_check_dev_link(struct net_device *dev, int reporting) -{ - static int (* ioctl)(struct net_device *, struct ifreq *, int); - struct ifreq ifr; - struct mii_ioctl_data *mii; - struct ethtool_value etool; - - if (use_carrier) { - return netif_carrier_ok(dev) ? BMSR_LSTATUS : 0; - } - - ioctl = dev->do_ioctl; - if (ioctl) { - /* TODO: set pointer to correct ioctl on a per team member */ - /* bases to make this more efficient. that is, once */ - /* we determine the correct ioctl, we will always */ - /* call it and not the others for that team */ - /* member. */ - - /* - * We cannot assume that SIOCGMIIPHY will also read a - * register; not all network drivers (e.g., e100) - * support that. - */ - - /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ - mii = (struct mii_ioctl_data *)&ifr.ifr_data; - if (IOCTL(dev, &ifr, SIOCGMIIPHY) == 0) { - mii->reg_num = MII_BMSR; - if (IOCTL(dev, &ifr, SIOCGMIIREG) == 0) { - return mii->val_out & BMSR_LSTATUS; - } - } - - /* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */ - /* for a period of time so we attempt to get link status */ - /* from it last if the above MII ioctls fail... 
*/ - etool.cmd = ETHTOOL_GLINK; - ifr.ifr_data = (char*)&etool; - if (IOCTL(dev, &ifr, SIOCETHTOOL) == 0) { - if (etool.data == 1) { - return BMSR_LSTATUS; - } else { -#ifdef BONDING_DEBUG - printk(KERN_INFO - ":: SIOCETHTOOL shows link down \n"); -#endif - return 0; - } - } - - } - - /* - * If reporting, report that either there's no dev->do_ioctl, - * or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we - * cannot report link status). If not reporting, pretend - * we're ok. - */ - return reporting ? -1 : BMSR_LSTATUS; -} - -static u16 bond_check_mii_link(bonding_t *bond) -{ - int has_active_interface = 0; - unsigned long flags; - - read_lock_irqsave(&bond->lock, flags); - read_lock(&bond->ptrlock); - has_active_interface = (bond->current_slave != NULL); - read_unlock(&bond->ptrlock); - read_unlock_irqrestore(&bond->lock, flags); - - return (has_active_interface ? BMSR_LSTATUS : 0); -} - -static int bond_open(struct net_device *dev) -{ - struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer; - struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer; - MOD_INC_USE_COUNT; - - if (miimon > 0) { /* link check interval, in milliseconds. */ - init_timer(timer); - timer->expires = jiffies + (miimon * HZ / 1000); - timer->data = (unsigned long)dev; - timer->function = (void *)&bond_mii_monitor; - add_timer(timer); - } - - if (arp_interval> 0) { /* arp interval, in milliseconds. 
*/ - init_timer(arp_timer); - arp_timer->expires = jiffies + (arp_interval * HZ / 1000); - arp_timer->data = (unsigned long)dev; - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - arp_timer->function = (void *)&activebackup_arp_monitor; - } else { - arp_timer->function = (void *)&loadbalance_arp_monitor; - } - add_timer(arp_timer); - } - return 0; -} - -static int bond_close(struct net_device *master) -{ - bonding_t *bond = (struct bonding *) master->priv; - unsigned long flags; - - write_lock_irqsave(&bond->lock, flags); - - if (miimon > 0) { /* link check interval, in milliseconds. */ - del_timer(&bond->mii_timer); - } - if (arp_interval> 0) { /* arp interval, in milliseconds. */ - del_timer(&bond->arp_timer); - if (arp_target_hw_addr != NULL) { - kfree(arp_target_hw_addr); - arp_target_hw_addr = NULL; - } - } - - /* Release the bonded slaves */ - bond_release_all(master); - bond_mc_list_destroy (bond); - - write_unlock_irqrestore(&bond->lock, flags); - - MOD_DEC_USE_COUNT; - return 0; -} - -/* - * flush all members of flush->mc_list from device dev->mc_list - */ -static void bond_mc_list_flush(struct net_device *dev, struct net_device *flush) -{ - struct dev_mc_list *dmi; - - for (dmi = flush->mc_list; dmi != NULL; dmi = dmi->next) - dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); -} - -/* - * Totally destroys the mc_list in bond - */ -static void bond_mc_list_destroy(struct bonding *bond) -{ - struct dev_mc_list *dmi; - - dmi = bond->mc_list; - while (dmi) { - bond->mc_list = dmi->next; - kfree(dmi); - dmi = bond->mc_list; - } -} - -/* - * Add a Multicast address to every slave in the bonding group - */ -static void bond_mc_add(bonding_t *bond, void *addr, int alen) -{ - slave_t *slave; - switch (multicast_mode) { - case BOND_MULTICAST_ACTIVE : - /* write lock already acquired */ - if (bond->current_slave != NULL) - dev_mc_add(bond->current_slave->dev, addr, alen, 0); - break; - case BOND_MULTICAST_ALL : - for (slave = bond->prev; slave != (slave_t*)bond; 
slave = slave->prev) - dev_mc_add(slave->dev, addr, alen, 0); - break; - case BOND_MULTICAST_DISABLED : - break; - } -} - -/* - * Remove a multicast address from every slave in the bonding group - */ -static void bond_mc_delete(bonding_t *bond, void *addr, int alen) -{ - slave_t *slave; - switch (multicast_mode) { - case BOND_MULTICAST_ACTIVE : - /* write lock already acquired */ - if (bond->current_slave != NULL) - dev_mc_delete(bond->current_slave->dev, addr, alen, 0); - break; - case BOND_MULTICAST_ALL : - for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) - dev_mc_delete(slave->dev, addr, alen, 0); - break; - case BOND_MULTICAST_DISABLED : - break; - } -} - -/* - * Copy all the Multicast addresses from src to the bonding device dst - */ -static int bond_mc_list_copy (struct dev_mc_list *src, struct bonding *dst, - int gpf_flag) -{ - struct dev_mc_list *dmi, *new_dmi; - - for (dmi = src; dmi != NULL; dmi = dmi->next) { - new_dmi = kmalloc(sizeof(struct dev_mc_list), gpf_flag); - - if (new_dmi == NULL) { - return -ENOMEM; - } - - new_dmi->next = dst->mc_list; - dst->mc_list = new_dmi; - - new_dmi->dmi_addrlen = dmi->dmi_addrlen; - memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); - new_dmi->dmi_users = dmi->dmi_users; - new_dmi->dmi_gusers = dmi->dmi_gusers; - } - return 0; -} - -/* - * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise - */ -static inline int dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) -{ - return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && - dmi1->dmi_addrlen == dmi2->dmi_addrlen; -} - -/* - * Push the promiscuity flag down to all slaves - */ -static void bond_set_promiscuity(bonding_t *bond, int inc) -{ - slave_t *slave; - switch (multicast_mode) { - case BOND_MULTICAST_ACTIVE : - /* write lock already acquired */ - if (bond->current_slave != NULL) - dev_set_promiscuity(bond->current_slave->dev, inc); - break; - case BOND_MULTICAST_ALL : - for (slave = bond->prev; slave 
!= (slave_t*)bond; slave = slave->prev) - dev_set_promiscuity(slave->dev, inc); - break; - case BOND_MULTICAST_DISABLED : - break; - } -} - -/* - * Push the allmulti flag down to all slaves - */ -static void bond_set_allmulti(bonding_t *bond, int inc) -{ - slave_t *slave; - switch (multicast_mode) { - case BOND_MULTICAST_ACTIVE : - /* write lock already acquired */ - if (bond->current_slave != NULL) - dev_set_allmulti(bond->current_slave->dev, inc); - break; - case BOND_MULTICAST_ALL : - for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) - dev_set_allmulti(slave->dev, inc); - break; - case BOND_MULTICAST_DISABLED : - break; - } -} - -/* - * returns dmi entry if found, NULL otherwise - */ -static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, - struct dev_mc_list *mc_list) -{ - struct dev_mc_list *idmi; - - for (idmi = mc_list; idmi != NULL; idmi = idmi->next) { - if (dmi_same(dmi, idmi)) { - return idmi; - } - } - return NULL; -} - -static void set_multicast_list(struct net_device *master) -{ - bonding_t *bond = master->priv; - struct dev_mc_list *dmi; - unsigned long flags = 0; - - if (multicast_mode == BOND_MULTICAST_DISABLED) - return; - /* - * Lock the private data for the master - */ - write_lock_irqsave(&bond->lock, flags); - - /* set promiscuity flag to slaves */ - if ( (master->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC) ) - bond_set_promiscuity(bond, 1); - - if ( !(master->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC) ) - bond_set_promiscuity(bond, -1); - - /* set allmulti flag to slaves */ - if ( (master->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI) ) - bond_set_allmulti(bond, 1); - - if ( !(master->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI) ) - bond_set_allmulti(bond, -1); - - bond->flags = master->flags; - - /* looking for addresses to add to slaves' mc list */ - for (dmi = master->mc_list; dmi != NULL; dmi = dmi->next) { - if (bond_mc_list_find_dmi(dmi, bond->mc_list) == NULL) 
- bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); - } - - /* looking for addresses to delete from slaves' list */ - for (dmi = bond->mc_list; dmi != NULL; dmi = dmi->next) { - if (bond_mc_list_find_dmi(dmi, master->mc_list) == NULL) - bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); - } - - - /* save master's multicast list */ - bond_mc_list_destroy (bond); - bond_mc_list_copy (master->mc_list, bond, GFP_ATOMIC); - - write_unlock_irqrestore(&bond->lock, flags); -} - -/* - * Update the mc list and multicast-related flags for the new and - * old active slaves (if any) according to the multicast mode - */ -static void bond_mc_update(bonding_t *bond, slave_t *new, slave_t *old) -{ - struct dev_mc_list *dmi; - - switch(multicast_mode) { - case BOND_MULTICAST_ACTIVE : - if (bond->device->flags & IFF_PROMISC) { - if (old != NULL && new != old) - dev_set_promiscuity(old->dev, -1); - dev_set_promiscuity(new->dev, 1); - } - if (bond->device->flags & IFF_ALLMULTI) { - if (old != NULL && new != old) - dev_set_allmulti(old->dev, -1); - dev_set_allmulti(new->dev, 1); - } - /* first remove all mc addresses from old slave if any, - and _then_ add them to new active slave */ - if (old != NULL && new != old) { - for (dmi = bond->device->mc_list; dmi != NULL; dmi = dmi->next) - dev_mc_delete(old->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - } - for (dmi = bond->device->mc_list; dmi != NULL; dmi = dmi->next) - dev_mc_add(new->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - break; - case BOND_MULTICAST_ALL : - /* nothing to do: mc list is already up-to-date on all slaves */ - break; - case BOND_MULTICAST_DISABLED : - break; - } -} - -/* enslave device to bond device */ -static int bond_enslave(struct net_device *master_dev, - struct net_device *slave_dev) -{ - bonding_t *bond = NULL; - slave_t *new_slave = NULL; - unsigned long flags = 0; - unsigned long rflags = 0; - int ndx = 0; - int err = 0; - struct dev_mc_list *dmi; - struct in_ifaddr **ifap; - struct in_ifaddr *ifa; - int 
link_reporting; - - if (master_dev == NULL || slave_dev == NULL) { - return -ENODEV; - } - bond = (struct bonding *) master_dev->priv; - - if (slave_dev->do_ioctl == NULL) { - printk(KERN_DEBUG - "Warning : no link monitoring support for %s\n", - slave_dev->name); - } - - /* not running. */ - if ((slave_dev->flags & IFF_UP) != IFF_UP) { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "Error, slave_dev is not running\n"); -#endif - return -EINVAL; - } - - /* already enslaved */ - if (master_dev->flags & IFF_SLAVE || slave_dev->flags & IFF_SLAVE) { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "Error, Device was already enslaved\n"); -#endif - return -EBUSY; - } - - if ((new_slave = kmalloc(sizeof(slave_t), GFP_ATOMIC)) == NULL) { - return -ENOMEM; - } - memset(new_slave, 0, sizeof(slave_t)); - - /* save flags before call to netdev_set_master */ - new_slave->original_flags = slave_dev->flags; - err = netdev_set_master(slave_dev, master_dev); - - if (err) { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "Error %d calling netdev_set_master\n", err); -#endif - goto err_free; - } - - new_slave->dev = slave_dev; - - if (multicast_mode == BOND_MULTICAST_ALL) { - /* set promiscuity level to new slave */ - if (master_dev->flags & IFF_PROMISC) - dev_set_promiscuity(slave_dev, 1); - - /* set allmulti level to new slave */ - if (master_dev->flags & IFF_ALLMULTI) - dev_set_allmulti(slave_dev, 1); - - /* upload master's mc_list to new slave */ - for (dmi = master_dev->mc_list; dmi != NULL; dmi = dmi->next) - dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - } - - write_lock_irqsave(&bond->lock, flags); - - bond_attach_slave(bond, new_slave); - new_slave->delay = 0; - new_slave->link_failure_count = 0; - - if (miimon > 0 && !use_carrier) { - link_reporting = bond_check_dev_link(slave_dev, 1); - - if ((link_reporting == -1) && (arp_interval == 0)) { - /* - * miimon is set but a bonded network driver - * does not support ETHTOOL/MII and - * arp_interval is not set. 
Note: if - * use_carrier is enabled, we will never go - * here (because netif_carrier is always - * supported); thus, we don't need to change - * the messages for netif_carrier. - */ - printk(KERN_ERR - "bond_enslave(): MII and ETHTOOL support not " - "available for interface %s, and " - "arp_interval/arp_ip_target module parameters " - "not specified, thus bonding will not detect " - "link failures! see bonding.txt for details.\n", - slave_dev->name); - } else if (link_reporting == -1) { - /* unable get link status using mii/ethtool */ - printk(KERN_WARNING - "bond_enslave: can't get link status from " - "interface %s; the network driver associated " - "with this interface does not support " - "MII or ETHTOOL link status reporting, thus " - "miimon has no effect on this interface.\n", - slave_dev->name); - } - } - - /* check for initial state */ - if ((miimon <= 0) || - (bond_check_dev_link(slave_dev, 0) == BMSR_LSTATUS)) { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_UP\n"); -#endif - new_slave->link = BOND_LINK_UP; - new_slave->jiffies = jiffies; - } - else { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_DOWN\n"); -#endif - new_slave->link = BOND_LINK_DOWN; - } - - /* if we're in active-backup mode, we need one and only one active - * interface. The backup interfaces will have their NOARP flag set - * because we need them to be completely deaf and not to respond to - * any ARP request on the network to avoid fooling a switch. Thus, - * since we guarantee that current_slave always point to the last - * usable interface, we just have to verify this interface's flag. 
- */ - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - if (((bond->current_slave == NULL) - || (bond->current_slave->dev->flags & IFF_NOARP)) - && (new_slave->link == BOND_LINK_UP)) { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "This is the first active slave\n"); -#endif - /* first slave or no active slave yet, and this link - is OK, so make this interface the active one */ - bond->current_slave = new_slave; - bond_set_slave_active_flags(new_slave); - bond_mc_update(bond, new_slave, NULL); - } - else { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "This is just a backup slave\n"); -#endif - bond_set_slave_inactive_flags(new_slave); - } - read_lock_irqsave(&(((struct in_device *)slave_dev->ip_ptr)->lock), rflags); - ifap= &(((struct in_device *)slave_dev->ip_ptr)->ifa_list); - ifa = *ifap; - my_ip = ifa->ifa_address; - read_unlock_irqrestore(&(((struct in_device *)slave_dev->ip_ptr)->lock), rflags); - - /* if there is a primary slave, remember it */ - if (primary != NULL) - if( strcmp(primary, new_slave->dev->name) == 0) - bond->primary_slave = new_slave; - } else { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "This slave is always active in trunk mode\n"); -#endif - /* always active in trunk mode */ - new_slave->state = BOND_STATE_ACTIVE; - if (bond->current_slave == NULL) - bond->current_slave = new_slave; - } - - write_unlock_irqrestore(&bond->lock, flags); - - /* - * !!! This is to support old versions of ifenslave. We can remove - * this in 2.5 because our ifenslave takes care of this for us. - * We check to see if the master has a mac address yet. If not, - * we'll give it the mac address of our slave device. 
- */ - for (ndx = 0; ndx < slave_dev->addr_len; ndx++) { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "Checking ndx=%d of master_dev->dev_addr\n", - ndx); -#endif - if (master_dev->dev_addr[ndx] != 0) { -#ifdef BONDING_DEBUG - printk(KERN_CRIT "Found non-zero byte at ndx=%d\n", - ndx); -#endif - break; - } - } - if (ndx == slave_dev->addr_len) { - /* - * We got all the way through the address and it was - * all 0's. - */ -#ifdef BONDING_DEBUG - printk(KERN_CRIT "%s doesn't have a MAC address yet. ", - master_dev->name); - printk(KERN_CRIT "Going to give assign it from %s.\n", - slave_dev->name); -#endif - bond_sethwaddr(master_dev, slave_dev); - } - - printk (KERN_INFO "%s: enslaving %s as a%s interface with a%s link.\n", - master_dev->name, slave_dev->name, - new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", - new_slave->link == BOND_LINK_UP ? "n up" : " down"); - - /* enslave is successful */ - return 0; -err_free: - kfree(new_slave); - return err; -} - -/* - * This function changes the active slave to slave . - * It returns -EINVAL in the following cases. - * - is not found in the list. - * - There is not active slave now. - * - is already active. - * - The link state of is not BOND_LINK_UP. - * - is not running. - * In these cases, this fuction does nothing. - * In the other cases, currnt_slave pointer is changed and 0 is returned. 
- */ -static int bond_change_active(struct net_device *master_dev, struct net_device *slave_dev) -{ - bonding_t *bond; - slave_t *slave; - slave_t *oldactive = NULL; - slave_t *newactive = NULL; - unsigned long flags; - int ret = 0; - - if (master_dev == NULL || slave_dev == NULL) { - return -ENODEV; - } - - bond = (struct bonding *) master_dev->priv; - write_lock_irqsave(&bond->lock, flags); - slave = (slave_t *)bond; - oldactive = bond->current_slave; - - while ((slave = slave->prev) != (slave_t *)bond) { - if(slave_dev == slave->dev) { - newactive = slave; - break; - } - } - - if ((newactive != NULL)&& - (oldactive != NULL)&& - (newactive != oldactive)&& - (newactive->link == BOND_LINK_UP)&& - IS_UP(newactive->dev)) { - bond_set_slave_inactive_flags(oldactive); - bond_set_slave_active_flags(newactive); - bond_mc_update(bond, newactive, oldactive); - bond->current_slave = newactive; - printk("%s : activate %s(old : %s)\n", - master_dev->name, newactive->dev->name, - oldactive->dev->name); - } - else { - ret = -EINVAL; - } - write_unlock_irqrestore(&bond->lock, flags); - return ret; -} - -/* Choose a new valid interface from the pool, set it active - * and make it the current slave. If no valid interface is - * found, the oldest slave in BACK state is choosen and - * activated. If none is found, it's considered as no - * interfaces left so the current slave is set to NULL. - * The result is a pointer to the current slave. - * - * Since this function sends messages tails through printk, the caller - * must have started something like `printk(KERN_INFO "xxxx ");'. - * - * Warning: must put locks around the call to this function if needed. 
- */ -slave_t *change_active_interface(bonding_t *bond) -{ - slave_t *newslave, *oldslave; - slave_t *bestslave = NULL; - int mintime; - - read_lock(&bond->ptrlock); - newslave = oldslave = bond->current_slave; - read_unlock(&bond->ptrlock); - - if (newslave == NULL) { /* there were no active slaves left */ - if (bond->next != (slave_t *)bond) { /* found one slave */ - write_lock(&bond->ptrlock); - newslave = bond->current_slave = bond->next; - write_unlock(&bond->ptrlock); - } else { - - printk (" but could not find any %s interface.\n", - (bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other"); - write_lock(&bond->ptrlock); - bond->current_slave = (slave_t *)NULL; - write_unlock(&bond->ptrlock); - return NULL; /* still no slave, return NULL */ - } - } else if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - /* make sure oldslave doesn't send arps - this could - * cause a ping-pong effect between interfaces since they - * would be able to tx arps - in active backup only one - * slave should be able to tx arps, and that should be - * the current_slave; the only exception is when all - * slaves have gone down, then only one non-current slave can - * send arps at a time; clearing oldslaves' mc list is handled - * later in this function. 
- */ - bond_set_slave_inactive_flags(oldslave); - } - - mintime = updelay; - - /* first try the primary link; if arping, a link must tx/rx traffic - * before it can be considered the current_slave - also, we would skip - * slaves between the current_slave and primary_slave that may be up - * and able to arp - */ - if ((bond->primary_slave != NULL) && (arp_interval == 0)) { - if (IS_UP(bond->primary_slave->dev)) - newslave = bond->primary_slave; - } - - do { - if (IS_UP(newslave->dev)) { - if (newslave->link == BOND_LINK_UP) { - /* this one is immediately usable */ - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - bond_set_slave_active_flags(newslave); - bond_mc_update(bond, newslave, oldslave); - printk (" and making interface %s the active one.\n", - newslave->dev->name); - } - else { - printk (" and setting pointer to interface %s.\n", - newslave->dev->name); - } - - write_lock(&bond->ptrlock); - bond->current_slave = newslave; - write_unlock(&bond->ptrlock); - return newslave; - } - else if (newslave->link == BOND_LINK_BACK) { - /* link up, but waiting for stabilization */ - if (newslave->delay < mintime) { - mintime = newslave->delay; - bestslave = newslave; - } - } - } - } while ((newslave = newslave->next) != oldslave); - - /* no usable backup found, we'll see if we at least got a link that was - coming back for a long time, and could possibly already be usable. - */ - - if (bestslave != NULL) { - /* early take-over. 
*/ - printk (" and making interface %s the active one %d ms earlier.\n", - bestslave->dev->name, - (updelay - bestslave->delay)*miimon); - - bestslave->delay = 0; - bestslave->link = BOND_LINK_UP; - bestslave->jiffies = jiffies; - bond_set_slave_active_flags(bestslave); - bond_mc_update(bond, bestslave, oldslave); - write_lock(&bond->ptrlock); - bond->current_slave = bestslave; - write_unlock(&bond->ptrlock); - return bestslave; - } - - if ((bond_mode == BOND_MODE_ACTIVEBACKUP) && - (multicast_mode == BOND_MULTICAST_ACTIVE) && - (oldslave != NULL)) { - /* flush bonds (master's) mc_list from oldslave since it wasn't - * updated (and deleted) above - */ - bond_mc_list_flush(oldslave->dev, bond->device); - if (bond->device->flags & IFF_PROMISC) { - dev_set_promiscuity(oldslave->dev, -1); - } - if (bond->device->flags & IFF_ALLMULTI) { - dev_set_allmulti(oldslave->dev, -1); - } - } - - printk (" but could not find any %s interface.\n", - (bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other"); - - /* absolutely nothing found. let's return NULL */ - write_lock(&bond->ptrlock); - bond->current_slave = (slave_t *)NULL; - write_unlock(&bond->ptrlock); - return NULL; -} - -/* - * Try to release the slave device from the bond device - * It is legal to access current_slave without a lock because all the function - * is write-locked. - * - * The rules for slave state should be: - * for Active/Backup: - * Active stays on all backups go down - * for Bonded connections: - * The first up interface should be left on and all others downed. 
- */ -static int bond_release(struct net_device *master, struct net_device *slave) -{ - bonding_t *bond; - slave_t *our_slave, *old_current; - unsigned long flags; - - if (master == NULL || slave == NULL) { - return -ENODEV; - } - - bond = (struct bonding *) master->priv; - - /* master already enslaved, or slave not enslaved, - or no slave for this master */ - if ((master->flags & IFF_SLAVE) || !(slave->flags & IFF_SLAVE)) { - printk (KERN_DEBUG "%s: cannot release %s.\n", master->name, slave->name); - return -EINVAL; - } - - write_lock_irqsave(&bond->lock, flags); - bond->current_arp_slave = NULL; - our_slave = (slave_t *)bond; - old_current = bond->current_slave; - while ((our_slave = our_slave->prev) != (slave_t *)bond) { - if (our_slave->dev == slave) { - bond_detach_slave(bond, our_slave); - - printk (KERN_INFO "%s: releasing %s interface %s", - master->name, - (our_slave->state == BOND_STATE_ACTIVE) ? "active" : "backup", - slave->name); - - if (our_slave == old_current) { - /* find a new interface and be verbose */ - change_active_interface(bond); - } else { - printk(".\n"); - } - - if (bond->current_slave == NULL) { - printk(KERN_INFO - "%s: now running without any active interface !\n", - master->name); - } - - if (bond->primary_slave == our_slave) { - bond->primary_slave = NULL; - } - - break; - } - - } - write_unlock_irqrestore(&bond->lock, flags); - - if (our_slave == (slave_t *)bond) { - /* if we get here, it's because the device was not found */ - printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name); - return -EINVAL; - } - - /* undo settings and restore original values */ - - if (multicast_mode == BOND_MULTICAST_ALL) { - /* flush master's mc_list from slave */ - bond_mc_list_flush (slave, master); - - /* unset promiscuity level from slave */ - if (master->flags & IFF_PROMISC) - dev_set_promiscuity(slave, -1); - - /* unset allmulti level from slave */ - if (master->flags & IFF_ALLMULTI) - dev_set_allmulti(slave, -1); - } - - 
netdev_set_master(slave, NULL); - - /* only restore its RUNNING flag if monitoring set it down */ - if (slave->flags & IFF_UP) { - slave->flags |= IFF_RUNNING; - } - - if (slave->flags & IFF_NOARP || - bond->current_slave != NULL) { - dev_close(slave); - our_slave->original_flags &= ~IFF_UP; - } - - bond_restore_slave_flags(our_slave); - - kfree(our_slave); - - return 0; /* deletion OK */ -} - -/* - * This function releases all slaves. - * Warning: must put write-locks around the call to this function. - */ -static int bond_release_all(struct net_device *master) -{ - bonding_t *bond; - slave_t *our_slave; - struct net_device *slave_dev; - - if (master == NULL) { - return -ENODEV; - } - - if (master->flags & IFF_SLAVE) { - return -EINVAL; - } - - bond = (struct bonding *) master->priv; - bond->current_arp_slave = NULL; - bond->current_slave = NULL; - bond->primary_slave = NULL; - - while ((our_slave = bond->prev) != (slave_t *)bond) { - slave_dev = our_slave->dev; - bond_detach_slave(bond, our_slave); - - if (multicast_mode == BOND_MULTICAST_ALL - || (multicast_mode == BOND_MULTICAST_ACTIVE - && bond->current_slave == our_slave)) { - - /* flush master's mc_list from slave */ - bond_mc_list_flush (slave_dev, master); - - /* unset promiscuity level from slave */ - if (master->flags & IFF_PROMISC) - dev_set_promiscuity(slave_dev, -1); - - /* unset allmulti level from slave */ - if (master->flags & IFF_ALLMULTI) - dev_set_allmulti(slave_dev, -1); - } - - kfree(our_slave); - - /* - * Can be safely called from inside the bond lock - * since traffic and timers have already stopped - */ - netdev_set_master(slave_dev, NULL); - - /* only restore its RUNNING flag if monitoring set it down */ - if (slave_dev->flags & IFF_UP) - slave_dev->flags |= IFF_RUNNING; - - if (slave_dev->flags & IFF_NOARP) - dev_close(slave_dev); - } - - printk (KERN_INFO "%s: released all slaves\n", master->name); - - return 0; -} - -/* this function is called regularly to monitor each slave's link. 
*/ -static void bond_mii_monitor(struct net_device *master) -{ - bonding_t *bond = (struct bonding *) master->priv; - slave_t *slave, *bestslave, *oldcurrent; - unsigned long flags; - int slave_died = 0; - - read_lock_irqsave(&bond->lock, flags); - - /* we will try to read the link status of each of our slaves, and - * set their IFF_RUNNING flag appropriately. For each slave not - * supporting MII status, we won't do anything so that a user-space - * program could monitor the link itself if needed. - */ - - bestslave = NULL; - slave = (slave_t *)bond; - - read_lock(&bond->ptrlock); - oldcurrent = bond->current_slave; - read_unlock(&bond->ptrlock); - - while ((slave = slave->prev) != (slave_t *)bond) { - /* use updelay+1 to match an UP slave even when updelay is 0 */ - int mindelay = updelay + 1; - struct net_device *dev = slave->dev; - int link_state; - - link_state = bond_check_dev_link(dev, 0); - - switch (slave->link) { - case BOND_LINK_UP: /* the link was up */ - if (link_state == BMSR_LSTATUS) { - /* link stays up, tell that this one - is immediately available */ - if (IS_UP(dev) && (mindelay > -2)) { - /* -2 is the best case : - this slave was already up */ - mindelay = -2; - bestslave = slave; - } - break; - } - else { /* link going down */ - slave->link = BOND_LINK_FAIL; - slave->delay = downdelay; - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } - if (downdelay > 0) { - printk (KERN_INFO - "%s: link status down for %sinterface " - "%s, disabling it in %d ms.\n", - master->name, - IS_UP(dev) - ? ((bond_mode == BOND_MODE_ACTIVEBACKUP) - ? ((slave == oldcurrent) - ? "active " : "backup ") - : "") - : "idle ", - dev->name, - downdelay * miimon); - } - } - /* no break ! 
fall through the BOND_LINK_FAIL test to - ensure proper action to be taken - */ - case BOND_LINK_FAIL: /* the link has just gone down */ - if (link_state != BMSR_LSTATUS) { - /* link stays down */ - if (slave->delay <= 0) { - /* link down for too long time */ - slave->link = BOND_LINK_DOWN; - /* in active/backup mode, we must - completely disable this interface */ - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - bond_set_slave_inactive_flags(slave); - } - printk(KERN_INFO - "%s: link status definitely down " - "for interface %s, disabling it", - master->name, - dev->name); - - read_lock(&bond->ptrlock); - if (slave == bond->current_slave) { - read_unlock(&bond->ptrlock); - /* find a new interface and be verbose */ - change_active_interface(bond); - } else { - read_unlock(&bond->ptrlock); - printk(".\n"); - } - slave_died = 1; - } else { - slave->delay--; - } - } else { - /* link up again */ - slave->link = BOND_LINK_UP; - slave->jiffies = jiffies; - printk(KERN_INFO - "%s: link status up again after %d ms " - "for interface %s.\n", - master->name, - (downdelay - slave->delay) * miimon, - dev->name); - - if (IS_UP(dev) && (mindelay > -1)) { - /* -1 is a good case : this slave went - down only for a short time */ - mindelay = -1; - bestslave = slave; - } - } - break; - case BOND_LINK_DOWN: /* the link was down */ - if (link_state != BMSR_LSTATUS) { - /* the link stays down, nothing more to do */ - break; - } else { /* link going up */ - slave->link = BOND_LINK_BACK; - slave->delay = updelay; - - if (updelay > 0) { - /* if updelay == 0, no need to - advertise about a 0 ms delay */ - printk (KERN_INFO - "%s: link status up for interface" - " %s, enabling it in %d ms.\n", - master->name, - dev->name, - updelay * miimon); - } - } - /* no break ! fall through the BOND_LINK_BACK state in - case there's something to do. 
- */ - case BOND_LINK_BACK: /* the link has just come back */ - if (link_state != BMSR_LSTATUS) { - /* link down again */ - slave->link = BOND_LINK_DOWN; - printk(KERN_INFO - "%s: link status down again after %d ms " - "for interface %s.\n", - master->name, - (updelay - slave->delay) * miimon, - dev->name); - } else { - /* link stays up */ - if (slave->delay == 0) { - /* now the link has been up for long time enough */ - slave->link = BOND_LINK_UP; - slave->jiffies = jiffies; - - if (bond_mode != BOND_MODE_ACTIVEBACKUP) { - /* make it immediately active */ - slave->state = BOND_STATE_ACTIVE; - } else if (slave != bond->primary_slave) { - /* prevent it from being the active one */ - slave->state = BOND_STATE_BACKUP; - } - - printk(KERN_INFO - "%s: link status definitely up " - "for interface %s.\n", - master->name, - dev->name); - - if ( (bond->primary_slave != NULL) - && (slave == bond->primary_slave) ) - change_active_interface(bond); - } - else - slave->delay--; - - /* we'll also look for the mostly eligible slave */ - if (bond->primary_slave == NULL) { - if (IS_UP(dev) && (slave->delay < mindelay)) { - mindelay = slave->delay; - bestslave = slave; - } - } else if ( (IS_UP(bond->primary_slave->dev)) || - ( (!IS_UP(bond->primary_slave->dev)) && - (IS_UP(dev) && (slave->delay < mindelay)) ) ) { - mindelay = slave->delay; - bestslave = slave; - } - } - break; - } /* end of switch */ - } /* end of while */ - - /* - * if there's no active interface and we discovered that one - * of the slaves could be activated earlier, so we do it. - */ - read_lock(&bond->ptrlock); - oldcurrent = bond->current_slave; - read_unlock(&bond->ptrlock); - - /* no active interface at the moment or need to bring up the primary */ - if (oldcurrent == NULL) { /* no active interface at the moment */ - if (bestslave != NULL) { /* last chance to find one ? 
*/ - if (bestslave->link == BOND_LINK_UP) { - printk (KERN_INFO - "%s: making interface %s the new active one.\n", - master->name, bestslave->dev->name); - } else { - printk (KERN_INFO - "%s: making interface %s the new " - "active one %d ms earlier.\n", - master->name, bestslave->dev->name, - (updelay - bestslave->delay) * miimon); - - bestslave->delay = 0; - bestslave->link = BOND_LINK_UP; - bestslave->jiffies = jiffies; - } - - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - bond_set_slave_active_flags(bestslave); - bond_mc_update(bond, bestslave, NULL); - } else { - bestslave->state = BOND_STATE_ACTIVE; - } - write_lock(&bond->ptrlock); - bond->current_slave = bestslave; - write_unlock(&bond->ptrlock); - } else if (slave_died) { - /* print this message only once a slave has just died */ - printk(KERN_INFO - "%s: now running without any active interface !\n", - master->name); - } - } - - read_unlock_irqrestore(&bond->lock, flags); - /* re-arm the timer */ - mod_timer(&bond->mii_timer, jiffies + (miimon * HZ / 1000)); -} - -/* - * this function is called regularly to monitor each slave's link - * ensuring that traffic is being sent and received when arp monitoring - * is used in load-balancing mode. if the adapter has been dormant, then an - * arp is transmitted to generate traffic. see activebackup_arp_monitor for - * arp monitoring in active backup mode. - */ -static void loadbalance_arp_monitor(struct net_device *master) -{ - bonding_t *bond; - unsigned long flags; - slave_t *slave; - int the_delta_in_ticks = arp_interval * HZ / 1000; - int next_timer = jiffies + (arp_interval * HZ / 1000); - - bond = (struct bonding *) master->priv; - if (master->priv == NULL) { - mod_timer(&bond->arp_timer, next_timer); - return; - } - - read_lock_irqsave(&bond->lock, flags); - - /* TODO: investigate why rtnl_shlock_nowait and rtnl_exlock_nowait - * are called below and add comment why they are required... 
- */ - if ((!IS_UP(master)) || rtnl_shlock_nowait()) { - mod_timer(&bond->arp_timer, next_timer); - read_unlock_irqrestore(&bond->lock, flags); - return; - } - - if (rtnl_exlock_nowait()) { - rtnl_shunlock(); - mod_timer(&bond->arp_timer, next_timer); - read_unlock_irqrestore(&bond->lock, flags); - return; - } - - /* see if any of the previous devices are up now (i.e. they have - * xmt and rcv traffic). the current_slave does not come into - * the picture unless it is null. also, slave->jiffies is not needed - * here because we send an arp on each slave and give a slave as - * long as it needs to get the tx/rx within the delta. - * TODO: what about up/down delay in arp mode? it wasn't here before - * so it can wait - */ - slave = (slave_t *)bond; - while ((slave = slave->prev) != (slave_t *)bond) { - - if (slave->link != BOND_LINK_UP) { - - if (((jiffies - slave->dev->trans_start) <= - the_delta_in_ticks) && - ((jiffies - slave->dev->last_rx) <= - the_delta_in_ticks)) { - - slave->link = BOND_LINK_UP; - slave->state = BOND_STATE_ACTIVE; - - /* primary_slave has no meaning in round-robin - * mode. the window of a slave being up and - * current_slave being null after enslaving - * is closed. 
- */ - read_lock(&bond->ptrlock); - if (bond->current_slave == NULL) { - read_unlock(&bond->ptrlock); - printk(KERN_INFO - "%s: link status definitely up " - "for interface %s, ", - master->name, - slave->dev->name); - change_active_interface(bond); - } else { - read_unlock(&bond->ptrlock); - printk(KERN_INFO - "%s: interface %s is now up\n", - master->name, - slave->dev->name); - } - } - } else { - /* slave->link == BOND_LINK_UP */ - - /* not all switches will respond to an arp request - * when the source ip is 0, so don't take the link down - * if we don't know our ip yet - */ - if (((jiffies - slave->dev->trans_start) >= - (2*the_delta_in_ticks)) || - (((jiffies - slave->dev->last_rx) >= - (2*the_delta_in_ticks)) && my_ip !=0)) { - slave->link = BOND_LINK_DOWN; - slave->state = BOND_STATE_BACKUP; - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } - printk(KERN_INFO - "%s: interface %s is now down.\n", - master->name, - slave->dev->name); - - read_lock(&bond->ptrlock); - if (slave == bond->current_slave) { - read_unlock(&bond->ptrlock); - change_active_interface(bond); - } else { - read_unlock(&bond->ptrlock); - } - } - } - - /* note: if switch is in round-robin mode, all links - * must tx arp to ensure all links rx an arp - otherwise - * links may oscillate or not come up at all; if switch is - * in something like xor mode, there is nothing we can - * do - all replies will be rx'ed on same link causing slaves - * to be unstable during low/no traffic periods - */ - if (IS_UP(slave->dev)) { - arp_send_all(slave); - } - } - - rtnl_exunlock(); - rtnl_shunlock(); - read_unlock_irqrestore(&bond->lock, flags); - - /* re-arm the timer */ - mod_timer(&bond->arp_timer, next_timer); -} - -/* - * When using arp monitoring in active-backup mode, this function is - * called to determine if any backup slaves have went down or a new - * current slave needs to be found. 
- * The backup slaves never generate traffic, they are considered up by merely - * receiving traffic. If the current slave goes down, each backup slave will - * be given the opportunity to tx/rx an arp before being taken down - this - * prevents all slaves from being taken down due to the current slave not - * sending any traffic for the backups to receive. The arps are not necessarily - * necessary, any tx and rx traffic will keep the current slave up. While any - * rx traffic will keep the backup slaves up, the current slave is responsible - * for generating traffic to keep them up regardless of any other traffic they - * may have received. - * see loadbalance_arp_monitor for arp monitoring in load balancing mode - */ -static void activebackup_arp_monitor(struct net_device *master) -{ - bonding_t *bond; - unsigned long flags; - slave_t *slave; - int the_delta_in_ticks = arp_interval * HZ / 1000; - int next_timer = jiffies + (arp_interval * HZ / 1000); - - bond = (struct bonding *) master->priv; - if (master->priv == NULL) { - mod_timer(&bond->arp_timer, next_timer); - return; - } - - read_lock_irqsave(&bond->lock, flags); - - if (!IS_UP(master)) { - mod_timer(&bond->arp_timer, next_timer); - read_unlock_irqrestore(&bond->lock, flags); - return; - } - - /* determine if any slave has come up or any backup slave has - * gone down - * TODO: what about up/down delay in arp mode? 
it wasn't here before - * so it can wait - */ - slave = (slave_t *)bond; - while ((slave = slave->prev) != (slave_t *)bond) { - - if (slave->link != BOND_LINK_UP) { - if ((jiffies - slave->dev->last_rx) <= - the_delta_in_ticks) { - - slave->link = BOND_LINK_UP; - write_lock(&bond->ptrlock); - if ((bond->current_slave == NULL) && - ((jiffies - slave->dev->trans_start) <= - the_delta_in_ticks)) { - bond->current_slave = slave; - bond_set_slave_active_flags(slave); - bond_mc_update(bond, slave, NULL); - bond->current_arp_slave = NULL; - } else if (bond->current_slave != slave) { - /* this slave has just come up but we - * already have a current slave; this - * can also happen if bond_enslave adds - * a new slave that is up while we are - * searching for a new slave - */ - bond_set_slave_inactive_flags(slave); - bond->current_arp_slave = NULL; - } - - if (slave == bond->current_slave) { - printk(KERN_INFO - "%s: %s is up and now the " - "active interface\n", - master->name, - slave->dev->name); - } else { - printk(KERN_INFO - "%s: backup interface %s is " - "now up\n", - master->name, - slave->dev->name); - } - - write_unlock(&bond->ptrlock); - } - } else { - read_lock(&bond->ptrlock); - if ((slave != bond->current_slave) && - (bond->current_arp_slave == NULL) && - (((jiffies - slave->dev->last_rx) >= - 3*the_delta_in_ticks) && (my_ip != 0))) { - /* a backup slave has gone down; three times - * the delta allows the current slave to be - * taken out before the backup slave. 
- * note: a non-null current_arp_slave indicates - * the current_slave went down and we are - * searching for a new one; under this - * condition we only take the current_slave - * down - this gives each slave a chance to - * tx/rx traffic before being taken out - */ - read_unlock(&bond->ptrlock); - slave->link = BOND_LINK_DOWN; - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } - bond_set_slave_inactive_flags(slave); - printk(KERN_INFO - "%s: backup interface %s is now down\n", - master->name, - slave->dev->name); - } else { - read_unlock(&bond->ptrlock); - } - } - } - - read_lock(&bond->ptrlock); - slave = bond->current_slave; - read_unlock(&bond->ptrlock); - - if (slave != NULL) { - - /* if we have sent traffic in the past 2*arp_intervals but - * haven't xmit and rx traffic in that time interval, select - * a different slave. slave->jiffies is only updated when - * a slave first becomes the current_slave - not necessarily - * after every arp; this ensures the slave has a full 2*delta - * before being taken out. 
if a primary is being used, check - * if it is up and needs to take over as the current_slave - */ - if ((((jiffies - slave->dev->trans_start) >= - (2*the_delta_in_ticks)) || - (((jiffies - slave->dev->last_rx) >= - (2*the_delta_in_ticks)) && (my_ip != 0))) && - ((jiffies - slave->jiffies) >= 2*the_delta_in_ticks)) { - - slave->link = BOND_LINK_DOWN; - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } - printk(KERN_INFO "%s: link status down for " - "active interface %s, disabling it", - master->name, - slave->dev->name); - slave = change_active_interface(bond); - bond->current_arp_slave = slave; - if (slave != NULL) { - slave->jiffies = jiffies; - } - - } else if ((bond->primary_slave != NULL) && - (bond->primary_slave != slave) && - (bond->primary_slave->link == BOND_LINK_UP)) { - /* at this point, slave is the current_slave */ - printk(KERN_INFO - "%s: changing from interface %s to primary " - "interface %s\n", - master->name, - slave->dev->name, - bond->primary_slave->dev->name); - - /* primary is up so switch to it */ - bond_set_slave_inactive_flags(slave); - bond_mc_update(bond, bond->primary_slave, slave); - write_lock(&bond->ptrlock); - bond->current_slave = bond->primary_slave; - write_unlock(&bond->ptrlock); - slave = bond->primary_slave; - bond_set_slave_active_flags(slave); - slave->jiffies = jiffies; - } else { - bond->current_arp_slave = NULL; - } - - /* the current slave must tx an arp to ensure backup slaves - * rx traffic - */ - if ((slave != NULL) && - (((jiffies - slave->dev->last_rx) >= the_delta_in_ticks) && - (my_ip != 0))) { - arp_send_all(slave); - } - } - - /* if we don't have a current_slave, search for the next available - * backup slave from the current_arp_slave and make it the candidate - * for becoming the current_slave - */ - if (slave == NULL) { - - if ((bond->current_arp_slave == NULL) || - (bond->current_arp_slave == (slave_t *)bond)) { - bond->current_arp_slave = bond->prev; - } - - if 
(bond->current_arp_slave != (slave_t *)bond) { - bond_set_slave_inactive_flags(bond->current_arp_slave); - slave = bond->current_arp_slave->next; - - /* search for next candidate */ - do { - if (IS_UP(slave->dev)) { - slave->link = BOND_LINK_BACK; - bond_set_slave_active_flags(slave); - arp_send_all(slave); - slave->jiffies = jiffies; - bond->current_arp_slave = slave; - break; - } - - /* if the link state is up at this point, we - * mark it down - this can happen if we have - * simultaneous link failures and - * change_active_interface doesn't make this - * one the current slave so it is still marked - * up when it is actually down - */ - if (slave->link == BOND_LINK_UP) { - slave->link = BOND_LINK_DOWN; - if (slave->link_failure_count < - UINT_MAX) { - slave->link_failure_count++; - } - - bond_set_slave_inactive_flags(slave); - printk(KERN_INFO - "%s: backup interface " - "%s is now down.\n", - master->name, - slave->dev->name); - } - } while ((slave = slave->next) != - bond->current_arp_slave->next); - } - } - - mod_timer(&bond->arp_timer, next_timer); - read_unlock_irqrestore(&bond->lock, flags); -} - -typedef uint32_t in_addr_t; - -int -my_inet_aton(char *cp, unsigned long *the_addr) { - static const in_addr_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff }; - in_addr_t val; - char c; - union iaddr { - uint8_t bytes[4]; - uint32_t word; - } res; - uint8_t *pp = res.bytes; - int digit,base; - - res.word = 0; - - c = *cp; - for (;;) { - /* - * Collect number up to ``.''. - * Values are specified as for C: - * 0x=hex, 0=octal, isdigit=decimal. 
- */ - if (!isdigit(c)) goto ret_0; - val = 0; base = 10; digit = 0; - for (;;) { - if (isdigit(c)) { - val = (val * base) + (c - '0'); - c = *++cp; - digit = 1; - } else { - break; - } - } - if (c == '.') { - /* - * Internet format: - * a.b.c.d - * a.b.c (with c treated as 16 bits) - * a.b (with b treated as 24 bits) - */ - if (pp > res.bytes + 2 || val > 0xff) { - goto ret_0; - } - *pp++ = val; - c = *++cp; - } else - break; - } - /* - * Check for trailing characters. - */ - if (c != '\0' && (!isascii(c) || !isspace(c))) { - goto ret_0; - } - /* - * Did we get a valid digit? - */ - if (!digit) { - goto ret_0; - } - - /* Check whether the last part is in its limits depending on - the number of parts in total. */ - if (val > max[pp - res.bytes]) { - goto ret_0; - } - - if (the_addr != NULL) { - *the_addr = res.word | htonl (val); - } - - return (1); - -ret_0: - return (0); -} - -static int bond_sethwaddr(struct net_device *master, struct net_device *slave) -{ -#ifdef BONDING_DEBUG - printk(KERN_CRIT "bond_sethwaddr: master=%x\n", (unsigned int)master); - printk(KERN_CRIT "bond_sethwaddr: slave=%x\n", (unsigned int)slave); - printk(KERN_CRIT "bond_sethwaddr: slave->addr_len=%d\n", slave->addr_len); -#endif - memcpy(master->dev_addr, slave->dev_addr, slave->addr_len); - return 0; -} - -static int bond_info_query(struct net_device *master, struct ifbond *info) -{ - bonding_t *bond = (struct bonding *) master->priv; - slave_t *slave; - unsigned long flags; - - info->bond_mode = bond_mode; - info->num_slaves = 0; - info->miimon = miimon; - - read_lock_irqsave(&bond->lock, flags); - for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { - info->num_slaves++; - } - read_unlock_irqrestore(&bond->lock, flags); - - return 0; -} - -static int bond_slave_info_query(struct net_device *master, - struct ifslave *info) -{ - bonding_t *bond = (struct bonding *) master->priv; - slave_t *slave; - int cur_ndx = 0; - unsigned long flags; - - if (info->slave_id < 0) { 
- return -ENODEV; - } - - read_lock_irqsave(&bond->lock, flags); - for (slave = bond->prev; - slave != (slave_t *)bond && cur_ndx < info->slave_id; - slave = slave->prev) { - cur_ndx++; - } - read_unlock_irqrestore(&bond->lock, flags); - - if (slave != (slave_t *)bond) { - strcpy(info->slave_name, slave->dev->name); - info->link = slave->link; - info->state = slave->state; - info->link_failure_count = slave->link_failure_count; - } else { - return -ENODEV; - } - - return 0; -} - -static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd) -{ - struct net_device *slave_dev = NULL; - struct ifbond *u_binfo = NULL, k_binfo; - struct ifslave *u_sinfo = NULL, k_sinfo; - struct mii_ioctl_data *mii = NULL; - int ret = 0; - -#ifdef BONDING_DEBUG - printk(KERN_INFO "bond_ioctl: master=%s, cmd=%d\n", - master_dev->name, cmd); -#endif - - switch (cmd) { - case SIOCGMIIPHY: - mii = (struct mii_ioctl_data *)&ifr->ifr_data; - if (mii == NULL) { - return -EINVAL; - } - mii->phy_id = 0; - /* Fall Through */ - case SIOCGMIIREG: - /* - * We do this again just in case we were called by SIOCGMIIREG - * instead of SIOCGMIIPHY. 
- */ - mii = (struct mii_ioctl_data *)&ifr->ifr_data; - if (mii == NULL) { - return -EINVAL; - } - if (mii->reg_num == 1) { - mii->val_out = bond_check_mii_link( - (struct bonding *)master_dev->priv); - } - return 0; - case BOND_INFO_QUERY_OLD: - case SIOCBONDINFOQUERY: - u_binfo = (struct ifbond *)ifr->ifr_data; - if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { - return -EFAULT; - } - ret = bond_info_query(master_dev, &k_binfo); - if (ret == 0) { - if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { - return -EFAULT; - } - } - return ret; - case BOND_SLAVE_INFO_QUERY_OLD: - case SIOCBONDSLAVEINFOQUERY: - u_sinfo = (struct ifslave *)ifr->ifr_data; - if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { - return -EFAULT; - } - ret = bond_slave_info_query(master_dev, &k_sinfo); - if (ret == 0) { - if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { - return -EFAULT; - } - } - return ret; - } - - if (!capable(CAP_NET_ADMIN)) { - return -EPERM; - } - - slave_dev = dev_get_by_name(ifr->ifr_slave); - -#ifdef BONDING_DEBUG - printk(KERN_INFO "slave_dev=%x: \n", (unsigned int)slave_dev); - printk(KERN_INFO "slave_dev->name=%s: \n", slave_dev->name); -#endif - - if (slave_dev == NULL) { - ret = -ENODEV; - } else { - switch (cmd) { - case BOND_ENSLAVE_OLD: - case SIOCBONDENSLAVE: - ret = bond_enslave(master_dev, slave_dev); - break; - case BOND_RELEASE_OLD: - case SIOCBONDRELEASE: - ret = bond_release(master_dev, slave_dev); - break; - case BOND_SETHWADDR_OLD: - case SIOCBONDSETHWADDR: - ret = bond_sethwaddr(master_dev, slave_dev); - break; - case BOND_CHANGE_ACTIVE_OLD: - case SIOCBONDCHANGEACTIVE: - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - ret = bond_change_active(master_dev, slave_dev); - } - else { - ret = -EINVAL; - } - break; - default: - ret = -EOPNOTSUPP; - } - dev_put(slave_dev); - } - return ret; -} - -#ifdef CONFIG_NET_FASTROUTE -static int bond_accept_fastpath(struct net_device *dev, struct dst_entry *dst) -{ - return -1; -} -#endif - -/* 
- * in broadcast mode, we send everything to all usable interfaces. - */ -static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev) -{ - slave_t *slave, *start_at; - struct bonding *bond = (struct bonding *) dev->priv; - unsigned long flags; - struct net_device *device_we_should_send_to = 0; - - if (!IS_UP(dev)) { /* bond down */ - dev_kfree_skb(skb); - return 0; - } - - read_lock_irqsave(&bond->lock, flags); - - read_lock(&bond->ptrlock); - slave = start_at = bond->current_slave; - read_unlock(&bond->ptrlock); - - if (slave == NULL) { /* we're at the root, get the first slave */ - /* no suitable interface, frame not sent */ - read_unlock_irqrestore(&bond->lock, flags); - dev_kfree_skb(skb); - return 0; - } - - do { - if (IS_UP(slave->dev) - && (slave->link == BOND_LINK_UP) - && (slave->state == BOND_STATE_ACTIVE)) { - if (device_we_should_send_to) { - struct sk_buff *skb2; - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - printk(KERN_ERR "bond_xmit_broadcast: skb_clone() failed\n"); - continue; - } - - skb2->dev = device_we_should_send_to; - skb2->priority = 1; - dev_queue_xmit(skb2); - } - device_we_should_send_to = slave->dev; - } - } while ((slave = slave->next) != start_at); - - if (device_we_should_send_to) { - skb->dev = device_we_should_send_to; - skb->priority = 1; - dev_queue_xmit(skb); - } else - dev_kfree_skb(skb); - - /* frame sent to all suitable interfaces */ - read_unlock_irqrestore(&bond->lock, flags); - return 0; -} - -static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev) -{ - slave_t *slave, *start_at; - struct bonding *bond = (struct bonding *) dev->priv; - unsigned long flags; - - if (!IS_UP(dev)) { /* bond down */ - dev_kfree_skb(skb); - return 0; - } - - read_lock_irqsave(&bond->lock, flags); - - read_lock(&bond->ptrlock); - slave = start_at = bond->current_slave; - read_unlock(&bond->ptrlock); - - if (slave == NULL) { /* we're at the root, get the first slave */ - /* no suitable interface, frame 
not sent */ - dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); - return 0; - } - - do { - if (IS_UP(slave->dev) - && (slave->link == BOND_LINK_UP) - && (slave->state == BOND_STATE_ACTIVE)) { - - skb->dev = slave->dev; - skb->priority = 1; - dev_queue_xmit(skb); - - write_lock(&bond->ptrlock); - bond->current_slave = slave->next; - write_unlock(&bond->ptrlock); - - read_unlock_irqrestore(&bond->lock, flags); - return 0; - } - } while ((slave = slave->next) != start_at); - - /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); - return 0; -} - -/* - * in XOR mode, we determine the output device by performing xor on - * the source and destination hw adresses. If this device is not - * enabled, find the next slave following this xor slave. - */ -static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev) -{ - slave_t *slave, *start_at; - struct bonding *bond = (struct bonding *) dev->priv; - unsigned long flags; - struct ethhdr *data = (struct ethhdr *)skb->data; - int slave_no; - - if (!IS_UP(dev)) { /* bond down */ - dev_kfree_skb(skb); - return 0; - } - - read_lock_irqsave(&bond->lock, flags); - slave = bond->prev; - - /* we're at the root, get the first slave */ - if (bond->slave_cnt == 0) { - /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); - return 0; - } - - slave_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % bond->slave_cnt; - - while ( (slave_no > 0) && (slave != (slave_t *)bond) ) { - slave = slave->prev; - slave_no--; - } - start_at = slave; - - do { - if (IS_UP(slave->dev) - && (slave->link == BOND_LINK_UP) - && (slave->state == BOND_STATE_ACTIVE)) { - - skb->dev = slave->dev; - skb->priority = 1; - dev_queue_xmit(skb); - - read_unlock_irqrestore(&bond->lock, flags); - return 0; - } - } while ((slave = slave->next) != start_at); - - /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); - 
read_unlock_irqrestore(&bond->lock, flags); - return 0; -} - -/* - * in active-backup mode, we know that bond->current_slave is always valid if - * the bond has a usable interface. - */ -static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev) -{ - struct bonding *bond = (struct bonding *) dev->priv; - unsigned long flags; - int ret; - - if (!IS_UP(dev)) { /* bond down */ - dev_kfree_skb(skb); - return 0; - } - - /* if we are sending arp packets, try to at least - identify our own ip address */ - if ( (arp_interval > 0) && (my_ip == 0) && - (skb->protocol == __constant_htons(ETH_P_ARP) ) ) { - char *the_ip = (((char *)skb->data)) - + sizeof(struct ethhdr) - + sizeof(struct arphdr) + - ETH_ALEN; - memcpy(&my_ip, the_ip, 4); - } - - /* if we are sending arp packets and don't know - * the target hw address, save it so we don't need - * to use a broadcast address. - * don't do this if in active backup mode because the slaves must - * receive packets to stay up, and the only ones they receive are - * broadcasts. 
- */ - if ( (bond_mode != BOND_MODE_ACTIVEBACKUP) && - (arp_ip_count == 1) && - (arp_interval > 0) && (arp_target_hw_addr == NULL) && - (skb->protocol == __constant_htons(ETH_P_IP) ) ) { - struct ethhdr *eth_hdr = - (struct ethhdr *) (((char *)skb->data)); - struct iphdr *ip_hdr = (struct iphdr *)(eth_hdr + 1); - - if (arp_target[0] == ip_hdr->daddr) { - arp_target_hw_addr = kmalloc(ETH_ALEN, GFP_KERNEL); - if (arp_target_hw_addr != NULL) - memcpy(arp_target_hw_addr, eth_hdr->h_dest, ETH_ALEN); - } - } - - read_lock_irqsave(&bond->lock, flags); - - read_lock(&bond->ptrlock); - if (bond->current_slave != NULL) { /* one usable interface */ - skb->dev = bond->current_slave->dev; - read_unlock(&bond->ptrlock); - skb->priority = 1; - ret = dev_queue_xmit(skb); - read_unlock_irqrestore(&bond->lock, flags); - return 0; - } - else { - read_unlock(&bond->ptrlock); - } - - /* no suitable interface, frame not sent */ -#ifdef BONDING_DEBUG - printk(KERN_INFO "There was no suitable interface, so we don't transmit\n"); -#endif - dev_kfree_skb(skb); - read_unlock_irqrestore(&bond->lock, flags); - return 0; -} - -static struct net_device_stats *bond_get_stats(struct net_device *dev) -{ - bonding_t *bond = dev->priv; - struct net_device_stats *stats = bond->stats, *sstats; - slave_t *slave; - unsigned long flags; - - memset(bond->stats, 0, sizeof(struct net_device_stats)); - - read_lock_irqsave(&bond->lock, flags); - - for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) { - sstats = slave->dev->get_stats(slave->dev); - - stats->rx_packets += sstats->rx_packets; - stats->rx_bytes += sstats->rx_bytes; - stats->rx_errors += sstats->rx_errors; - stats->rx_dropped += sstats->rx_dropped; - - stats->tx_packets += sstats->tx_packets; - stats->tx_bytes += sstats->tx_bytes; - stats->tx_errors += sstats->tx_errors; - stats->tx_dropped += sstats->tx_dropped; - - stats->multicast += sstats->multicast; - stats->collisions += sstats->collisions; - - stats->rx_length_errors += 
sstats->rx_length_errors; - stats->rx_over_errors += sstats->rx_over_errors; - stats->rx_crc_errors += sstats->rx_crc_errors; - stats->rx_frame_errors += sstats->rx_frame_errors; - stats->rx_fifo_errors += sstats->rx_fifo_errors; - stats->rx_missed_errors += sstats->rx_missed_errors; - - stats->tx_aborted_errors += sstats->tx_aborted_errors; - stats->tx_carrier_errors += sstats->tx_carrier_errors; - stats->tx_fifo_errors += sstats->tx_fifo_errors; - stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; - stats->tx_window_errors += sstats->tx_window_errors; - - } - - read_unlock_irqrestore(&bond->lock, flags); - return stats; -} - -static int bond_get_info(char *buf, char **start, off_t offset, int length) -{ - bonding_t *bond = these_bonds; - int len = 0; - off_t begin = 0; - u16 link; - slave_t *slave = NULL; - unsigned long flags; - - while (bond != NULL) { - /* - * This function locks the mutex, so we can't lock it until - * afterwards - */ - link = bond_check_mii_link(bond); - - len += sprintf(buf + len, "Bonding Mode: %s\n", - bond_mode_name()); - - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - read_lock_irqsave(&bond->lock, flags); - read_lock(&bond->ptrlock); - if (bond->current_slave != NULL) { - len += sprintf(buf + len, - "Currently Active Slave: %s\n", - bond->current_slave->dev->name); - } - read_unlock(&bond->ptrlock); - read_unlock_irqrestore(&bond->lock, flags); - } - - len += sprintf(buf + len, "MII Status: "); - len += sprintf(buf + len, - link == BMSR_LSTATUS ? 
"up\n" : "down\n"); - len += sprintf(buf + len, "MII Polling Interval (ms): %d\n", - miimon); - len += sprintf(buf + len, "Up Delay (ms): %d\n", - updelay * miimon); - len += sprintf(buf + len, "Down Delay (ms): %d\n", - downdelay * miimon); - len += sprintf(buf + len, "Multicast Mode: %s\n", - multicast_mode_name()); - - read_lock_irqsave(&bond->lock, flags); - for (slave = bond->prev; slave != (slave_t *)bond; - slave = slave->prev) { - len += sprintf(buf + len, "\nSlave Interface: %s\n", slave->dev->name); - - len += sprintf(buf + len, "MII Status: "); - - len += sprintf(buf + len, - slave->link == BOND_LINK_UP ? - "up\n" : "down\n"); - len += sprintf(buf + len, "Link Failure Count: %d\n", - slave->link_failure_count); - } - read_unlock_irqrestore(&bond->lock, flags); - - /* - * Figure out the calcs for the /proc/net interface - */ - *start = buf + (offset - begin); - len -= (offset - begin); - if (len > length) { - len = length; - } - if (len < 0) { - len = 0; - } - - - bond = bond->next_bond; - } - return len; -} - -static int bond_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct bonding *this_bond = (struct bonding *)these_bonds; - struct bonding *last_bond; - struct net_device *event_dev = (struct net_device *)ptr; - - /* while there are bonds configured */ - while (this_bond != NULL) { - if (this_bond == event_dev->priv ) { - switch (event) { - case NETDEV_UNREGISTER: - /* - * remove this bond from a linked list of - * bonds - */ - if (this_bond == these_bonds) { - these_bonds = this_bond->next_bond; - } else { - for (last_bond = these_bonds; - last_bond != NULL; - last_bond = last_bond->next_bond) { - if (last_bond->next_bond == - this_bond) { - last_bond->next_bond = - this_bond->next_bond; - } - } - } - return NOTIFY_DONE; - - default: - return NOTIFY_DONE; - } - } else if (this_bond->device == event_dev->master) { - switch (event) { - case NETDEV_UNREGISTER: - bond_release(this_bond->device, event_dev); - break; - } - 
return NOTIFY_DONE; - } - this_bond = this_bond->next_bond; - } - return NOTIFY_DONE; -} - -static struct notifier_block bond_netdev_notifier = { - notifier_call: bond_event, -}; - -static int __init bond_init(struct net_device *dev) -{ - bonding_t *bond, *this_bond, *last_bond; - int count; - -#ifdef BONDING_DEBUG - printk (KERN_INFO "Begin bond_init for %s\n", dev->name); -#endif - bond = kmalloc(sizeof(struct bonding), GFP_KERNEL); - if (bond == NULL) { - return -ENOMEM; - } - memset(bond, 0, sizeof(struct bonding)); - - /* initialize rwlocks */ - rwlock_init(&bond->lock); - rwlock_init(&bond->ptrlock); - - bond->stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); - if (bond->stats == NULL) { - kfree(bond); - return -ENOMEM; - } - memset(bond->stats, 0, sizeof(struct net_device_stats)); - - bond->next = bond->prev = (slave_t *)bond; - bond->current_slave = NULL; - bond->current_arp_slave = NULL; - bond->device = dev; - dev->priv = bond; - - /* Initialize the device structure. */ - switch (bond_mode) { - case BOND_MODE_ACTIVEBACKUP: - dev->hard_start_xmit = bond_xmit_activebackup; - break; - case BOND_MODE_ROUNDROBIN: - dev->hard_start_xmit = bond_xmit_roundrobin; - break; - case BOND_MODE_XOR: - dev->hard_start_xmit = bond_xmit_xor; - break; - case BOND_MODE_BROADCAST: - dev->hard_start_xmit = bond_xmit_broadcast; - break; - default: - printk(KERN_ERR "Unknown bonding mode %d\n", bond_mode); - kfree(bond->stats); - kfree(bond); - return -EINVAL; - } - - dev->get_stats = bond_get_stats; - dev->open = bond_open; - dev->stop = bond_close; - dev->set_multicast_list = set_multicast_list; - dev->do_ioctl = bond_ioctl; - - /* - * Fill in the fields of the device structure with ethernet-generic - * values. 
- */ - - ether_setup(dev); - - dev->tx_queue_len = 0; - dev->flags |= IFF_MASTER|IFF_MULTICAST; -#ifdef CONFIG_NET_FASTROUTE - dev->accept_fastpath = bond_accept_fastpath; -#endif - - printk(KERN_INFO "%s registered with", dev->name); - if (miimon > 0) { - printk(" MII link monitoring set to %d ms", miimon); - updelay /= miimon; - downdelay /= miimon; - } else { - printk("out MII link monitoring"); - } - printk(", in %s mode.\n", bond_mode_name()); - - printk(KERN_INFO "%s registered with", dev->name); - if (arp_interval > 0) { - printk(" ARP monitoring set to %d ms with %d target(s):", - arp_interval, arp_ip_count); - for (count=0 ; countbond_proc_dir = proc_mkdir(dev->name, proc_net); - if (bond->bond_proc_dir == NULL) { - printk(KERN_ERR "%s: Cannot init /proc/net/%s/\n", - dev->name, dev->name); - kfree(bond->stats); - kfree(bond); - return -ENOMEM; - } - bond->bond_proc_info_file = - create_proc_info_entry("info", 0, bond->bond_proc_dir, - bond_get_info); - if (bond->bond_proc_info_file == NULL) { - printk(KERN_ERR "%s: Cannot init /proc/net/%s/info\n", - dev->name, dev->name); - remove_proc_entry(dev->name, proc_net); - kfree(bond->stats); - kfree(bond); - return -ENOMEM; - } -#endif /* CONFIG_PROC_FS */ - - if (first_pass == 1) { - these_bonds = bond; - register_netdevice_notifier(&bond_netdev_notifier); - first_pass = 0; - } else { - last_bond = these_bonds; - this_bond = these_bonds->next_bond; - while (this_bond != NULL) { - last_bond = this_bond; - this_bond = this_bond->next_bond; - } - last_bond->next_bond = bond; - } - - return 0; -} - -/* -static int __init bond_probe(struct net_device *dev) -{ - bond_init(dev); - return 0; -} - */ - -/* - * Convert string input module parms. Accept either the - * number of the mode or its string name. 
- */ -static inline int -bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl) -{ - int i; - - for (i = 0; tbl[i].modename != NULL; i++) { - if ((isdigit(*mode_arg) && - tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) || - (0 == strncmp(mode_arg, tbl[i].modename, - strlen(tbl[i].modename)))) { - return tbl[i].mode; - } - } - - return -1; -} - - -static int __init bonding_init(void) -{ - int no; - int err; - - /* Find a name for this unit */ - static struct net_device *dev_bond = NULL; - - printk(KERN_INFO "%s", version); - - /* - * Convert string parameters. - */ - if (mode) { - bond_mode = bond_parse_parm(mode, bond_mode_tbl); - if (bond_mode == -1) { - printk(KERN_WARNING - "bonding_init(): Invalid bonding mode \"%s\"\n", - mode == NULL ? "NULL" : mode); - return -EINVAL; - } - } - - if (multicast) { - multicast_mode = bond_parse_parm(multicast, bond_mc_tbl); - if (multicast_mode == -1) { - printk(KERN_WARNING - "bonding_init(): Invalid multicast mode \"%s\"\n", - multicast == NULL ? 
"NULL" : multicast); - return -EINVAL; - } - } - - if (max_bonds < 1 || max_bonds > INT_MAX) { - printk(KERN_WARNING - "bonding_init(): max_bonds (%d) not in range %d-%d, " - "so it was reset to BOND_DEFAULT_MAX_BONDS (%d)", - max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); - max_bonds = BOND_DEFAULT_MAX_BONDS; - } - dev_bond = dev_bonds = kmalloc(max_bonds*sizeof(struct net_device), - GFP_KERNEL); - if (dev_bond == NULL) { - return -ENOMEM; - } - memset(dev_bonds, 0, max_bonds*sizeof(struct net_device)); - - if (miimon < 0) { - printk(KERN_WARNING - "bonding_init(): miimon module parameter (%d), " - "not in range 0-%d, so it was reset to %d\n", - miimon, INT_MAX, BOND_LINK_MON_INTERV); - miimon = BOND_LINK_MON_INTERV; - } - - if (updelay < 0) { - printk(KERN_WARNING - "bonding_init(): updelay module parameter (%d), " - "not in range 0-%d, so it was reset to 0\n", - updelay, INT_MAX); - updelay = 0; - } - - if (downdelay < 0) { - printk(KERN_WARNING - "bonding_init(): downdelay module parameter (%d), " - "not in range 0-%d, so it was reset to 0\n", - downdelay, INT_MAX); - downdelay = 0; - } - - if (miimon == 0) { - if ((updelay != 0) || (downdelay != 0)) { - /* just warn the user the up/down delay will have - * no effect since miimon is zero... 
- */ - printk(KERN_WARNING - "bonding_init(): miimon module parameter not " - "set and updelay (%d) or downdelay (%d) module " - "parameter is set; updelay and downdelay have " - "no effect unless miimon is set\n", - updelay, downdelay); - } - } else { - /* don't allow arp monitoring */ - if (arp_interval != 0) { - printk(KERN_WARNING - "bonding_init(): miimon (%d) and arp_interval " - "(%d) can't be used simultaneously, " - "disabling ARP monitoring\n", - miimon, arp_interval); - arp_interval = 0; - } - - if ((updelay % miimon) != 0) { - /* updelay will be rounded in bond_init() when it - * is divided by miimon, we just inform user here - */ - printk(KERN_WARNING - "bonding_init(): updelay (%d) is not a multiple " - "of miimon (%d), updelay rounded to %d ms\n", - updelay, miimon, (updelay / miimon) * miimon); - } - - if ((downdelay % miimon) != 0) { - /* downdelay will be rounded in bond_init() when it - * is divided by miimon, we just inform user here - */ - printk(KERN_WARNING - "bonding_init(): downdelay (%d) is not a " - "multiple of miimon (%d), downdelay rounded " - "to %d ms\n", - downdelay, miimon, - (downdelay / miimon) * miimon); - } - } - - if (arp_interval < 0) { - printk(KERN_WARNING - "bonding_init(): arp_interval module parameter (%d), " - "not in range 0-%d, so it was reset to %d\n", - arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); - arp_interval = BOND_LINK_ARP_INTERV; - } - - for (arp_ip_count=0 ; - (arp_ip_count < MAX_ARP_IP_TARGETS) && arp_ip_target[arp_ip_count]; - arp_ip_count++ ) { - /* TODO: check and log bad ip address */ - if (my_inet_aton(arp_ip_target[arp_ip_count], - &arp_target[arp_ip_count]) == 0) { - printk(KERN_WARNING - "bonding_init(): bad arp_ip_target module " - "parameter (%s), ARP monitoring will not be " - "performed\n", - arp_ip_target[arp_ip_count]); - arp_interval = 0; - } - } - - - if ( (arp_interval > 0) && (arp_ip_count==0)) { - /* don't allow arping if no arp_ip_target given... 
*/ - printk(KERN_WARNING - "bonding_init(): arp_interval module parameter " - "(%d) specified without providing an arp_ip_target " - "parameter, arp_interval was reset to 0\n", - arp_interval); - arp_interval = 0; - } - - if ((miimon == 0) && (arp_interval == 0)) { - /* miimon and arp_interval not set, we need one so things - * work as expected, see bonding.txt for details - */ - printk(KERN_ERR - "bonding_init(): either miimon or " - "arp_interval and arp_ip_target module parameters " - "must be specified, otherwise bonding will not detect " - "link failures! see bonding.txt for details.\n"); - } - - if ((primary != NULL) && (bond_mode != BOND_MODE_ACTIVEBACKUP)){ - /* currently, using a primary only makes sence - * in active backup mode - */ - printk(KERN_WARNING - "bonding_init(): %s primary device specified but has " - " no effect in %s mode\n", - primary, bond_mode_name()); - primary = NULL; - } - - - for (no = 0; no < max_bonds; no++) { - dev_bond->init = bond_init; - - err = dev_alloc_name(dev_bond,"bond%d"); - if (err < 0) { - kfree(dev_bonds); - return err; - } - SET_MODULE_OWNER(dev_bond); - if (register_netdev(dev_bond) != 0) { - kfree(dev_bonds); - return -EIO; - } - dev_bond++; - } - return 0; -} - -static void __exit bonding_exit(void) -{ - struct net_device *dev_bond = dev_bonds; - struct bonding *bond; - int no; - - unregister_netdevice_notifier(&bond_netdev_notifier); - - for (no = 0; no < max_bonds; no++) { - -#ifdef CONFIG_PROC_FS - bond = (struct bonding *) dev_bond->priv; - remove_proc_entry("info", bond->bond_proc_dir); - remove_proc_entry(dev_bond->name, proc_net); -#endif - unregister_netdev(dev_bond); - kfree(bond->stats); - kfree(dev_bond->priv); - - dev_bond->priv = NULL; - dev_bond++; - } - kfree(dev_bonds); -} - -module_init(bonding_init); -module_exit(bonding_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); - -/* - * Local variables: - * c-indent-level: 8 - * c-basic-offset: 8 - * tab-width: 8 - * 
End: - */ diff -Nru a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c --- a/drivers/net/cs89x0.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/cs89x0.c Thu Jun 19 23:46:52 2003 @@ -1629,16 +1629,21 @@ } -static int set_mac_address(struct net_device *dev, void *addr) +static int set_mac_address(struct net_device *dev, void *p) { int i; + struct sockaddr *addr = p; + if (netif_running(dev)) return -EBUSY; + + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + if (net_debug) { printk("%s: Setting MAC address to ", dev->name); - for (i = 0; i < 6; i++) - printk(" %2.2x", dev->dev_addr[i] = ((unsigned char *)addr)[i]); + for (i = 0; i < dev->addr_len; i++) + printk(" %2.2x", dev->dev_addr[i]); printk(".\n"); } /* set the Ethernet address */ diff -Nru a/drivers/net/dl2k.h b/drivers/net/dl2k.h --- a/drivers/net/dl2k.h Thu Jun 19 23:46:52 2003 +++ b/drivers/net/dl2k.h Thu Jun 19 23:46:52 2003 @@ -243,7 +243,6 @@ VLANTagInsert = 0x0000000010000000, TFDDone = 0x80000000, VIDShift = 32, - CFI = 0x0000100000000000, UsePriorityShift = 48, }; diff -Nru a/drivers/net/e100/e100.h b/drivers/net/e100/e100.h --- a/drivers/net/e100/e100.h Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e100/e100.h Thu Jun 19 23:46:52 2003 @@ -60,7 +60,14 @@ #include #include -#define E100_REGS_LEN 1 +#define E100_CABLE_UNKNOWN 0 +#define E100_CABLE_OK 1 +#define E100_CABLE_OPEN_NEAR 2 /* Open Circuit Near End */ +#define E100_CABLE_OPEN_FAR 3 /* Open Circuit Far End */ +#define E100_CABLE_SHORT_NEAR 4 /* Short Circuit Near End */ +#define E100_CABLE_SHORT_FAR 5 /* Short Circuit Far End */ + +#define E100_REGS_LEN 2 /* * Configure parameters for buffers per controller. * If the machine this is being used on is a faster machine (i.e. 
> 150MHz) @@ -105,8 +112,6 @@ #define E100_MAX_CU_IDLE_WAIT 50 /* Max udelays in wait_cus_idle */ /* HWI feature related constant */ -#define HWI_MAX_LOOP 100 -#define MAX_SAME_RESULTS 3 #define HWI_REGISTER_GRANULARITY 80 /* register granularity = 80 Cm */ #define HWI_NEAR_END_BOUNDARY 1000 /* Near end is defined as < 10 meters */ @@ -942,7 +947,6 @@ #ifdef CONFIG_PM u32 pci_state[16]; #endif - char ifname[IFNAMSIZ]; #ifdef E100_CU_DEBUG u8 last_cmd; u8 last_sub_cmd; @@ -956,7 +960,10 @@ #define E100_SPEED_100_FULL 4 /********* function prototypes *************/ +extern int e100_open(struct net_device *); +extern int e100_close(struct net_device *); extern void e100_isolate_driver(struct e100_private *bdp); +extern unsigned char e100_hw_init(struct e100_private *); extern void e100_sw_reset(struct e100_private *bdp, u32 reset_cmd); extern u8 e100_start_cu(struct e100_private *bdp, tcb_t *tcb); extern void e100_free_non_tx_cmd(struct e100_private *bdp, @@ -981,14 +988,13 @@ #define TEST_TIMEOUT 0x08 enum test_offsets { - E100_EEPROM_TEST_FAIL = 0, - E100_CHIP_TIMEOUT, - E100_ROM_TEST_FAIL, - E100_REG_TEST_FAIL, - E100_MAC_TEST_FAIL, - E100_LPBK_MAC_FAIL, - E100_LPBK_PHY_FAIL, - E100_MAX_TEST_RES + test_link, + test_eeprom, + test_self_test, + test_loopback_mac, + test_loopback_phy, + cable_diag, + max_test_res, /* must be last */ }; #endif diff -Nru a/drivers/net/e100/e100_main.c b/drivers/net/e100/e100_main.c --- a/drivers/net/e100/e100_main.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e100/e100_main.c Thu Jun 19 23:46:52 2003 @@ -46,6 +46,24 @@ /* Change Log * + * 2.3.13 05/08/03 + * o Feature remove: /proc/net/PRO_LAN_Adapters support gone completely + * o Feature remove: IDIAG support (use ethtool -t instead) + * o Cleanup: fixed spelling mistakes found by community + * o Feature add: ethtool cable diag test + * o Feature add: ethtool parameter support (ring size, xsum, flow ctrl) + * o Cleanup: move e100_asf_enable under CONFIG_PM to avoid warning + * [Stephen 
Rothwell (sfr@canb.auug.org.au)] + * o Bug fix: don't call any netif_carrier_* until netdev registered. + * [Andrew Morton (akpm@digeo.com)] + * o Cleanup: replace (skb->len - skb->data_len) with skb_headlen(skb) + * [jmorris@intercode.com.au] + * o Bug fix: cleanup of Tx skbs after running ethtool diags + * o Bug fix: incorrect reporting of ethtool diag overall results + * o Bug fix: must hold xmit_lock before stopping queue in ethtool + * operations that require reset h/w and driver structures. + * o Bug fix: statistic command failure would stop statistic collection. + * * 2.2.21 02/11/03 * o Removed marketing brand strings. Instead, Using generic string * "Intel(R) PRO/100 Network Connection" for all adapters. @@ -61,21 +79,6 @@ * o New feature: added ICH5 support * * 2.1.27 11/20/02 - * o Bug fix: Device command timeout due to SMBus processing during init - * o Bug fix: Not setting/clearing I (Interrupt) bit in tcb correctly - * o Bug fix: Not using EEPROM WoL setting as default in ethtool - * o Bug fix: Not able to set autoneg on using ethtool when interface down - * o Bug fix: Not able to change speed/duplex using ethtool/mii - * when interface up - * o Bug fix: Ethtool shows autoneg on when forced to 100/Full - * o Bug fix: Compiler error when CONFIG_PROC_FS not defined - * o Bug fix: 2.5.44 e100 doesn't load with preemptive kernel enabled - * (sleep while holding spinlock) - * o Bug fix: 2.1.24-k1 doesn't display complete statistics - * o Bug fix: System panic due to NULL watchdog timer dereference during - * ifconfig down, rmmod and insmod - * - * 2.1.24 10/7/02 */ #include @@ -121,14 +124,13 @@ extern u32 e100_run_diag(struct net_device *dev, u64 *test_info, u32 flags); static int e100_ethtool_test(struct net_device *, struct ifreq *); static int e100_ethtool_gstrings(struct net_device *, struct ifreq *); -static char *test_strings[] = { - "E100_EEPROM_TEST_FAIL", - "E100_CHIP_TIMEOUT", - "E100_ROM_TEST_FAIL", - "E100_REG_TEST_FAIL", - 
"E100_MAC_TEST_FAIL", - "E100_LPBK_MAC_FAIL", - "E100_LPBK_PHY_FAIL" +static char test_strings[][ETH_GSTRING_LEN] = { + "Link test (on/offline)", + "Eeprom test (on/offline)", + "Self test (offline)", + "Mac loopback (offline)", + "Phy loopback (offline)", + "Cable diagnostic (offline)" }; static int e100_ethtool_led_blink(struct net_device *, struct ifreq *); @@ -139,10 +141,10 @@ nxmit_cb_entry_t *); static void e100_free_nontx_list(struct e100_private *); static void e100_non_tx_background(unsigned long); - +static inline void e100_tx_skb_free(struct e100_private *bdp, tcb_t *tcb); /* Global Data structures and variables */ char e100_copyright[] __devinitdata = "Copyright (c) 2003 Intel Corporation"; -char e100_driver_version[]="2.2.21-k1"; +char e100_driver_version[]="2.3.13-k1"; const char *e100_full_driver_name = "Intel(R) PRO/100 Network Driver"; char e100_short_driver_name[] = "e100"; static int e100nics = 0; @@ -155,6 +157,7 @@ static int e100_notify_reboot(struct notifier_block *, unsigned long event, void *ptr); static int e100_suspend(struct pci_dev *pcid, u32 state); static int e100_resume(struct pci_dev *pcid); +static unsigned char e100_asf_enabled(struct e100_private *bdp); struct notifier_block e100_notifier_reboot = { .notifier_call = e100_notify_reboot, .next = NULL, @@ -182,8 +185,6 @@ static u8 e100_D101M_checksum(struct e100_private *, struct sk_buff *); static u8 e100_D102_check_checksum(rfd_t *); static int e100_ioctl(struct net_device *, struct ifreq *, int); -static int e100_open(struct net_device *); -static int e100_close(struct net_device *); static int e100_change_mtu(struct net_device *, int); static int e100_xmit_frame(struct sk_buff *, struct net_device *); static unsigned char e100_init(struct e100_private *); @@ -193,7 +194,6 @@ static void e100intr(int, void *, struct pt_regs *); static void e100_print_brd_conf(struct e100_private *); static void e100_set_multi(struct net_device *); -void e100_set_speed_duplex(struct e100_private 
*); static u8 e100_pci_setup(struct pci_dev *, struct e100_private *); static u8 e100_sw_init(struct e100_private *); @@ -215,7 +215,6 @@ static unsigned char e100_clr_cntrs(struct e100_private *); static unsigned char e100_load_microcode(struct e100_private *); -static unsigned char e100_hw_init(struct e100_private *); static unsigned char e100_setup_iaaddr(struct e100_private *, u8 *); static unsigned char e100_update_stats(struct e100_private *bdp); @@ -228,7 +227,6 @@ char *); unsigned char e100_wait_exec_cmplx(struct e100_private *, u32, u8, u8); void e100_exec_cmplx(struct e100_private *, u32, u8); -static unsigned char e100_asf_enabled(struct e100_private *bdp); /** * e100_get_rx_struct - retrieve cell to hold skb buff from the pool @@ -638,25 +636,7 @@ } else { bdp->rfd_size = 16; } - e100_check_options(e100nics, bdp); - if (!e100_init(bdp)) { - printk(KERN_ERR "e100: Failed to initialize, instance #%d\n", - e100nics); - rc = -ENODEV; - goto err_pci; - } - - /* Check if checksum is valid */ - cal_checksum = e100_eeprom_calculate_chksum(bdp); - read_checksum = e100_eeprom_read(bdp, (bdp->eeprom_size - 1)); - if (cal_checksum != read_checksum) { - printk(KERN_ERR "e100: Corrupted EEPROM on instance #%d\n", - e100nics); - rc = -ENODEV; - goto err_pci; - } - dev->vlan_rx_register = e100_vlan_rx_register; dev->vlan_rx_add_vid = e100_vlan_rx_add_vid; dev->vlan_rx_kill_vid = e100_vlan_rx_kill_vid; @@ -674,15 +654,32 @@ dev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; - e100nics++; - - e100_get_speed_duplex_caps(bdp); - if ((rc = register_netdev(dev)) != 0) { goto err_pci; } - memcpy(bdp->ifname, dev->name, IFNAMSIZ); - bdp->ifname[IFNAMSIZ-1] = 0; + + e100_check_options(e100nics, bdp); + + if (!e100_init(bdp)) { + printk(KERN_ERR "e100: Failed to initialize, instance #%d\n", + e100nics); + rc = -ENODEV; + goto err_unregister_netdev; + } + + /* Check if checksum is valid */ + cal_checksum = e100_eeprom_calculate_chksum(bdp); + 
read_checksum = e100_eeprom_read(bdp, (bdp->eeprom_size - 1)); + if (cal_checksum != read_checksum) { + printk(KERN_ERR "e100: Corrupted EEPROM on instance #%d\n", + e100nics); + rc = -ENODEV; + goto err_unregister_netdev; + } + + e100nics++; + + e100_get_speed_duplex_caps(bdp); printk(KERN_NOTICE "e100: %s: %s\n", @@ -708,6 +705,8 @@ goto out; +err_unregister_netdev: + unregister_netdev(dev); err_pci: iounmap(bdp->scb); pci_release_regions(pcid); @@ -973,7 +972,7 @@ } } -static int +int e100_open(struct net_device *dev) { struct e100_private *bdp; @@ -1011,7 +1010,11 @@ mod_timer(&(bdp->watchdog_timer), jiffies + (2 * HZ)); - netif_start_queue(dev); + if (dev->flags & IFF_UP) + /* Otherwise process may sleep forever */ + netif_wake_queue(dev); + else + netif_start_queue(dev); e100_start_ru(bdp); if ((rc = request_irq(dev->irq, &e100intr, SA_SHIRQ, @@ -1032,7 +1035,7 @@ return rc; } -static int +int e100_close(struct net_device *dev) { struct e100_private *bdp = dev->priv; @@ -1073,7 +1076,8 @@ goto exit2; } - if (!TCBS_AVAIL(bdp->tcb_pool) || + /* tcb list may be empty temporarily during releasing resources */ + if (!TCBS_AVAIL(bdp->tcb_pool) || (bdp->tcb_phys == 0) || (bdp->non_tx_command_state != E100_NON_TX_IDLE)) { notify_stop = true; rc = 1; @@ -1284,10 +1288,8 @@ /* read NIC's part number */ e100_rd_pwa_no(bdp); - if (!e100_hw_init(bdp)) { - printk(KERN_ERR "e100: hw init failed\n"); + if (!e100_hw_init(bdp)) return false; - } /* Interrupts are enabled after device reset */ e100_disable_clear_intr(bdp); @@ -1329,6 +1331,8 @@ spin_lock_init(&(bdp->bd_non_tx_lock)); spin_lock_init(&(bdp->config_lock)); spin_lock_init(&(bdp->mdi_access_lock)); + /* Initialize configuration data */ + e100_config_init(bdp); return 1; } @@ -1383,11 +1387,11 @@ * true - If the adapter was initialized * false - If the adapter failed initialization */ -unsigned char __devinit +unsigned char e100_hw_init(struct e100_private *bdp) { if (!e100_phy_init(bdp)) - return false; + goto err; 
e100_sw_reset(bdp, PORT_SELECTIVE_RESET); @@ -1397,27 +1401,25 @@ /* Load the CU BASE (set to 0, because we use linear mode) */ if (!e100_wait_exec_cmplx(bdp, 0, SCB_CUC_LOAD_BASE, 0)) - return false; + goto err; if (!e100_wait_exec_cmplx(bdp, 0, SCB_RUC_LOAD_BASE, 0)) - return false; + goto err; /* Load interrupt microcode */ if (e100_load_microcode(bdp)) { bdp->flags |= DF_UCODE_LOADED; } - e100_config_init(bdp); - if (!e100_config(bdp)) { - return false; - } + if (!e100_config(bdp)) + goto err; if (!e100_setup_iaaddr(bdp, bdp->device->dev_addr)) - return false; + goto err; /* Clear the internal counters */ if (!e100_clr_cntrs(bdp)) - return false; + goto err; /* Change for 82558 enhancement */ /* If 82558/9 and if the user has enabled flow control, set up the @@ -1430,6 +1432,9 @@ } return true; +err: + printk(KERN_ERR "e100: hw init failed\n"); + return false; } /** @@ -1590,9 +1595,22 @@ void e100_free_tcb_pool(struct e100_private *bdp) { + tcb_t *tcb; + int i; + /* Return tx skbs */ + for (i = 0; i < bdp->params.TxDescriptors; i++) { + tcb = bdp->tcb_pool.data; + tcb += bdp->tcb_pool.head; + e100_tx_skb_free(bdp, tcb); + if (NEXT_TCB_TOUSE(bdp->tcb_pool.head) == bdp->tcb_pool.tail) + break; + bdp->tcb_pool.head = NEXT_TCB_TOUSE(bdp->tcb_pool.head); + } pci_free_consistent(bdp->pdev, sizeof (tcb_t) * bdp->params.TxDescriptors, bdp->tcb_pool.data, bdp->tcb_phys); + bdp->tcb_pool.head = 0; + bdp->tcb_pool.tail = 1; bdp->tcb_phys = 0; } @@ -1746,12 +1764,10 @@ e100_set_multi(dev); } } - - /* Update the statistics needed by the upper interface */ - /* This should be the last statistic related command - * as it's async. now */ - e100_dump_stats_cntrs(bdp); } + /* Issue command to dump statistics from device. */ + /* Check for command completion on next watchdog timer. 
*/ + e100_dump_stats_cntrs(bdp); wmb(); @@ -2199,10 +2215,10 @@ (tcb->tbd_ptr)->tbd_buf_addr = cpu_to_le32(pci_map_single(bdp->pdev, skb->data, - (skb->len - skb->data_len), + skb_headlen(skb), PCI_DMA_TODEVICE)); (tcb->tbd_ptr)->tbd_buf_cnt = - cpu_to_le16(skb->len - skb->data_len); + cpu_to_le16(skb_headlen(skb)); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, tbd_arr_ptr++, frag++) { @@ -2542,6 +2558,7 @@ pcmd_complete = e100_cmd_complete_location(bdp); if (*pcmd_complete != le32_to_cpu(DUMP_RST_STAT_COMPLETED) && *pcmd_complete != le32_to_cpu(DUMP_STAT_COMPLETED)) { + *pcmd_complete = 0; return false; } @@ -3039,23 +3056,6 @@ e100_sw_reset(bdp, PORT_SELECTIVE_RESET); } -void -e100_set_speed_duplex(struct e100_private *bdp) -{ - int carrier_ok; - /* Device may lose link with some siwtches when */ - /* changing speed/duplex to non-autoneg. e100 */ - /* needs to remember carrier state in order to */ - /* start watchdog timer for recovering link */ - if ((carrier_ok = netif_carrier_ok(bdp->device))) - e100_isolate_driver(bdp); - e100_phy_set_speed_duplex(bdp, true); - e100_config_fc(bdp); /* re-config flow-control if necessary */ - e100_config(bdp); - if (carrier_ok) - e100_deisolate_driver(bdp, false); -} - static void e100_tcb_add_C_bit(struct e100_private *bdp) { @@ -3211,6 +3211,144 @@ case ETHTOOL_PHYS_ID: rc = e100_ethtool_led_blink(dev,ifr); break; +#ifdef ETHTOOL_GRINGPARAM + case ETHTOOL_GRINGPARAM: { + struct ethtool_ringparam ering; + struct e100_private *bdp = dev->priv; + memset((void *) &ering, 0, sizeof(ering)); + ering.rx_max_pending = E100_MAX_RFD; + ering.tx_max_pending = E100_MAX_TCB; + ering.rx_pending = bdp->params.RxDescriptors; + ering.tx_pending = bdp->params.TxDescriptors; + rc = copy_to_user(ifr->ifr_data, &ering, sizeof(ering)) + ? 
-EFAULT : 0; + return rc; + } +#endif +#ifdef ETHTOOL_SRINGPARAM + case ETHTOOL_SRINGPARAM: { + struct ethtool_ringparam ering; + struct e100_private *bdp = dev->priv; + if (copy_from_user(&ering, ifr->ifr_data, sizeof(ering))) + return -EFAULT; + if (ering.rx_pending > E100_MAX_RFD + || ering.rx_pending < E100_MIN_RFD) + return -EINVAL; + if (ering.tx_pending > E100_MAX_TCB + || ering.tx_pending < E100_MIN_TCB) + return -EINVAL; + if (netif_running(dev)) { + spin_lock_bh(&dev->xmit_lock); + e100_close(dev); + spin_unlock_bh(&dev->xmit_lock); + /* Use new values to open interface */ + bdp->params.RxDescriptors = ering.rx_pending; + bdp->params.TxDescriptors = ering.tx_pending; + e100_hw_init(bdp); + e100_open(dev); + } + else { + bdp->params.RxDescriptors = ering.rx_pending; + bdp->params.TxDescriptors = ering.tx_pending; + } + return 0; + } +#endif +#ifdef ETHTOOL_GPAUSEPARAM + case ETHTOOL_GPAUSEPARAM: { + struct ethtool_pauseparam epause; + struct e100_private *bdp = dev->priv; + memset((void *) &epause, 0, sizeof(epause)); + if ((bdp->flags & IS_BACHELOR) + && (bdp->params.b_params & PRM_FC)) { + epause.autoneg = 1; + if (bdp->flags && DF_LINK_FC_CAP) { + epause.rx_pause = 1; + epause.tx_pause = 1; + } + if (bdp->flags && DF_LINK_FC_TX_ONLY) + epause.tx_pause = 1; + } + rc = copy_to_user(ifr->ifr_data, &epause, sizeof(epause)) + ? 
-EFAULT : 0; + return rc; + } +#endif +#ifdef ETHTOOL_SPAUSEPARAM + case ETHTOOL_SPAUSEPARAM: { + struct ethtool_pauseparam epause; + struct e100_private *bdp = dev->priv; + if (!(bdp->flags & IS_BACHELOR)) + return -EINVAL; + if (copy_from_user(&epause, ifr->ifr_data, sizeof(epause))) + return -EFAULT; + if (epause.autoneg == 1) + bdp->params.b_params |= PRM_FC; + else + bdp->params.b_params &= ~PRM_FC; + if (netif_running(dev)) { + spin_lock_bh(&dev->xmit_lock); + e100_close(dev); + spin_unlock_bh(&dev->xmit_lock); + e100_hw_init(bdp); + e100_open(dev); + } + return 0; + } +#endif +#ifdef ETHTOOL_GRXCSUM + case ETHTOOL_GRXCSUM: + case ETHTOOL_GTXCSUM: + case ETHTOOL_GSG: + { struct ethtool_value eval; + struct e100_private *bdp = dev->priv; + memset((void *) &eval, 0, sizeof(eval)); + if ((ecmd.cmd == ETHTOOL_GRXCSUM) + && (bdp->params.b_params & PRM_XSUMRX)) + eval.data = 1; + else + eval.data = 0; + rc = copy_to_user(ifr->ifr_data, &eval, sizeof(eval)) + ? -EFAULT : 0; + return rc; + } +#endif +#ifdef ETHTOOL_SRXCSUM + case ETHTOOL_SRXCSUM: + case ETHTOOL_STXCSUM: + case ETHTOOL_SSG: + { struct ethtool_value eval; + struct e100_private *bdp = dev->priv; + if (copy_from_user(&eval, ifr->ifr_data, sizeof(eval))) + return -EFAULT; + if (ecmd.cmd == ETHTOOL_SRXCSUM) { + if (eval.data == 1) { + if (bdp->rev_id >= D101MA_REV_ID) + bdp->params.b_params |= PRM_XSUMRX; + else + return -EINVAL; + } else { + if (bdp->rev_id >= D101MA_REV_ID) + bdp->params.b_params &= ~PRM_XSUMRX; + else + return 0; + } + } else { + if (eval.data == 1) + return -EINVAL; + else + return 0; + } + if (netif_running(dev)) { + spin_lock_bh(&dev->xmit_lock); + e100_close(dev); + spin_unlock_bh(&dev->xmit_lock); + e100_hw_init(bdp); + e100_open(dev); + } + return 0; + } +#endif default: break; } //switch @@ -3296,7 +3434,13 @@ if ((ecmd.autoneg == AUTONEG_ENABLE) && (bdp->speed_duplex_caps & SUPPORTED_Autoneg)) { bdp->params.e100_speed_duplex = E100_AUTONEG; - e100_set_speed_duplex(bdp); + if 
(netif_running(dev)) { + spin_lock_bh(&dev->xmit_lock); + e100_close(dev); + spin_unlock_bh(&dev->xmit_lock); + e100_hw_init(bdp); + e100_open(dev); + } } else { if (ecmd.speed == SPEED_10) { if (ecmd.duplex == DUPLEX_HALF) { @@ -3327,7 +3471,13 @@ if (bdp->speed_duplex_caps & ethtool_new_speed_duplex) { bdp->params.e100_speed_duplex = e100_new_speed_duplex; - e100_set_speed_duplex(bdp); + if (netif_running(dev)) { + spin_lock_bh(&dev->xmit_lock); + e100_close(dev); + spin_unlock_bh(&dev->xmit_lock); + e100_hw_init(bdp); + e100_open(dev); + } } else { return -EOPNOTSUPP; } @@ -3362,14 +3512,14 @@ struct ethtool_test *info; int rc = -EFAULT; - info = kmalloc(sizeof(*info) + E100_MAX_TEST_RES * sizeof(u64), + info = kmalloc(sizeof(*info) + max_test_res * sizeof(u64), GFP_ATOMIC); if (!info) return -ENOMEM; memset((void *) info, 0, sizeof(*info) + - E100_MAX_TEST_RES * sizeof(u64)); + max_test_res * sizeof(u64)); if (copy_from_user(info, ifr->ifr_data, sizeof(*info))) goto exit; @@ -3377,7 +3527,7 @@ info->flags = e100_run_diag(dev, info->data, info->flags); if (!copy_to_user(ifr->ifr_data, info, - sizeof(*info) + E100_MAX_TEST_RES * sizeof(u64))) + sizeof(*info) + max_test_res * sizeof(u64))) rc = 0; exit: kfree(info); @@ -3391,6 +3541,7 @@ u32 regs_buff[E100_REGS_LEN]; struct ethtool_regs regs = {ETHTOOL_GREGS}; void *addr = ifr->ifr_data; + u16 mdi_reg; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -3403,6 +3554,8 @@ regs_buff[0] = readb(&(bdp->scb->scb_cmd_hi)) << 24 | readb(&(bdp->scb->scb_cmd_low)) << 16 | readw(&(bdp->scb->scb_status)); + e100_mdi_read(bdp, MII_NCONFIG, bdp->phy_addr, &mdi_reg); + regs_buff[1] = mdi_reg; if(copy_to_user(addr, ®s, sizeof(regs))) return -EFAULT; @@ -3426,7 +3579,13 @@ if ((bdp->speed_duplex_caps & SUPPORTED_Autoneg) && (bdp->params.e100_speed_duplex == E100_AUTONEG)) { - e100_set_speed_duplex(bdp); + if (netif_running(dev)) { + spin_lock_bh(&dev->xmit_lock); + e100_close(dev); + spin_unlock_bh(&dev->xmit_lock); + 
e100_hw_init(bdp); + e100_open(dev); + } } else { return -EFAULT; } @@ -3452,7 +3611,7 @@ info.n_stats = E100_STATS_LEN; info.regdump_len = E100_REGS_LEN * sizeof(u32); info.eedump_len = (bdp->eeprom_size << 1); - info.testinfo_len = E100_MAX_TEST_RES; + info.testinfo_len = max_test_res; if (copy_to_user(ifr->ifr_data, &info, sizeof (info))) return -EFAULT; @@ -3802,15 +3961,15 @@ switch (info.string_set) { case ETH_SS_TEST: { int ret = 0; - if (info.len > E100_MAX_TEST_RES) - info.len = E100_MAX_TEST_RES; + if (info.len > max_test_res) + info.len = max_test_res; strings = kmalloc(info.len * ETH_GSTRING_LEN, GFP_ATOMIC); if (!strings) return -ENOMEM; memset(strings, 0, info.len * ETH_GSTRING_LEN); for (i = 0; i < info.len; i++) { - sprintf(strings + i * ETH_GSTRING_LEN, "%-31s", + sprintf(strings + i * ETH_GSTRING_LEN, "%s", test_strings[i]); } if (copy_to_user(ifr->ifr_data, &info, sizeof (info))) @@ -3879,7 +4038,13 @@ bdp->params.e100_speed_duplex = E100_SPEED_10_FULL; else bdp->params.e100_speed_duplex = E100_SPEED_10_HALF; - e100_set_speed_duplex(bdp); + if (netif_running(dev)) { + spin_lock_bh(&dev->xmit_lock); + e100_close(dev); + spin_unlock_bh(&dev->xmit_lock); + e100_hw_init(bdp); + e100_open(dev); + } } else /* Only allows changing speed/duplex */ @@ -4162,7 +4327,6 @@ return 0; } -#endif /* CONFIG_PM */ /** * e100_asf_enabled - checks if ASF is configured on the current adaper @@ -4188,6 +4352,7 @@ } return false; } +#endif /* CONFIG_PM */ #ifdef E100_CU_DEBUG unsigned char diff -Nru a/drivers/net/e100/e100_phy.c b/drivers/net/e100/e100_phy.c --- a/drivers/net/e100/e100_phy.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e100/e100_phy.c Thu Jun 19 23:46:52 2003 @@ -628,8 +628,6 @@ u16 control; unsigned long expires; - e100_phy_reset(bdp); - bdp->flags |= DF_SPEED_FORCED; e100_mdi_read(bdp, MII_BMCR, bdp->phy_addr, &control); @@ -912,6 +910,10 @@ u16 ctrl_reg; ctrl_reg = BMCR_RESET; e100_mdi_write(bdp, MII_BMCR, bdp->phy_addr, ctrl_reg); + /* ieee 802.3 : 
The reset process shall be completed */ + /* within 0.5 seconds from the settting of PHY reset bit. */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ / 2); } unsigned char __devinit @@ -928,6 +930,7 @@ bdp->PhyDelay = 0; bdp->zlock_state = ZLOCK_INITIAL; + e100_phy_reset(bdp); e100_phy_set_speed_duplex(bdp, false); e100_fix_polarity(bdp); diff -Nru a/drivers/net/e100/e100_test.c b/drivers/net/e100/e100_test.c --- a/drivers/net/e100/e100_test.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e100/e100_test.c Thu Jun 19 23:46:52 2003 @@ -25,7 +25,7 @@ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 *******************************************************************************/ -#include "e100.h" +#include "e100_phy.h" #include "e100_config.h" extern u16 e100_eeprom_read(struct e100_private *, u16); @@ -46,6 +46,7 @@ static void e100_diag_loopback_cu_ru_exec(struct e100_private *); static u8 e100_diag_check_pkt(u8 *); static void e100_diag_loopback_free(struct e100_private *); +static int e100_cable_diag(struct e100_private *bdp); #define LB_PACKET_SIZE 1500 @@ -60,46 +61,52 @@ e100_run_diag(struct net_device *dev, u64 *test_info, u32 flags) { struct e100_private* bdp = dev->priv; - u8 test_result = true; - - e100_isolate_driver(bdp); + u8 test_result = 0; + if (!e100_get_link_state(bdp)) { + test_result = ETH_TEST_FL_FAILED; + test_info[test_link] = true; + } + if (!e100_diag_eeprom(dev)) { + test_result = ETH_TEST_FL_FAILED; + test_info[test_eeprom] = true; + } if (flags & ETH_TEST_FL_OFFLINE) { u8 fail_mask; - - fail_mask = e100_diag_selftest(dev); - if (fail_mask) { - test_result = false; - if (fail_mask & REGISTER_TEST_FAIL) - test_info [E100_REG_TEST_FAIL] = true; - if (fail_mask & ROM_TEST_FAIL) - test_info [E100_ROM_TEST_FAIL] = true; - if (fail_mask & SELF_TEST_FAIL) - test_info [E100_MAC_TEST_FAIL] = true; - if (fail_mask & TEST_TIMEOUT) - test_info [E100_CHIP_TIMEOUT] = true; + if (netif_running(dev)) { + 
spin_lock_bh(&dev->xmit_lock); + e100_close(dev); + spin_unlock_bh(&dev->xmit_lock); + } + if (e100_diag_selftest(dev)) { + test_result = ETH_TEST_FL_FAILED; + test_info[test_self_test] = true; } fail_mask = e100_diag_loopback(dev); if (fail_mask) { - test_result = false; + test_result = ETH_TEST_FL_FAILED; if (fail_mask & PHY_LOOPBACK) - test_info [E100_LPBK_PHY_FAIL] = true; + test_info[test_loopback_phy] = true; if (fail_mask & MAC_LOOPBACK) - test_info [E100_LPBK_MAC_FAIL] = true; + test_info[test_loopback_mac] = true; } - } - if (!e100_diag_eeprom(dev)) { - test_result = false; - test_info [E100_EEPROM_TEST_FAIL] = true; + test_info[cable_diag] = e100_cable_diag(bdp); + /* Need hw init regardless of netif_running */ + e100_hw_init(bdp); + if (netif_running(dev)) { + e100_open(dev); + } + } + else { + test_info[test_self_test] = false; + test_info[test_loopback_phy] = false; + test_info[test_loopback_mac] = false; + test_info[cable_diag] = false; } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ * 2); - e100_deisolate_driver(bdp, false); - - return flags | (test_result ? 
0 : ETH_TEST_FL_FAILED); + return flags | test_result; } /** @@ -126,8 +133,6 @@ } } - e100_configure_device(bdp); - return retval; } @@ -165,14 +170,14 @@ u8 rc = 0; printk(KERN_DEBUG "%s: PHY loopback test starts\n", dev->name); - e100_sw_reset(dev->priv, PORT_SELECTIVE_RESET); + e100_hw_init(dev->priv); if (!e100_diag_one_loopback(dev, PHY_LOOPBACK)) { rc |= PHY_LOOPBACK; } printk(KERN_DEBUG "%s: PHY loopback test ends\n", dev->name); printk(KERN_DEBUG "%s: MAC loopback test starts\n", dev->name); - e100_sw_reset(dev->priv, PORT_SELECTIVE_RESET); + e100_hw_init(dev->priv); if (!e100_diag_one_loopback(dev, MAC_LOOPBACK)) { rc |= MAC_LOOPBACK; } @@ -257,15 +262,10 @@ if (set_loopback) /* Set PHY loopback mode */ e100_phy_set_loopback(bdp); - else { /* Back to normal speed and duplex */ - if (bdp->params.e100_speed_duplex == E100_AUTONEG) - /* Reset PHY and do autoneg */ - e100_phy_autoneg(bdp); - else - /* Reset PHY and force speed and duplex */ - e100_force_speed_duplex(bdp); - } - /* Wait for PHY state change */ + else + /* Reset PHY loopback mode */ + e100_phy_reset(bdp); + /* Wait for PHY state change */ set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ); } else { /* For MAC loopback wait 500 msec to take effect */ @@ -348,10 +348,6 @@ e100_diag_loopback_cu_ru_exec(struct e100_private *bdp) { /*load CU & RU base */ - if (!e100_wait_exec_cmplx(bdp, 0, SCB_CUC_LOAD_BASE, 0)) - printk(KERN_ERR "e100: SCB_CUC_LOAD_BASE failed\n"); - if(!e100_wait_exec_cmplx(bdp, 0, SCB_RUC_LOAD_BASE, 0)) - printk(KERN_ERR "e100: SCB_RUC_LOAD_BASE failed!\n"); if(!e100_wait_exec_cmplx(bdp, bdp->loopback.dma_handle, SCB_RUC_START, 0)) printk(KERN_ERR "e100: SCB_RUC_START failed!\n"); @@ -431,5 +427,74 @@ pci_free_consistent(bdp->pdev, sizeof(rfd_t), bdp->loopback.rfd, bdp->loopback.dma_handle); +} + +static int +e100_cable_diag(struct e100_private *bdp) +{ + int saved_open_circut = 0xffff; + int saved_short_circut = 0xffff; + int saved_distance = 0xffff; + int saved_same 
= 0; + int cable_status = E100_CABLE_UNKNOWN; + int i; + + /* If we have link, */ + if (e100_get_link_state(bdp)) + return E100_CABLE_OK; + + if (bdp->rev_id < D102_REV_ID) + return E100_CABLE_UNKNOWN; + + /* Disable MDI/MDI-X auto switching */ + e100_mdi_write(bdp, MII_NCONFIG, bdp->phy_addr, + MDI_MDIX_RESET_ALL_MASK); + /* Set to 100 Full as required by cable test */ + e100_mdi_write(bdp, MII_BMCR, bdp->phy_addr, + BMCR_SPEED100 | BMCR_FULLDPLX); + + /* Test up to 100 times */ + for (i = 0; i < 100; i++) { + u16 ctrl_reg; + int distance, open_circut, short_circut, near_end; + + /* Enable and execute cable test */ + e100_mdi_write(bdp, HWI_CONTROL_REG, bdp->phy_addr, + (HWI_TEST_ENABLE | HWI_TEST_EXECUTE)); + /* Wait for cable test finished */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ/100 + 1); + /* Read results */ + e100_mdi_read(bdp, HWI_CONTROL_REG, bdp->phy_addr, &ctrl_reg); + distance = ctrl_reg & HWI_TEST_DISTANCE; + open_circut = ctrl_reg & HWI_TEST_HIGHZ_PROBLEM; + short_circut = ctrl_reg & HWI_TEST_LOWZ_PROBLEM; + + if ((distance == saved_distance) && + (open_circut == saved_open_circut) && + (short_circut == saved_short_circut)) + saved_same++; + else { + saved_same = 0; + saved_distance = distance; + saved_open_circut = open_circut; + saved_short_circut = short_circut; + } + /* If results are the same 3 times */ + if (saved_same == 3) { + near_end = ((distance * HWI_REGISTER_GRANULARITY) < + HWI_NEAR_END_BOUNDARY); + if (open_circut) + cable_status = (near_end) ? + E100_CABLE_OPEN_NEAR : E100_CABLE_OPEN_FAR; + if (short_circut) + cable_status = (near_end) ? 
+ E100_CABLE_SHORT_NEAR : E100_CABLE_SHORT_FAR; + break; + } + } + /* Reset cable test */ + e100_mdi_write(bdp, HWI_CONTROL_REG, bdp->phy_addr, HWI_RESET_ALL_MASK); + return cable_status; } diff -Nru a/drivers/net/e1000/Makefile b/drivers/net/e1000/Makefile --- a/drivers/net/e1000/Makefile Thu Jun 19 23:46:53 2003 +++ b/drivers/net/e1000/Makefile Thu Jun 19 23:46:53 2003 @@ -1,7 +1,7 @@ ################################################################################ # # -# Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved. +# Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free diff -Nru a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h --- a/drivers/net/e1000/e1000.h Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e1000/e1000.h Thu Jun 19 23:46:52 2003 @@ -63,6 +63,9 @@ #include #include #include +#ifdef NETIF_F_TSO +#include +#endif #include #include #include @@ -131,6 +134,7 @@ uint64_t dma; unsigned long length; unsigned long time_stamp; + unsigned int next_to_watch; }; struct e1000_desc_ring { @@ -166,7 +170,6 @@ struct timer_list watchdog_timer; struct timer_list phy_info_timer; struct vlan_group *vlgrp; - char *id_string; uint32_t bd_number; uint32_t rx_buffer_len; uint32_t part_num; @@ -215,6 +218,9 @@ struct e1000_phy_info phy_info; struct e1000_phy_stats phy_stats; + uint32_t test_icr; + struct e1000_desc_ring test_tx_ring; + struct e1000_desc_ring test_rx_ring; uint32_t pci_state[16]; diff -Nru a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c --- a/drivers/net/e1000/e1000_ethtool.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e1000/e1000_ethtool.c Thu Jun 19 23:46:52 2003 @@ -40,15 +40,60 @@ extern void e1000_reset(struct e1000_adapter *adapter); extern int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx); -static char 
e1000_gstrings_stats[][ETH_GSTRING_LEN] = { - "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", - "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", - "rx_length_errors", "rx_over_errors", "rx_crc_errors", - "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors", - "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", - "tx_heartbeat_errors", "tx_window_errors", +struct e1000_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; }; -#define E1000_STATS_LEN sizeof(e1000_gstrings_stats) / ETH_GSTRING_LEN + +#define E1000_STAT(m) sizeof(((struct e1000_adapter *)0)->m), \ + offsetof(struct e1000_adapter, m) +static struct e1000_stats e1000_gstrings_stats[] = { + { "rx_packets", E1000_STAT(net_stats.rx_packets) }, + { "tx_packets", E1000_STAT(net_stats.tx_packets) }, + { "rx_bytes", E1000_STAT(net_stats.rx_bytes) }, + { "tx_bytes", E1000_STAT(net_stats.tx_bytes) }, + { "rx_errors", E1000_STAT(net_stats.rx_errors) }, + { "tx_errors", E1000_STAT(net_stats.tx_errors) }, + { "rx_dropped", E1000_STAT(net_stats.rx_dropped) }, + { "tx_dropped", E1000_STAT(net_stats.tx_dropped) }, + { "multicast", E1000_STAT(net_stats.multicast) }, + { "collisions", E1000_STAT(net_stats.collisions) }, + { "rx_length_errors", E1000_STAT(net_stats.rx_length_errors) }, + { "rx_over_errors", E1000_STAT(net_stats.rx_over_errors) }, + { "rx_crc_errors", E1000_STAT(net_stats.rx_crc_errors) }, + { "rx_frame_errors", E1000_STAT(net_stats.rx_frame_errors) }, + { "rx_fifo_errors", E1000_STAT(net_stats.rx_fifo_errors) }, + { "rx_missed_errors", E1000_STAT(net_stats.rx_missed_errors) }, + { "tx_aborted_errors", E1000_STAT(net_stats.tx_aborted_errors) }, + { "tx_carrier_errors", E1000_STAT(net_stats.tx_carrier_errors) }, + { "tx_fifo_errors", E1000_STAT(net_stats.tx_fifo_errors) }, + { "tx_heartbeat_errors", E1000_STAT(net_stats.tx_heartbeat_errors) }, + { "tx_window_errors", E1000_STAT(net_stats.tx_window_errors) }, + { 
"tx_abort_late_coll", E1000_STAT(stats.latecol) }, + { "tx_deferred_ok", E1000_STAT(stats.dc) }, + { "tx_single_coll_ok", E1000_STAT(stats.scc) }, + { "tx_multi_coll_ok", E1000_STAT(stats.mcc) }, + { "rx_long_length_errors", E1000_STAT(stats.roc) }, + { "rx_short_length_errors", E1000_STAT(stats.ruc) }, + { "rx_align_errors", E1000_STAT(stats.algnerrc) }, + { "tx_tcp_seg_good", E1000_STAT(stats.tsctc) }, + { "tx_tcp_seg_failed", E1000_STAT(stats.tsctfc) }, + { "rx_flow_control_xon", E1000_STAT(stats.xonrxc) }, + { "rx_flow_control_xoff", E1000_STAT(stats.xoffrxc) }, + { "tx_flow_control_xon", E1000_STAT(stats.xontxc) }, + { "tx_flow_control_xoff", E1000_STAT(stats.xofftxc) }, + { "rx_csum_offload_good", E1000_STAT(hw_csum_good) }, + { "rx_csum_offload_errors", E1000_STAT(hw_csum_err) } +}; +#define E1000_STATS_LEN \ + sizeof(e1000_gstrings_stats) / sizeof(struct e1000_stats) +static char e1000_gstrings_test[][ETH_GSTRING_LEN] = { + "Register test (offline)", "Eeprom test (offline)", + "Interrupt test (offline)", "Loopback test (offline)", + "Link test (on/offline)" +}; +#define E1000_TEST_LEN sizeof(e1000_gstrings_test) / ETH_GSTRING_LEN static void e1000_ethtool_gset(struct e1000_adapter *adapter, struct ethtool_cmd *ecmd) @@ -154,6 +199,7 @@ strncpy(drvinfo->fw_version, "N/A", 32); strncpy(drvinfo->bus_info, adapter->pdev->slot_name, 32); drvinfo->n_stats = E1000_STATS_LEN; + drvinfo->testinfo_len = E1000_TEST_LEN; #define E1000_REGS_LEN 32 drvinfo->regdump_len = E1000_REGS_LEN * sizeof(uint32_t); drvinfo->eedump_len = adapter->hw.eeprom.word_size * 2; @@ -164,6 +210,7 @@ struct ethtool_regs *regs, uint32_t *regs_buff) { struct e1000_hw *hw = &adapter->hw; + uint16_t phy_data; regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id; @@ -182,6 +229,62 @@ regs_buff[10] = E1000_READ_REG(hw, TDT); regs_buff[11] = E1000_READ_REG(hw, TIDV); + regs_buff[12] = adapter->hw.phy_type; /* PHY type (IGP=1, M88=0) */ + if(hw->phy_type == e1000_phy_igp) { + 
e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, + IGP01E1000_PHY_AGC_A); + e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_A & + IGP01E1000_PHY_PAGE_SELECT, &phy_data); + regs_buff[13] = (uint32_t)phy_data; /* cable length */ + e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, + IGP01E1000_PHY_AGC_B); + e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_B & + IGP01E1000_PHY_PAGE_SELECT, &phy_data); + regs_buff[14] = (uint32_t)phy_data; /* cable length */ + e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, + IGP01E1000_PHY_AGC_C); + e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_C & + IGP01E1000_PHY_PAGE_SELECT, &phy_data); + regs_buff[15] = (uint32_t)phy_data; /* cable length */ + e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, + IGP01E1000_PHY_AGC_D); + e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_D & + IGP01E1000_PHY_PAGE_SELECT, &phy_data); + regs_buff[16] = (uint32_t)phy_data; /* cable length */ + regs_buff[17] = 0; /* extended 10bt distance (not needed) */ + e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, 0x0); + e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS & + IGP01E1000_PHY_PAGE_SELECT, &phy_data); + regs_buff[18] = (uint32_t)phy_data; /* cable polarity */ + e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, + IGP01E1000_PHY_PCS_INIT_REG); + e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG & + IGP01E1000_PHY_PAGE_SELECT, &phy_data); + regs_buff[19] = (uint32_t)phy_data; /* cable polarity */ + regs_buff[20] = 0; /* polarity correction enabled (always) */ + regs_buff[22] = 0; /* phy receive errors (unavailable) */ + regs_buff[23] = regs_buff[18]; /* mdix mode */ + e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, 0x0); + } else { + e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); + regs_buff[13] = (uint32_t)phy_data; /* cable length */ + regs_buff[14] = 0; /* Dummy (to align w/ IGP phy reg dump) */ + regs_buff[15] = 0; /* Dummy (to align w/ IGP phy reg dump) */ + regs_buff[16] = 0; /* Dummy (to align w/ IGP phy reg dump) */ + 
e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + regs_buff[17] = (uint32_t)phy_data; /* extended 10bt distance */ + regs_buff[18] = regs_buff[13]; /* cable polarity */ + regs_buff[19] = 0; /* Dummy (to align w/ IGP phy reg dump) */ + regs_buff[20] = regs_buff[17]; /* polarity correction */ + /* phy receive errors */ + regs_buff[22] = adapter->phy_stats.receive_errors; + regs_buff[23] = regs_buff[13]; /* mdix mode */ + } + regs_buff[21] = adapter->phy_stats.idle_errors; /* phy idle errors */ + e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); + regs_buff[24] = (uint32_t)phy_data; /* phy local receiver status */ + regs_buff[25] = regs_buff[24]; /* phy remote receiver status */ + return; } @@ -219,7 +322,7 @@ uint16_t i; for (i = 0; i < last_word - first_word + 1; i++) if((ret_val = e1000_read_eeprom(hw, first_word + i, 1, - &eeprom_buff[i]))) + &eeprom_buff[i]))) break; } geeprom_error: @@ -249,7 +352,7 @@ first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; eeprom_buff = kmalloc(max_len, GFP_KERNEL); - if(eeprom_buff == NULL) + if(!eeprom_buff) return -ENOMEM; ptr = (void *)eeprom_buff; @@ -284,6 +387,765 @@ return ret_val; } +#define REG_PATTERN_TEST(R, M, W) \ +{ \ + uint32_t pat, value; \ + uint32_t test[] = \ + {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; \ + for(pat = 0; pat < sizeof(test)/sizeof(test[0]); pat++) { \ + E1000_WRITE_REG(&adapter->hw, R, (test[pat] & W)); \ + value = E1000_READ_REG(&adapter->hw, R); \ + if(value != (test[pat] & W & M)) { \ + *data = (adapter->hw.mac_type < e1000_82543) ? \ + E1000_82542_##R : E1000_##R; \ + return 1; \ + } \ + } \ +} + +#define REG_SET_AND_CHECK(R, M, W) \ +{ \ + uint32_t value; \ + E1000_WRITE_REG(&adapter->hw, R, W & M); \ + value = E1000_READ_REG(&adapter->hw, R); \ + if ((W & M) != (value & M)) { \ + *data = (adapter->hw.mac_type < e1000_82543) ? 
\ + E1000_82542_##R : E1000_##R; \ + return 1; \ + } \ +} + +static int +e1000_reg_test(struct e1000_adapter *adapter, uint64_t *data) +{ + uint32_t value; + uint32_t i; + + /* The status register is Read Only, so a write should fail. + * Some bits that get toggled are ignored. + */ + value = (E1000_READ_REG(&adapter->hw, STATUS) & (0xFFFFF833)); + E1000_WRITE_REG(&adapter->hw, STATUS, (0xFFFFFFFF)); + if(value != (E1000_READ_REG(&adapter->hw, STATUS) & (0xFFFFF833))) { + *data = 1; + return 1; + } + + REG_PATTERN_TEST(FCAL, 0xFFFFFFFF, 0xFFFFFFFF); + REG_PATTERN_TEST(FCAH, 0x0000FFFF, 0xFFFFFFFF); + REG_PATTERN_TEST(FCT, 0x0000FFFF, 0xFFFFFFFF); + REG_PATTERN_TEST(VET, 0x0000FFFF, 0xFFFFFFFF); + REG_PATTERN_TEST(RDTR, 0x0000FFFF, 0xFFFFFFFF); + REG_PATTERN_TEST(RDBAH, 0xFFFFFFFF, 0xFFFFFFFF); + REG_PATTERN_TEST(RDLEN, 0x000FFF80, 0x000FFFFF); + REG_PATTERN_TEST(RDH, 0x0000FFFF, 0x0000FFFF); + REG_PATTERN_TEST(RDT, 0x0000FFFF, 0x0000FFFF); + REG_PATTERN_TEST(FCRTH, 0x0000FFF8, 0x0000FFF8); + REG_PATTERN_TEST(FCTTV, 0x0000FFFF, 0x0000FFFF); + REG_PATTERN_TEST(TIPG, 0x3FFFFFFF, 0x3FFFFFFF); + REG_PATTERN_TEST(TDBAH, 0xFFFFFFFF, 0xFFFFFFFF); + REG_PATTERN_TEST(TDLEN, 0x000FFF80, 0x000FFFFF); + + REG_SET_AND_CHECK(RCTL, 0xFFFFFFFF, 0x00000000); + REG_SET_AND_CHECK(RCTL, 0x06DFB3FE, 0x003FFFFB); + REG_SET_AND_CHECK(TCTL, 0xFFFFFFFF, 0x00000000); + + if(adapter->hw.mac_type >= e1000_82543) { + + REG_SET_AND_CHECK(RCTL, 0x06DFB3FE, 0xFFFFFFFF); + REG_PATTERN_TEST(RDBAL, 0xFFFFFFF0, 0xFFFFFFFF); + REG_PATTERN_TEST(TXCW, 0xC000FFFF, 0x0000FFFF); + REG_PATTERN_TEST(TDBAL, 0xFFFFFFF0, 0xFFFFFFFF); + REG_PATTERN_TEST(TIDV, 0x0000FFFF, 0x0000FFFF); + + for(i = 0; i < E1000_RAR_ENTRIES; i++) { + REG_PATTERN_TEST(RA + ((i << 1) << 2), 0xFFFFFFFF, + 0xFFFFFFFF); + REG_PATTERN_TEST(RA + (((i << 1) + 1) << 2), 0x8003FFFF, + 0xFFFFFFFF); + } + + } else { + + REG_SET_AND_CHECK(RCTL, 0xFFFFFFFF, 0x01FFFFFF); + REG_PATTERN_TEST(RDBAL, 0xFFFFF000, 0xFFFFFFFF); + REG_PATTERN_TEST(TXCW, 
0x0000FFFF, 0x0000FFFF); + REG_PATTERN_TEST(TDBAL, 0xFFFFF000, 0xFFFFFFFF); + + } + + for(i = 0; i < E1000_MC_TBL_SIZE; i++) + REG_PATTERN_TEST(MTA + (i << 2), 0xFFFFFFFF, 0xFFFFFFFF); + + return 0; +} + +static int +e1000_eeprom_test(struct e1000_adapter *adapter, uint64_t *data) +{ + uint16_t temp; + uint16_t checksum = 0; + uint16_t i; + + *data = 0; + /* Read and add up the contents of the EEPROM */ + for(i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) { + if((e1000_read_eeprom(&adapter->hw, i, 1, &temp)) < 0) { + *data = 1; + break; + } + checksum += temp; + } + + /* If Checksum is not Correct return error else test passed */ + if((checksum != (uint16_t) EEPROM_SUM) && !(*data)) + *data = 2; + + return *data; +} + +static void +e1000_test_intr(int irq, + void *data, + struct pt_regs *regs) +{ + struct net_device *netdev = (struct net_device *) data; + struct e1000_adapter *adapter = netdev->priv; + + adapter->test_icr |= E1000_READ_REG(&adapter->hw, ICR); + + return; +} + +static int +e1000_intr_test(struct e1000_adapter *adapter, uint64_t *data) +{ + struct net_device *netdev = adapter->netdev; + uint32_t icr, mask, i=0; + + *data = 0; + + /* Hook up test interrupt handler just for this test */ + if(request_irq + (netdev->irq, &e1000_test_intr, SA_SHIRQ, netdev->name, netdev)) { + *data = 1; + return -1; + } + + /* Disable all the interrupts */ + E1000_WRITE_REG(&adapter->hw, IMC, 0xFFFFFFFF); + msec_delay(10); + + /* Interrupts are disabled, so read interrupt cause + * register (icr) twice to verify that there are no interrupts + * pending. icr is clear on read. + */ + icr = E1000_READ_REG(&adapter->hw, ICR); + icr = E1000_READ_REG(&adapter->hw, ICR); + + if(icr != 0) { + /* if icr is non-zero, there is no point + * running other interrupt tests. 
+ */ + *data = 2; + i = 10; + } + + /* Test each interrupt */ + for(; i < 10; i++) { + + /* Interrupt to test */ + mask = 1 << i; + + /* Disable the interrupt to be reported in + * the cause register and then force the same + * interrupt and see if one gets posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + E1000_WRITE_REG(&adapter->hw, IMC, mask); + E1000_WRITE_REG(&adapter->hw, ICS, mask); + msec_delay(10); + + if(adapter->test_icr & mask) { + *data = 3; + break; + } + + /* Enable the interrupt to be reported in + * the cause register and then force the same + * interrupt and see if one gets posted. If + * an interrupt was not posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + E1000_WRITE_REG(&adapter->hw, IMS, mask); + E1000_WRITE_REG(&adapter->hw, ICS, mask); + msec_delay(10); + + if(!(adapter->test_icr & mask)) { + *data = 4; + break; + } + + /* Disable the other interrupts to be reported in + * the cause register and then force the other + * interrupts and see if any get posted. If + * an interrupt was posted to the bus, the + * test failed. 
+ */ + adapter->test_icr = 0; + E1000_WRITE_REG(&adapter->hw, IMC, ~mask); + E1000_WRITE_REG(&adapter->hw, ICS, ~mask); + msec_delay(10); + + if(adapter->test_icr) { + *data = 5; + break; + } + } + + /* Disable all the interrupts */ + E1000_WRITE_REG(&adapter->hw, IMC, 0xFFFFFFFF); + msec_delay(10); + + /* Unhook test interrupt handler */ + free_irq(netdev->irq, netdev); + + return *data; +} + +static void +e1000_free_desc_rings(struct e1000_adapter *adapter) +{ + struct e1000_desc_ring *txdr = &adapter->test_tx_ring; + struct e1000_desc_ring *rxdr = &adapter->test_rx_ring; + struct pci_dev *pdev = adapter->pdev; + int i; + + if(txdr->desc && txdr->buffer_info) { + for(i = 0; i < txdr->count; i++) { + if(txdr->buffer_info[i].dma) + pci_unmap_single(pdev, txdr->buffer_info[i].dma, + txdr->buffer_info[i].length, + PCI_DMA_TODEVICE); + if(txdr->buffer_info[i].skb) + dev_kfree_skb(txdr->buffer_info[i].skb); + } + } + + if(rxdr->desc && rxdr->buffer_info) { + for(i = 0; i < rxdr->count; i++) { + if(rxdr->buffer_info[i].dma) + pci_unmap_single(pdev, rxdr->buffer_info[i].dma, + rxdr->buffer_info[i].length, + PCI_DMA_FROMDEVICE); + if(rxdr->buffer_info[i].skb) + dev_kfree_skb(rxdr->buffer_info[i].skb); + } + } + + if(txdr->desc) + pci_free_consistent(pdev, txdr->size, txdr->desc, txdr->dma); + if(rxdr->desc) + pci_free_consistent(pdev, rxdr->size, rxdr->desc, rxdr->dma); + + if(txdr->buffer_info) + kfree(txdr->buffer_info); + if(rxdr->buffer_info) + kfree(rxdr->buffer_info); + + return; +} + +static int +e1000_setup_desc_rings(struct e1000_adapter *adapter) +{ + struct e1000_desc_ring *txdr = &adapter->test_tx_ring; + struct e1000_desc_ring *rxdr = &adapter->test_rx_ring; + struct pci_dev *pdev = adapter->pdev; + uint32_t rctl; + int size, i, ret_val; + + /* Setup Tx descriptor ring and Tx buffers */ + + txdr->count = 80; + + size = txdr->count * sizeof(struct e1000_buffer); + if(!(txdr->buffer_info = kmalloc(size, GFP_KERNEL))) { + ret_val = 1; + goto err_nomem; + } + 
memset(txdr->buffer_info, 0, size); + + txdr->size = txdr->count * sizeof(struct e1000_tx_desc); + E1000_ROUNDUP(txdr->size, 4096); + if(!(txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma))) { + ret_val = 2; + goto err_nomem; + } + memset(txdr->desc, 0, txdr->size); + txdr->next_to_use = txdr->next_to_clean = 0; + + E1000_WRITE_REG(&adapter->hw, TDBAL, + ((uint64_t) txdr->dma & 0x00000000FFFFFFFF)); + E1000_WRITE_REG(&adapter->hw, TDBAH, ((uint64_t) txdr->dma >> 32)); + E1000_WRITE_REG(&adapter->hw, TDLEN, + txdr->count * sizeof(struct e1000_tx_desc)); + E1000_WRITE_REG(&adapter->hw, TDH, 0); + E1000_WRITE_REG(&adapter->hw, TDT, 0); + E1000_WRITE_REG(&adapter->hw, TCTL, + E1000_TCTL_PSP | E1000_TCTL_EN | + E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT | + E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT); + + for(i = 0; i < txdr->count; i++) { + struct e1000_tx_desc *tx_desc = E1000_TX_DESC(*txdr, i); + struct sk_buff *skb; + unsigned int size = 1024; + + if(!(skb = alloc_skb(size, GFP_KERNEL))) { + ret_val = 3; + goto err_nomem; + } + skb_put(skb, size); + txdr->buffer_info[i].skb = skb; + txdr->buffer_info[i].length = skb->len; + txdr->buffer_info[i].dma = + pci_map_single(pdev, skb->data, skb->len, + PCI_DMA_TODEVICE); + tx_desc->buffer_addr = cpu_to_le64(txdr->buffer_info[i].dma); + tx_desc->lower.data = cpu_to_le32(skb->len); + tx_desc->lower.data |= E1000_TXD_CMD_EOP; + tx_desc->lower.data |= E1000_TXD_CMD_IFCS; + tx_desc->lower.data |= E1000_TXD_CMD_RPS; + tx_desc->upper.data = 0; + } + + /* Setup Rx descriptor ring and Rx buffers */ + + rxdr->count = 80; + + size = rxdr->count * sizeof(struct e1000_buffer); + if(!(rxdr->buffer_info = kmalloc(size, GFP_KERNEL))) { + ret_val = 4; + goto err_nomem; + } + memset(rxdr->buffer_info, 0, size); + + rxdr->size = rxdr->count * sizeof(struct e1000_rx_desc); + if(!(rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma))) { + ret_val = 5; + goto err_nomem; + } + memset(rxdr->desc, 0, rxdr->size); + 
rxdr->next_to_use = rxdr->next_to_clean = 0; + + rctl = E1000_READ_REG(&adapter->hw, RCTL); + E1000_WRITE_REG(&adapter->hw, RCTL, rctl & ~E1000_RCTL_EN); + E1000_WRITE_REG(&adapter->hw, RDBAL, + ((uint64_t) rxdr->dma & 0xFFFFFFFF)); + E1000_WRITE_REG(&adapter->hw, RDBAH, ((uint64_t) rxdr->dma >> 32)); + E1000_WRITE_REG(&adapter->hw, RDLEN, rxdr->size); + E1000_WRITE_REG(&adapter->hw, RDH, 0); + E1000_WRITE_REG(&adapter->hw, RDT, 0); + rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_SZ_2048 | + E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | + (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT); + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); + + for(i = 0; i < rxdr->count; i++) { + struct e1000_rx_desc *rx_desc = E1000_RX_DESC(*rxdr, i); + struct sk_buff *skb; + + if(!(skb = alloc_skb(E1000_RXBUFFER_2048 + 2, GFP_KERNEL))) { + ret_val = 6; + goto err_nomem; + } + skb_reserve(skb, 2); + rxdr->buffer_info[i].skb = skb; + rxdr->buffer_info[i].length = E1000_RXBUFFER_2048; + rxdr->buffer_info[i].dma = + pci_map_single(pdev, skb->data, E1000_RXBUFFER_2048, + PCI_DMA_FROMDEVICE); + rx_desc->buffer_addr = cpu_to_le64(rxdr->buffer_info[i].dma); + memset(skb->data, 0x00, skb->len); + } + + return 0; + + err_nomem: + e1000_free_desc_rings(adapter); + return ret_val; +} + +static void +e1000_phy_disable_receiver(struct e1000_adapter *adapter) +{ + /* Write out to PHY registers 29 and 30 to disable the Receiver. */ + e1000_write_phy_reg(&adapter->hw, 29, 0x001F); + e1000_write_phy_reg(&adapter->hw, 30, 0x8FFC); + e1000_write_phy_reg(&adapter->hw, 29, 0x001A); + e1000_write_phy_reg(&adapter->hw, 30, 0x8FF0); + + return; +} + +static void +e1000_phy_reset_clk_and_crs(struct e1000_adapter *adapter) +{ + uint16_t phy_reg; + + /* Because we reset the PHY above, we need to re-force TX_CLK in the + * Extended PHY Specific Control Register to 25MHz clock. This + * value defaults back to a 2.5MHz clock when the PHY is reset. 
+ */ + e1000_read_phy_reg(&adapter->hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_reg); + phy_reg |= M88E1000_EPSCR_TX_CLK_25; + e1000_write_phy_reg(&adapter->hw, + M88E1000_EXT_PHY_SPEC_CTRL, phy_reg); + + /* In addition, because of the s/w reset above, we need to enable + * CRS on TX. This must be set for both full and half duplex + * operation. + */ + e1000_read_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, &phy_reg); + phy_reg |= M88E1000_PSCR_ASSERT_CRS_ON_TX; + e1000_write_phy_reg(&adapter->hw, + M88E1000_PHY_SPEC_CTRL, phy_reg); +} + +static int +e1000_nonintegrated_phy_loopback(struct e1000_adapter *adapter) +{ + uint32_t ctrl_reg; + uint16_t phy_reg; + + /* Setup the Device Control Register for PHY loopback test. */ + + ctrl_reg = E1000_READ_REG(&adapter->hw, CTRL); + ctrl_reg |= (E1000_CTRL_ILOS | /* Invert Loss-Of-Signal */ + E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ + E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ + E1000_CTRL_SPD_1000 | /* Force Speed to 1000 */ + E1000_CTRL_FD); /* Force Duplex to FULL */ + + E1000_WRITE_REG(&adapter->hw, CTRL, ctrl_reg); + + /* Read the PHY Specific Control Register (0x10) */ + e1000_read_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, &phy_reg); + + /* Clear Auto-Crossover bits in PHY Specific Control Register + * (bits 6:5). + */ + phy_reg &= ~M88E1000_PSCR_AUTO_X_MODE; + e1000_write_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, phy_reg); + + /* Perform software reset on the PHY */ + e1000_phy_reset(&adapter->hw); + + /* Have to setup TX_CLK and TX_CRS after software reset */ + e1000_phy_reset_clk_and_crs(adapter); + + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, 0x8100); + + /* Wait for reset to complete. */ + udelay(500); + + /* Have to setup TX_CLK and TX_CRS after software reset */ + e1000_phy_reset_clk_and_crs(adapter); + + /* Write out to PHY registers 29 and 30 to disable the Receiver. */ + e1000_phy_disable_receiver(adapter); + + /* Set the loopback bit in the PHY control register. 
*/ + e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_reg); + phy_reg |= MII_CR_LOOPBACK; + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, phy_reg); + + /* Setup TX_CLK and TX_CRS one more time. */ + e1000_phy_reset_clk_and_crs(adapter); + + /* Check Phy Configuration */ + e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_reg); + if(phy_reg != 0x4100) + return 9; + + e1000_read_phy_reg(&adapter->hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_reg); + if(phy_reg != 0x0070) + return 10; + + e1000_read_phy_reg(&adapter->hw, 29, &phy_reg); + if(phy_reg != 0x001A) + return 11; + + return 0; +} + +static int +e1000_integrated_phy_loopback(struct e1000_adapter *adapter) +{ + uint32_t ctrl_reg = 0; + uint32_t stat_reg = 0; + + adapter->hw.autoneg = FALSE; + + if(adapter->hw.phy_type == e1000_phy_m88) { + /* Auto-MDI/MDIX Off */ + e1000_write_phy_reg(&adapter->hw, + M88E1000_PHY_SPEC_CTRL, 0x0808); + /* reset to update Auto-MDI/MDIX */ + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, 0x9140); + /* autoneg off */ + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, 0x8140); + } + /* force 1000, set loopback */ + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, 0x4140); + + /* Now set up the MAC to the same speed/duplex as the PHY. */ + ctrl_reg = E1000_READ_REG(&adapter->hw, CTRL); + ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */ + ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ + E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ + E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */ + E1000_CTRL_FD); /* Force Duplex to FULL */ + + if(adapter->hw.media_type == e1000_media_type_copper && + adapter->hw.phy_type == e1000_phy_m88) { + ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */ + } else { + /* Set the ILOS bit on the fiber Nic is half + * duplex link is detected. 
*/ + stat_reg = E1000_READ_REG(&adapter->hw, STATUS); + if((stat_reg & E1000_STATUS_FD) == 0) + ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU); + } + + E1000_WRITE_REG(&adapter->hw, CTRL, ctrl_reg); + + /* Disable the receiver on the PHY so when a cable is plugged in, the + * PHY does not begin to autoneg when a cable is reconnected to the NIC. + */ + if(adapter->hw.phy_type == e1000_phy_m88) + e1000_phy_disable_receiver(adapter); + + udelay(500); + + return 0; +} + +static int +e1000_set_phy_loopback(struct e1000_adapter *adapter) +{ + uint16_t phy_reg = 0; + uint16_t count = 0; + + switch (adapter->hw.mac_type) { + case e1000_82543: + if(adapter->hw.media_type == e1000_media_type_copper) { + /* Attempt to setup Loopback mode on Non-integrated PHY. + * Some PHY registers get corrupted at random, so + * attempt this 10 times. + */ + while(e1000_nonintegrated_phy_loopback(adapter) && + count++ < 10); + if(count < 11) + return 0; + } + break; + + case e1000_82544: + case e1000_82540: + case e1000_82545: + case e1000_82546: + case e1000_82541: + case e1000_82547: + return e1000_integrated_phy_loopback(adapter); + break; + + default: + /* Default PHY loopback work is to read the MII + * control register and assert bit 14 (loopback mode). 
+ */ + e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_reg); + phy_reg |= MII_CR_LOOPBACK; + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, phy_reg); + return 0; + break; + } + + return 8; +} + +static int +e1000_setup_loopback_test(struct e1000_adapter *adapter) +{ + uint32_t rctl; + + if(adapter->hw.media_type == e1000_media_type_fiber) { + if(adapter->hw.mac_type == e1000_82545 || + adapter->hw.mac_type == e1000_82546) + return e1000_set_phy_loopback(adapter); + else { + rctl = E1000_READ_REG(&adapter->hw, RCTL); + rctl |= E1000_RCTL_LBM_TCVR; + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); + return 0; + } + } else if(adapter->hw.media_type == e1000_media_type_copper) + return e1000_set_phy_loopback(adapter); + + return 7; +} + +static void +e1000_loopback_cleanup(struct e1000_adapter *adapter) +{ + uint32_t rctl; + uint16_t phy_reg; + + rctl = E1000_READ_REG(&adapter->hw, RCTL); + rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); + E1000_WRITE_REG(&adapter->hw, RCTL, rctl); + + if(adapter->hw.media_type == e1000_media_type_copper || + (adapter->hw.media_type == e1000_media_type_fiber && + (adapter->hw.mac_type == e1000_82545 || + adapter->hw.mac_type == e1000_82546))) { + adapter->hw.autoneg = TRUE; + e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_reg); + if(phy_reg & MII_CR_LOOPBACK) { + phy_reg &= ~MII_CR_LOOPBACK; + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, phy_reg); + e1000_phy_reset(&adapter->hw); + } + } +} + +static void +e1000_create_lbtest_frame(struct sk_buff *skb, unsigned int frame_size) +{ + memset(skb->data, 0xFF, frame_size); + frame_size = (frame_size % 2) ? (frame_size - 1) : frame_size; + memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1); + memset(&skb->data[frame_size / 2 + 10], 0xBE, 1); + memset(&skb->data[frame_size / 2 + 12], 0xAF, 1); +} + +static int +e1000_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size) +{ + frame_size = (frame_size % 2) ? 
(frame_size - 1) : frame_size; + if(*(skb->data + 3) == 0xFF) { + if((*(skb->data + frame_size / 2 + 10) == 0xBE) && + (*(skb->data + frame_size / 2 + 12) == 0xAF)) { + return 0; + } + } + return 13; +} + +static int +e1000_run_loopback_test(struct e1000_adapter *adapter) +{ + struct e1000_desc_ring *txdr = &adapter->test_tx_ring; + struct e1000_desc_ring *rxdr = &adapter->test_rx_ring; + struct pci_dev *pdev = adapter->pdev; + int i; + + E1000_WRITE_REG(&adapter->hw, RDT, rxdr->count - 1); + + for(i = 0; i < 64; i++) { + e1000_create_lbtest_frame(txdr->buffer_info[i].skb, 1024); + pci_dma_sync_single(pdev, txdr->buffer_info[i].dma, + txdr->buffer_info[i].length, + PCI_DMA_TODEVICE); + } + E1000_WRITE_REG(&adapter->hw, TDT, i); + + msec_delay(200); + + pci_dma_sync_single(pdev, rxdr->buffer_info[0].dma, + rxdr->buffer_info[0].length, PCI_DMA_FROMDEVICE); + + return e1000_check_lbtest_frame(rxdr->buffer_info[0].skb, 1024); +} + +static int +e1000_loopback_test(struct e1000_adapter *adapter, uint64_t *data) +{ + if((*data = e1000_setup_desc_rings(adapter))) goto err_loopback; + if((*data = e1000_setup_loopback_test(adapter))) goto err_loopback; + *data = e1000_run_loopback_test(adapter); + e1000_loopback_cleanup(adapter); + e1000_free_desc_rings(adapter); +err_loopback: + return *data; +} + +static int +e1000_link_test(struct e1000_adapter *adapter, uint64_t *data) +{ + *data = 0; + e1000_check_for_link(&adapter->hw); + + if(!(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) { + *data = 1; + } + return *data; +} + +static int +e1000_ethtool_test(struct e1000_adapter *adapter, + struct ethtool_test *eth_test, uint64_t *data) +{ + boolean_t if_running = netif_running(adapter->netdev); + + if(eth_test->flags == ETH_TEST_FL_OFFLINE) { + /* Offline tests */ + + /* Link test performed before hardware reset so autoneg doesn't + * interfere with test result */ + if(e1000_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + if(if_running) + 
e1000_down(adapter); + + e1000_reset(adapter); + if(e1000_reg_test(adapter, &data[0])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + e1000_reset(adapter); + if(e1000_eeprom_test(adapter, &data[1])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + e1000_reset(adapter); + if(e1000_intr_test(adapter, &data[2])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + e1000_reset(adapter); + if(e1000_loopback_test(adapter, &data[3])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + e1000_reset(adapter); + if(if_running) + e1000_up(adapter); + } else { + /* Online tests */ + if(e1000_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + /* Offline tests aren't run; pass by default */ + data[0] = 0; + data[1] = 0; + data[2] = 0; + data[3] = 0; + } + return 0; +} + static void e1000_ethtool_gwol(struct e1000_adapter *adapter, struct ethtool_wolinfo *wol) { @@ -443,24 +1305,46 @@ case ETHTOOL_GSTRINGS: { struct ethtool_gstrings gstrings = { ETHTOOL_GSTRINGS }; char *strings = NULL; + int err = 0; if(copy_from_user(&gstrings, addr, sizeof(gstrings))) return -EFAULT; switch(gstrings.string_set) { - case ETH_SS_STATS: + case ETH_SS_TEST: + gstrings.len = E1000_TEST_LEN; + strings = kmalloc(E1000_TEST_LEN * ETH_GSTRING_LEN, + GFP_KERNEL); + if(!strings) + return -ENOMEM; + memcpy(strings, e1000_gstrings_test, E1000_TEST_LEN * + ETH_GSTRING_LEN); + break; + case ETH_SS_STATS: { + int i; gstrings.len = E1000_STATS_LEN; - strings = *e1000_gstrings_stats; + strings = kmalloc(E1000_STATS_LEN * ETH_GSTRING_LEN, + GFP_KERNEL); + if(!strings) + return -ENOMEM; + for(i=0; i < E1000_STATS_LEN; i++) { + memcpy(&strings[i * ETH_GSTRING_LEN], + e1000_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); + } break; + } default: return -EOPNOTSUPP; } if(copy_to_user(addr, &gstrings, sizeof(gstrings))) - return -EFAULT; + err = -EFAULT; addr += offsetof(struct ethtool_gstrings, data); - if(copy_to_user(addr, strings, + if(!err && copy_to_user(addr, strings, gstrings.len * ETH_GSTRING_LEN)) - 
return -EFAULT; - return 0; + err = -EFAULT; + + kfree(strings); + return err; } case ETHTOOL_GREGS: { struct ethtool_regs regs = {ETHTOOL_GREGS}; @@ -522,16 +1406,14 @@ void *ptr; int err = 0; + if(copy_from_user(&eeprom, addr, sizeof(eeprom))) + return -EFAULT; + eeprom_buff = kmalloc(hw->eeprom.word_size * 2, GFP_KERNEL); - if(eeprom_buff == NULL) + if(!eeprom_buff) return -ENOMEM; - if(copy_from_user(&eeprom, addr, sizeof(eeprom))) { - err = -EFAULT; - goto err_geeprom_ioctl; - } - if((err = e1000_ethtool_geeprom(adapter, &eeprom, eeprom_buff))) goto err_geeprom_ioctl; @@ -565,15 +1447,42 @@ } case ETHTOOL_GSTATS: { struct { - struct ethtool_stats cmd; + struct ethtool_stats eth_stats; uint64_t data[E1000_STATS_LEN]; } stats = { {ETHTOOL_GSTATS, E1000_STATS_LEN} }; int i; for(i = 0; i < E1000_STATS_LEN; i++) - stats.data[i] = - ((unsigned long *)&adapter->net_stats)[i]; + stats.data[i] = (e1000_gstrings_stats[i].sizeof_stat == + sizeof(uint64_t)) ? + *(uint64_t *)((char *)adapter + + e1000_gstrings_stats[i].stat_offset) : + *(uint32_t *)((char *)adapter + + e1000_gstrings_stats[i].stat_offset); if(copy_to_user(addr, &stats, sizeof(stats))) + return -EFAULT; + return 0; + } + case ETHTOOL_TEST: { + struct { + struct ethtool_test eth_test; + uint64_t data[E1000_TEST_LEN]; + } test = { {ETHTOOL_TEST} }; + int err; + + if(!capable(CAP_NET_ADMIN)) + return -EPERM; + + if(copy_from_user(&test.eth_test, addr, sizeof(test.eth_test))) + return -EFAULT; + + test.eth_test.len = E1000_TEST_LEN; + + if((err = e1000_ethtool_test(adapter, &test.eth_test, + test.data))) + return err; + + if(copy_to_user(addr, &test, sizeof(test)) != 0) return -EFAULT; return 0; } diff -Nru a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c --- a/drivers/net/e1000/e1000_hw.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e1000/e1000_hw.c Thu Jun 19 23:46:52 2003 @@ -185,6 +185,7 @@ break; case E1000_DEV_ID_82546EB_COPPER: case E1000_DEV_ID_82546EB_FIBER: + case 
E1000_DEV_ID_82546EB_QUAD_COPPER: hw->mac_type = e1000_82546; break; case E1000_DEV_ID_82541EI: @@ -288,9 +289,7 @@ /* Configure activity LED after PHY reset */ led_ctrl = E1000_READ_REG(hw, LEDCTL); led_ctrl &= IGP_ACTIVITY_LED_MASK; - led_ctrl |= IGP_ACTIVITY_LED_ENABLE; - if(hw->mac_type == e1000_82547) - led_ctrl |= IGP_LED3_MODE; + led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); E1000_WRITE_REG(hw, LEDCTL, led_ctrl); } @@ -737,9 +736,7 @@ /* Configure activity LED after PHY reset */ led_ctrl = E1000_READ_REG(hw, LEDCTL); led_ctrl &= IGP_ACTIVITY_LED_MASK; - led_ctrl |= IGP_ACTIVITY_LED_ENABLE; - if(hw->mac_type == e1000_82547) - led_ctrl |= IGP_LED3_MODE; + led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); E1000_WRITE_REG(hw, LEDCTL, led_ctrl); if(hw->autoneg_advertised == ADVERTISE_1000_FULL) { @@ -2293,9 +2290,7 @@ /* Configure activity LED after PHY reset */ led_ctrl = E1000_READ_REG(hw, LEDCTL); led_ctrl &= IGP_ACTIVITY_LED_MASK; - led_ctrl |= IGP_ACTIVITY_LED_ENABLE; - if(hw->mac_type == e1000_82547) - led_ctrl |= IGP_LED3_MODE; + led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); E1000_WRITE_REG(hw, LEDCTL, led_ctrl); } } @@ -3801,6 +3796,7 @@ case E1000_DEV_ID_82540EM_LOM: case E1000_DEV_ID_82545EM_COPPER: case E1000_DEV_ID_82546EB_COPPER: + case E1000_DEV_ID_82546EB_QUAD_COPPER: case E1000_DEV_ID_82541EI: case E1000_DEV_ID_82541EP: case E1000_DEV_ID_82547EI: @@ -3842,6 +3838,7 @@ case E1000_DEV_ID_82545EM_FIBER: case E1000_DEV_ID_82546EB_COPPER: case E1000_DEV_ID_82546EB_FIBER: + case E1000_DEV_ID_82546EB_QUAD_COPPER: case E1000_DEV_ID_82541EI: case E1000_DEV_ID_82541EP: case E1000_DEV_ID_82547EI: @@ -3896,6 +3893,7 @@ case E1000_DEV_ID_82540EM_LOM: case E1000_DEV_ID_82545EM_COPPER: case E1000_DEV_ID_82546EB_COPPER: + case E1000_DEV_ID_82546EB_QUAD_COPPER: case E1000_DEV_ID_82541EI: case E1000_DEV_ID_82541EP: case E1000_DEV_ID_82547EI: @@ -3949,6 +3947,7 @@ case E1000_DEV_ID_82540EM_LOM: case E1000_DEV_ID_82545EM_COPPER: case 
E1000_DEV_ID_82546EB_COPPER: + case E1000_DEV_ID_82546EB_QUAD_COPPER: case E1000_DEV_ID_82541EI: case E1000_DEV_ID_82541EP: case E1000_DEV_ID_82547EI: @@ -4206,7 +4205,11 @@ status = E1000_READ_REG(hw, STATUS); hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ? e1000_bus_type_pcix : e1000_bus_type_pci; - if(hw->bus_type == e1000_bus_type_pci) { + + if(hw->device_id == E1000_DEV_ID_82546EB_QUAD_COPPER) { + hw->bus_speed = (hw->bus_type == e1000_bus_type_pci) ? + e1000_bus_speed_66 : e1000_bus_speed_120; + } else if(hw->bus_type == e1000_bus_type_pci) { hw->bus_speed = (status & E1000_STATUS_PCI66) ? e1000_bus_speed_66 : e1000_bus_speed_33; } else { diff -Nru a/drivers/net/e1000/e1000_hw.h b/drivers/net/e1000/e1000_hw.h --- a/drivers/net/e1000/e1000_hw.h Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e1000/e1000_hw.h Thu Jun 19 23:46:52 2003 @@ -99,6 +99,7 @@ e1000_bus_speed_33, e1000_bus_speed_66, e1000_bus_speed_100, + e1000_bus_speed_120, e1000_bus_speed_133, e1000_bus_speed_reserved } e1000_bus_speed; @@ -314,10 +315,11 @@ #define E1000_DEV_ID_82545EM_FIBER 0x1011 #define E1000_DEV_ID_82546EB_COPPER 0x1010 #define E1000_DEV_ID_82546EB_FIBER 0x1012 +#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D #define E1000_DEV_ID_82541EI 0x1013 #define E1000_DEV_ID_82541EP 0x1018 #define E1000_DEV_ID_82547EI 0x1019 -#define NUM_DEV_IDS 19 +#define NUM_DEV_IDS 20 #define NODE_ADDRESS_SIZE 6 #define ETH_LENGTH_OF_ADDRESS 6 @@ -601,7 +603,7 @@ #define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ #define E1000_EERD 0x00014 /* EEPROM Read - RW */ #define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ -#define E1000_FLA 0x0001C /* Flash Access Register - RW */ +#define E1000_FLA 0x0001C /* Flash Access - RW */ #define E1000_MDIC 0x00020 /* MDI Control - RW */ #define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ #define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ @@ -730,6 +732,7 @@ * the registers function in the same manner. 
*/ #define E1000_82542_CTRL E1000_CTRL +#define E1000_82542_CTRL_DUP E1000_CTRL_DUP #define E1000_82542_STATUS E1000_STATUS #define E1000_82542_EECD E1000_EECD #define E1000_82542_EERD E1000_EERD @@ -1485,7 +1488,6 @@ #define E1000_COLLISION_DISTANCE 64 #define E1000_FDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE #define E1000_HDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE -#define E1000_GB_HDX_COLLISION_DISTANCE 512 #define E1000_COLD_SHIFT 12 /* The number of Transmit and Receive Descriptors must be a multiple of 8 */ diff -Nru a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c --- a/drivers/net/e1000/e1000_main.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e1000/e1000_main.c Thu Jun 19 23:46:52 2003 @@ -30,7 +30,14 @@ /* Change Log * - * 5.0.43 3/5/03 + * 5.1.11 5/6/03 + * o Feature: Added support for 82546EB (Quad-port) hardware. + * o Feature: Added support for Diagnostics through Ethtool. + * o Cleanup: Removed /proc support. + * o Cleanup: Removed proprietary IDIAG interface. + * o Bug fix: TSO bug fixes. + * + * 5.0.42 3/5/03 * o Feature: Added support for 82541 and 82547 hardware. * o Feature: Added support for Intel Gigabit PHY (IGP) and a variety of * eeproms. @@ -46,51 +53,22 @@ * shared interrupt instances. * * 4.4.18 11/27/02 - * o Feature: Added user-settable knob for interrupt throttle rate (ITR). - * o Cleanup: removed large static array allocations. - * o Cleanup: C99 struct initializer format. - * o Bug fix: restore VLAN settings when interface is brought up. - * o Bug fix: return cleanly in probe if error in detecting MAC type. - * o Bug fix: Wake up on magic packet by default only if enabled in eeprom. - * o Bug fix: Validate MAC address in set_mac. - * o Bug fix: Throw away zero-length Tx skbs. - * o Bug fix: Make ethtool EEPROM acceses work on older versions of ethtool. 
- * - * 4.4.12 10/15/02 */ char e1000_driver_name[] = "e1000"; char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; -char e1000_driver_version[] = "5.0.43-k1"; +char e1000_driver_version[] = "5.1.11-k1"; char e1000_copyright[] = "Copyright (c) 1999-2003 Intel Corporation."; /* e1000_pci_tbl - PCI Device ID Table * - * Private driver_data field (last one) stores an index into e1000_strings * Wildcard entries (PCI_ANY_ID) should come last * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, - * Class, Class Mask, String Index } + * Class, Class Mask, private data (not used) } */ static struct pci_device_id e1000_pci_tbl[] __devinitdata = { - /* Intel(R) PRO/1000 Network Connection */ - {0x8086, 0x1000, 0x8086, 0x1000, 0, 0, 0}, - {0x8086, 0x1001, 0x8086, 0x1003, 0, 0, 0}, - {0x8086, 0x1004, 0x8086, 0x1004, 0, 0, 0}, - {0x8086, 0x1008, 0x8086, 0x1107, 0, 0, 0}, - {0x8086, 0x1009, 0x8086, 0x1109, 0, 0, 0}, - {0x8086, 0x100C, 0x8086, 0x1112, 0, 0, 0}, - {0x8086, 0x100E, 0x8086, 0x001E, 0, 0, 0}, - /* Compaq Gigabit Ethernet Server Adapter */ - {0x8086, 0x1000, 0x0E11, PCI_ANY_ID, 0, 0, 1}, - {0x8086, 0x1001, 0x0E11, PCI_ANY_ID, 0, 0, 1}, - {0x8086, 0x1004, 0x0E11, PCI_ANY_ID, 0, 0, 1}, - /* IBM Mobile, Desktop & Server Adapters */ - {0x8086, 0x1000, 0x1014, PCI_ANY_ID, 0, 0, 2}, - {0x8086, 0x1001, 0x1014, PCI_ANY_ID, 0, 0, 2}, - {0x8086, 0x1004, 0x1014, PCI_ANY_ID, 0, 0, 2}, - /* Generic */ {0x8086, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {0x8086, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {0x8086, 0x1004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, @@ -106,6 +84,7 @@ {0x8086, 0x1016, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {0x8086, 0x1017, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {0x8086, 0x101E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {0x8086, 0x101D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {0x8086, 0x1013, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {0x8086, 0x1019, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, /* required last entry */ @@ -114,12 +93,6 @@ 
MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); -static char *e1000_strings[] = { - "Intel(R) PRO/1000 Network Connection", - "HP Gigabit Ethernet Server Adapter", - "IBM Mobile, Desktop & Server Adapters" -}; - /* Local Function Prototypes */ int e1000_up(struct e1000_adapter *adapter); @@ -130,7 +103,7 @@ static int e1000_init_module(void); static void e1000_exit_module(void); static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent); -static void e1000_remove(struct pci_dev *pdev); +static void __devexit e1000_remove(struct pci_dev *pdev); static int e1000_sw_init(struct e1000_adapter *adapter); static int e1000_open(struct net_device *netdev); static int e1000_close(struct net_device *netdev); @@ -155,8 +128,14 @@ static inline void e1000_irq_disable(struct e1000_adapter *adapter); static inline void e1000_irq_enable(struct e1000_adapter *adapter); static void e1000_intr(int irq, void *data, struct pt_regs *regs); -static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter); +#ifdef CONFIG_E1000_NAPI +static int e1000_clean(struct net_device *netdev, int *budget); +static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter, + int *work_done, int work_to_do); +#else static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter); +#endif +static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter); static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter); static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, @@ -189,7 +168,6 @@ .priority = 0 }; - /* Exported from other modules */ extern void e1000_check_options(struct e1000_adapter *adapter); @@ -228,8 +206,9 @@ printk(KERN_INFO "%s\n", e1000_copyright); ret = pci_module_init(&e1000_driver); - if(ret >= 0) + if(ret >= 0) { register_reboot_notifier(&e1000_notifier_reboot); + } return ret; } @@ -418,6 +397,10 @@ netdev->do_ioctl = &e1000_ioctl; netdev->tx_timeout = 
&e1000_tx_timeout; netdev->watchdog_timeo = 5 * HZ; +#ifdef CONFIG_E1000_NAPI + netdev->poll = &e1000_clean; + netdev->weight = 64; +#endif netdev->vlan_rx_register = e1000_vlan_rx_register; netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid; netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid; @@ -428,7 +411,6 @@ netdev->base_addr = adapter->hw.io_base; adapter->bd_number = cards_found; - adapter->id_string = e1000_strings[ent->driver_data]; /* setup the private structure */ @@ -445,6 +427,12 @@ netdev->features = NETIF_F_SG; } +#ifdef NETIF_F_TSO + if((adapter->hw.mac_type >= e1000_82544) && + (adapter->hw.mac_type != e1000_82547)) + netdev->features |= NETIF_F_TSO; +#endif + if(pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; @@ -483,15 +471,14 @@ (void (*)(void *))e1000_tx_timeout_task, netdev); register_netdev(netdev); - memcpy(adapter->ifname, netdev->name, IFNAMSIZ); - adapter->ifname[IFNAMSIZ-1] = 0; /* we're going to reset, so assume we have no link for now */ netif_carrier_off(netdev); netif_stop_queue(netdev); - printk(KERN_INFO "%s: %s\n", netdev->name, adapter->id_string); + printk(KERN_INFO "%s: Intel(R) PRO/1000 Network Connection\n", + netdev->name); e1000_check_options(adapter); /* Initial Wake on LAN setting @@ -551,7 +538,6 @@ e1000_phy_hw_reset(&adapter->hw); - iounmap(adapter->hw.hw_addr); pci_release_regions(pdev); @@ -814,8 +800,9 @@ e1000_config_collision_dist(&adapter->hw); - /* Setup Transmit Descriptor Settings for this adapter */ - adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_IDE; + /* Setup Transmit Descriptor Settings for eop descriptor */ + adapter->txd_cmd = E1000_TXD_CMD_IDE | E1000_TXD_CMD_EOP | + E1000_TXD_CMD_IFCS; if(adapter->hw.report_tx_early == 1) adapter->txd_cmd |= E1000_TXD_CMD_RS; @@ -1413,10 +1400,63 @@ #define E1000_TX_FLAGS_CSUM 0x00000001 #define E1000_TX_FLAGS_VLAN 0x00000002 +#define E1000_TX_FLAGS_TSO 0x00000004 #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 #define E1000_TX_FLAGS_VLAN_SHIFT 16 static inline 
boolean_t +e1000_tso(struct e1000_adapter *adapter, struct sk_buff *skb) +{ +#ifdef NETIF_F_TSO + struct e1000_context_desc *context_desc; + int i; + uint8_t ipcss, ipcso, tucss, tucso, hdr_len; + uint16_t ipcse, tucse, mss; + + if(skb_shinfo(skb)->tso_size) { + hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); + mss = skb_shinfo(skb)->tso_size; + skb->nh.iph->tot_len = 0; + skb->nh.iph->check = 0; + skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr, + skb->nh.iph->daddr, + 0, + IPPROTO_TCP, + 0); + ipcss = skb->nh.raw - skb->data; + ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data; + ipcse = skb->h.raw - skb->data - 1; + tucss = skb->h.raw - skb->data; + tucso = (void *)&(skb->h.th->check) - (void *)skb->data; + tucse = 0; + + i = adapter->tx_ring.next_to_use; + context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i); + + context_desc->lower_setup.ip_fields.ipcss = ipcss; + context_desc->lower_setup.ip_fields.ipcso = ipcso; + context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse); + context_desc->upper_setup.tcp_fields.tucss = tucss; + context_desc->upper_setup.tcp_fields.tucso = tucso; + context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse); + context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss); + context_desc->tcp_seg_setup.fields.hdr_len = hdr_len; + context_desc->cmd_and_length = cpu_to_le32( + E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | + E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP | + (skb->len - (hdr_len))); + + if(++i == adapter->tx_ring.count) i = 0; + adapter->tx_ring.next_to_use = i; + + return TRUE; + } +#endif + + return FALSE; +} + +static inline boolean_t e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb) { struct e1000_context_desc *context_desc; @@ -1434,8 +1474,7 @@ context_desc->upper_setup.tcp_fields.tucso = cso; context_desc->upper_setup.tcp_fields.tucse = 0; context_desc->tcp_seg_setup.data = 0; - context_desc->cmd_and_length = - cpu_to_le32(adapter->txd_cmd | E1000_TXD_CMD_DEXT); + 
context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT); if(++i == adapter->tx_ring.count) i = 0; adapter->tx_ring.next_to_use = i; @@ -1450,11 +1489,16 @@ #define E1000_MAX_DATA_PER_TXD (1<tx_ring; int len = skb->len, offset = 0, size, count = 0, i; +#ifdef NETIF_F_TSO + int tso = skb_shinfo(skb)->tso_size; +#endif + int nr_frags = skb_shinfo(skb)->nr_frags; int f; len -= skb->data_len; @@ -1462,6 +1506,12 @@ while(len) { size = min(len, E1000_MAX_DATA_PER_TXD); +#ifdef NETIF_F_TSO + /* Workaround for premature desc write-backs + * in TSO mode. Append 4-byte sentinel desc */ + if(tso && !nr_frags && size == len && size > 4) + size -= 4; +#endif tx_ring->buffer_info[i].length = size; tx_ring->buffer_info[i].dma = pci_map_single(adapter->pdev, @@ -1476,7 +1526,7 @@ if(++i == tx_ring->count) i = 0; } - for(f = 0; f < skb_shinfo(skb)->nr_frags; f++) { + for(f = 0; f < nr_frags; f++) { struct skb_frag_struct *frag; frag = &skb_shinfo(skb)->frags[f]; @@ -1485,6 +1535,12 @@ while(len) { size = min(len, E1000_MAX_DATA_PER_TXD); +#ifdef NETIF_F_TSO + /* Workaround for premature desc write-backs + * in TSO mode. 
Append 4-byte sentinel desc */ + if(tso && f == (nr_frags-1) && size == len && size > 4) + size -= 4; +#endif tx_ring->buffer_info[i].length = size; tx_ring->buffer_info[i].dma = pci_map_page(adapter->pdev, @@ -1502,6 +1558,7 @@ } if(--i < 0) i = tx_ring->count - 1; tx_ring->buffer_info[i].skb = skb; + tx_ring->buffer_info[first].next_to_watch = i; return count; } @@ -1511,11 +1568,14 @@ { struct e1000_desc_ring *tx_ring = &adapter->tx_ring; struct e1000_tx_desc *tx_desc = NULL; - uint32_t txd_upper, txd_lower; + uint32_t txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; int i; - txd_upper = 0; - txd_lower = adapter->txd_cmd; + if(tx_flags & E1000_TX_FLAGS_TSO) { + txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | + E1000_TXD_CMD_TSE; + txd_upper |= (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; + } if(tx_flags & E1000_TX_FLAGS_CSUM) { txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; @@ -1538,7 +1598,7 @@ if(++i == tx_ring->count) i = 0; } - tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP); + tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. 
(Only @@ -1598,6 +1658,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct e1000_adapter *adapter = netdev->priv; + unsigned int first; int tx_flags = 0; if(skb->len <= 0) { @@ -1623,10 +1684,14 @@ tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); } - if(e1000_tx_csum(adapter, skb)) + first = adapter->tx_ring.next_to_use; + + if(e1000_tso(adapter, skb)) + tx_flags |= E1000_TX_FLAGS_TSO; + else if(e1000_tx_csum(adapter, skb)) tx_flags |= E1000_TX_FLAGS_CSUM; - e1000_tx_queue(adapter, e1000_tx_map(adapter, skb), tx_flags); + e1000_tx_queue(adapter, e1000_tx_map(adapter, skb, first), tx_flags); netdev->trans_start = jiffies; @@ -1858,6 +1923,7 @@ } if((hw->mac_type <= e1000_82546) && + (hw->phy_type == e1000_phy_m88) && !e1000_read_phy_reg(hw, M88E1000_RX_ERR_CNTR, &phy_tmp)) adapter->phy_stats.receive_errors += phy_tmp; } @@ -1904,7 +1970,9 @@ struct net_device *netdev = data; struct e1000_adapter *adapter = netdev->priv; uint32_t icr = E1000_READ_REG(&adapter->hw, ICR); +#ifndef CONFIG_E1000_NAPI int i; +#endif if(!icr) return; /* Not our interrupt */ @@ -1914,12 +1982,52 @@ mod_timer(&adapter->watchdog_timer, jiffies); } +#ifdef CONFIG_E1000_NAPI + if(netif_rx_schedule_prep(netdev)) { + + /* Disable interrupts and register for poll. The flush + of the posted write is intentionally left out. 
+ */ + + atomic_inc(&adapter->irq_sem); + E1000_WRITE_REG(&adapter->hw, IMC, ~0); + __netif_rx_schedule(netdev); + } +#else for(i = 0; i < E1000_MAX_INTR; i++) if(!e1000_clean_rx_irq(adapter) && !e1000_clean_tx_irq(adapter)) break; +#endif +} +#ifdef CONFIG_E1000_NAPI +/** + * e1000_clean - NAPI Rx polling callback + * @adapter: board private structure + **/ + +static int +e1000_clean(struct net_device *netdev, int *budget) +{ + struct e1000_adapter *adapter = netdev->priv; + int work_to_do = min(*budget, netdev->quota); + int work_done = 0; + + e1000_clean_tx_irq(adapter); + e1000_clean_rx_irq(adapter, &work_done, work_to_do); + + *budget -= work_done; + netdev->quota -= work_done; + + if(work_done < work_to_do) { + netif_rx_complete(netdev); + e1000_irq_enable(adapter); + } + + return (work_done >= work_to_do); } +#endif /** * e1000_clean_tx_irq - Reclaim resources after transmit completes @@ -1932,39 +2040,47 @@ struct e1000_desc_ring *tx_ring = &adapter->tx_ring; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; - struct e1000_tx_desc *tx_desc; - int i, cleaned = FALSE; + struct e1000_tx_desc *tx_desc, *eop_desc; + struct e1000_buffer *buffer_info; + int i, eop, cleaned = FALSE; i = tx_ring->next_to_clean; - tx_desc = E1000_TX_DESC(*tx_ring, i); + eop = tx_ring->buffer_info[i].next_to_watch; + eop_desc = E1000_TX_DESC(*tx_ring, eop); - while(tx_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { + while(eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { - cleaned = TRUE; + for(cleaned = FALSE; !cleaned; ) { + tx_desc = E1000_TX_DESC(*tx_ring, i); + buffer_info = &tx_ring->buffer_info[i]; - if(tx_ring->buffer_info[i].dma) { + if(buffer_info->dma) { - pci_unmap_page(pdev, - tx_ring->buffer_info[i].dma, - tx_ring->buffer_info[i].length, - PCI_DMA_TODEVICE); + pci_unmap_page(pdev, + buffer_info->dma, + buffer_info->length, + PCI_DMA_TODEVICE); - tx_ring->buffer_info[i].dma = 0; - } + buffer_info->dma = 0; + } - 
if(tx_ring->buffer_info[i].skb) { + if(buffer_info->skb) { - dev_kfree_skb_any(tx_ring->buffer_info[i].skb); + dev_kfree_skb_any(buffer_info->skb); - tx_ring->buffer_info[i].skb = NULL; - } + buffer_info->skb = NULL; + } - tx_desc->buffer_addr = 0; - tx_desc->lower.data = 0; - tx_desc->upper.data = 0; + tx_desc->buffer_addr = 0; + tx_desc->lower.data = 0; + tx_desc->upper.data = 0; - if(++i == tx_ring->count) i = 0; - tx_desc = E1000_TX_DESC(*tx_ring, i); + cleaned = (i == eop); + if(++i == tx_ring->count) i = 0; + } + + eop = tx_ring->buffer_info[i].next_to_watch; + eop_desc = E1000_TX_DESC(*tx_ring, eop); } tx_ring->next_to_clean = i; @@ -1981,7 +2097,12 @@ **/ static boolean_t +#ifdef CONFIG_E1000_NAPI +e1000_clean_rx_irq(struct e1000_adapter *adapter, int *work_done, + int work_to_do) +#else e1000_clean_rx_irq(struct e1000_adapter *adapter) +#endif { struct e1000_desc_ring *rx_ring = &adapter->rx_ring; struct net_device *netdev = adapter->netdev; @@ -1998,6 +2119,13 @@ while(rx_desc->status & E1000_RXD_STAT_DD) { +#ifdef CONFIG_E1000_NAPI + if(*work_done >= work_to_do) + break; + + (*work_done)++; +#endif + cleaned = TRUE; pci_unmap_single(pdev, @@ -2060,12 +2188,21 @@ e1000_rx_checksum(adapter, rx_desc, skb); skb->protocol = eth_type_trans(skb, netdev); +#ifdef CONFIG_E1000_NAPI + if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) { + vlan_hwaccel_receive_skb(skb, adapter->vlgrp, + (rx_desc->special & E1000_RXD_SPC_VLAN_MASK)); + } else { + netif_receive_skb(skb); + } +#else /* CONFIG_E1000_NAPI */ if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) { vlan_hwaccel_rx(skb, adapter->vlgrp, (rx_desc->special & E1000_RXD_SPC_VLAN_MASK)); } else { netif_rx(skb); } +#endif /* CONFIG_E1000_NAPI */ netdev->last_rx = jiffies; rx_desc->status = 0; @@ -2517,7 +2654,6 @@ } return NOTIFY_DONE; } - static int e1000_suspend(struct pci_dev *pdev, uint32_t state) diff -Nru a/drivers/net/e1000/e1000_osdep.h b/drivers/net/e1000/e1000_osdep.h --- 
a/drivers/net/e1000/e1000_osdep.h Thu Jun 19 23:46:52 2003 +++ b/drivers/net/e1000/e1000_osdep.h Thu Jun 19 23:46:52 2003 @@ -27,7 +27,7 @@ *******************************************************************************/ -/* glue for the OS independant part of e1000 +/* glue for the OS independent part of e1000 * includes register access macros */ diff -Nru a/drivers/net/eepro.c b/drivers/net/eepro.c --- a/drivers/net/eepro.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/eepro.c Thu Jun 19 23:46:52 2003 @@ -1716,7 +1716,7 @@ static int n_eepro; /* For linux 2.1.xx */ -MODULE_AUTHOR("Pascal Dupuis for the 2.1 stuff (locking,...)"); +MODULE_AUTHOR("Pascal Dupuis, and aris@cathedrallabs.org"); MODULE_DESCRIPTION("Intel i82595 ISA EtherExpressPro10/10+ driver"); MODULE_LICENSE("GPL"); diff -Nru a/drivers/net/eepro100.c b/drivers/net/eepro100.c --- a/drivers/net/eepro100.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/eepro100.c Thu Jun 19 23:46:52 2003 @@ -2392,6 +2392,7 @@ { PCI_VENDOR_ID_INTEL, 0x103C, PCI_ANY_ID, PCI_ANY_ID, }, { PCI_VENDOR_ID_INTEL, 0x103D, PCI_ANY_ID, PCI_ANY_ID, }, { PCI_VENDOR_ID_INTEL, 0x103E, PCI_ANY_ID, PCI_ANY_ID, }, + { PCI_VENDOR_ID_INTEL, 0x1050, PCI_ANY_ID, PCI_ANY_ID, }, { PCI_VENDOR_ID_INTEL, 0x1059, PCI_ANY_ID, PCI_ANY_ID, }, { PCI_VENDOR_ID_INTEL, 0x1227, PCI_ANY_ID, PCI_ANY_ID, }, { PCI_VENDOR_ID_INTEL, 0x1228, PCI_ANY_ID, PCI_ANY_ID, }, diff -Nru a/drivers/net/ns83820.c b/drivers/net/ns83820.c --- a/drivers/net/ns83820.c Thu Jun 19 23:46:53 2003 +++ b/drivers/net/ns83820.c Thu Jun 19 23:46:53 2003 @@ -1766,7 +1766,7 @@ int using_dac = 0; /* See if we can set the dma mask early on; failure is fatal. 
*/ - if (TRY_DAC && !pci_set_dma_mask(pci_dev, 0xffffffffffffffff)) { + if (TRY_DAC && !pci_set_dma_mask(pci_dev, 0xffffffffffffffffULL)) { using_dac = 1; } else if (!pci_set_dma_mask(pci_dev, 0xffffffff)) { using_dac = 0; diff -Nru a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c --- a/drivers/net/pci-skeleton.c Thu Jun 19 23:46:51 2003 +++ b/drivers/net/pci-skeleton.c Thu Jun 19 23:46:51 2003 @@ -602,7 +602,7 @@ *ioaddr_out = NULL; *dev_out = NULL; - /* dev zeroed in init_etherdev */ + /* dev zeroed in alloc_etherdev */ dev = alloc_etherdev (sizeof (*tp)); if (dev == NULL) { printk (KERN_ERR PFX "unable to alloc new ethernet\n"); @@ -789,7 +789,7 @@ dev->irq = pdev->irq; dev->base_addr = (unsigned long) ioaddr; - /* dev->priv/tp zeroed and aligned in init_etherdev */ + /* dev->priv/tp zeroed and aligned in alloc_etherdev */ tp = dev->priv; /* note: tp->chipset set in netdrv_init_board */ diff -Nru a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c --- a/drivers/net/pcnet32.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/pcnet32.c Thu Jun 19 23:46:52 2003 @@ -974,7 +974,9 @@ } skb_reserve (rx_skbuff, 2); } - lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->tail, rx_skbuff->len, PCI_DMA_FROMDEVICE); + + if (lp->rx_dma_addr[i] == 0) + lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->tail, rx_skbuff->len, PCI_DMA_FROMDEVICE); lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]); lp->rx_ring[i].buf_length = le16_to_cpu(-PKT_BUF_SZ); lp->rx_ring[i].status = le16_to_cpu(0x8000); @@ -1009,7 +1011,7 @@ /* ReInit Ring */ lp->a.write_csr (ioaddr, 0, 1); i = 0; - while (i++ < 100) + while (i++ < 1000) if (lp->a.read_csr (ioaddr, 0) & 0x0100) break; @@ -1100,6 +1102,7 @@ lp->tx_skbuff[entry] = skb; lp->tx_dma_addr[entry] = pci_map_single(lp->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE); lp->tx_ring[entry].base = (u32)le32_to_cpu(lp->tx_dma_addr[entry]); + wmb(); /* Make sure owner changes after all others are visible */ 
lp->tx_ring[entry].status = le16_to_cpu(status); lp->cur_tx++; diff -Nru a/drivers/net/r8169.c b/drivers/net/r8169.c --- a/drivers/net/r8169.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/r8169.c Thu Jun 19 23:46:52 2003 @@ -42,6 +42,7 @@ #include #include #include + #include #define RTL8169_VERSION "1.2" @@ -364,8 +365,8 @@ *ioaddr_out = NULL; *dev_out = NULL; - // dev zeroed in init_etherdev - dev = init_etherdev(NULL, sizeof (*tp)); + // dev zeroed in alloc_etherdev + dev = alloc_etherdev(sizeof (*tp)); if (dev == NULL) { printk(KERN_ERR PFX "unable to alloc new ethernet\n"); return -ENOMEM; @@ -389,18 +390,18 @@ printk(KERN_ERR PFX "region #1 not an MMIO resource, aborting\n"); rc = -ENODEV; - goto err_out; + goto err_out_disable; } // check for weird/broken PCI region reporting if (mmio_len < RTL_MIN_IO_SIZE) { printk(KERN_ERR PFX "Invalid PCI region size(s), aborting\n"); rc = -ENODEV; - goto err_out; + goto err_out_disable; } rc = pci_request_regions(pdev, dev->name); if (rc) - goto err_out; + goto err_out_disable; // enable PCI bus-mastering pci_set_master(pdev); @@ -448,8 +449,10 @@ err_out_free_res: pci_release_regions(pdev); +err_out_disable: + pci_disable_device(pdev); + err_out: - unregister_netdev(dev); kfree(dev); return rc; } @@ -462,7 +465,7 @@ void *ioaddr = NULL; static int board_idx = -1; static int printed_version = 0; - int i; + int i, rc; int option = -1, Cap10_100 = 0, Cap1000 = 0; assert(pdev != NULL); @@ -475,20 +478,18 @@ printed_version = 1; } - i = rtl8169_init_board(pdev, &dev, &ioaddr); - if (i < 0) { - return i; - } + rc = rtl8169_init_board(pdev, &dev, &ioaddr); + if (rc) + return rc; tp = dev->priv; assert(ioaddr != NULL); assert(dev != NULL); assert(tp != NULL); - // Get MAC address // - for (i = 0; i < MAC_ADDR_LEN; i++) { + // Get MAC address. 
FIXME: read EEPROM + for (i = 0; i < MAC_ADDR_LEN; i++) dev->dev_addr[i] = RTL_R8(MAC0 + i); - } dev->open = rtl8169_open; dev->hard_start_xmit = rtl8169_start_xmit; @@ -505,11 +506,20 @@ tp->pci_dev = pdev; tp->mmio_addr = ioaddr; + spin_lock_init(&tp->lock); + + rc = register_netdev(dev); + if (rc) { + iounmap(ioaddr); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(dev); + return rc; + } + printk(KERN_DEBUG "%s: Identified chip type is '%s'.\n", dev->name, rtl_chip_info[tp->chipset].name); - spin_lock_init(&tp->lock); - pci_set_drvdata(pdev, dev); printk(KERN_INFO "%s: %s at 0x%lx, " @@ -621,7 +631,7 @@ rtl8169_remove_one(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); - struct rtl8169_private *tp = (struct rtl8169_private *) (dev->priv); + struct rtl8169_private *tp = dev->priv; assert(dev != NULL); assert(tp != NULL); @@ -634,6 +644,7 @@ memset(dev, 0xBC, sizeof (struct net_device) + sizeof (struct rtl8169_private)); + pci_disable_device(pdev); kfree(dev); pci_set_drvdata(pdev, NULL); } @@ -821,10 +832,9 @@ void *ioaddr = tp->mmio_addr; int entry = tp->cur_tx % NUM_TX_DESC; - if(skb->len < ETH_ZLEN) - { + if (skb->len < ETH_ZLEN) { skb = skb_padto(skb, ETH_ZLEN); - if(skb == NULL) + if (skb == NULL) return 0; } diff -Nru a/drivers/net/sis900.c b/drivers/net/sis900.c --- a/drivers/net/sis900.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/sis900.c Thu Jun 19 23:46:52 2003 @@ -201,7 +201,7 @@ static int sis900_close(struct net_device *net_dev); static int mii_ioctl(struct net_device *net_dev, struct ifreq *rq, int cmd); static struct net_device_stats *sis900_get_stats(struct net_device *net_dev); -static u16 sis900_compute_hashtable_index(u8 *addr, u8 revision); +static u16 sis900_mcast_bitnr(u8 *addr, u8 revision); static void set_rx_mode(struct net_device *net_dev); static void sis900_reset(struct net_device *net_dev); static void sis630_set_eq(struct net_device *net_dev, u8 revision); @@ -213,7 +213,7 @@ static void 
sis900_set_mode (long ioaddr, int speed, int duplex); /** - * sis900_get_mac_addr: - Get MAC address for stand alone SiS900 model + * sis900_get_mac_addr - Get MAC address for stand alone SiS900 model * @pci_dev: the sis900 pci device * @net_dev: the net device to get address for * @@ -243,7 +243,7 @@ } /** - * sis630e_get_mac_addr: - Get MAC address for SiS630E model + * sis630e_get_mac_addr - Get MAC address for SiS630E model * @pci_dev: the sis900 pci device * @net_dev: the net device to get address for * @@ -276,7 +276,7 @@ /** - * sis635_get_mac_addr: - Get MAC address for SIS635 model + * sis635_get_mac_addr - Get MAC address for SIS635 model * @pci_dev: the sis900 pci device * @net_dev: the net device to get address for * @@ -312,7 +312,7 @@ } /** - * sis96x_get_mac_addr: - Get MAC address for SiS962 or SiS963 model + * sis96x_get_mac_addr - Get MAC address for SiS962 or SiS963 model * @pci_dev: the sis900 pci device * @net_dev: the net device to get address for * @@ -354,7 +354,7 @@ } /** - * sis900_probe: - Probe for sis900 device + * sis900_probe - Probe for sis900 device * @pci_dev: the sis900 pci device * @pci_id: the pci device ID * @@ -499,7 +499,7 @@ } /** - * sis900_mii_probe: - Probe MII PHY for sis900 + * sis900_mii_probe - Probe MII PHY for sis900 * @net_dev: the net device to probe for * * Search for total of 32 possible mii phy addresses. 
@@ -528,7 +528,7 @@ mii_status = mdio_read(net_dev, phy_addr, MII_STATUS); if (mii_status == 0xffff || mii_status == 0x0000) - /* the mii is not accessable, try next one */ + /* the mii is not accessible, try next one */ continue; if ((mii_phy = kmalloc(sizeof(struct mii_phy), GFP_KERNEL)) == NULL) { @@ -593,7 +593,7 @@ current->state = TASK_INTERRUPTIBLE; schedule_timeout(0); poll_bit ^= (mdio_read(net_dev, sis_priv->cur_phy, MII_STATUS) & poll_bit); - if (jiffies >= timeout) { + if (time_after_eq(jiffies, timeout)) { printk(KERN_WARNING "%s: reset phy and link down now\n", net_dev->name); return -ETIME; } @@ -619,7 +619,7 @@ } /** - * sis900_default_phy: - Select default PHY for sis900 mac. + * sis900_default_phy - Select default PHY for sis900 mac. * @net_dev: the net device to probe for * * Select first detected PHY with link as default. @@ -672,7 +672,7 @@ /** - * sis900_set_capability: - set the media capability of network adapter. + * sis900_set_capability - set the media capability of network adapter. * @net_dev : the net device to probe for * @phy : default PHY * @@ -702,7 +702,7 @@ #define eeprom_delay() inl(ee_addr) /** - * read_eeprom: - Read Serial EEPROM + * read_eeprom - Read Serial EEPROM * @ioaddr: base i/o address * @location: the EEPROM location to read * @@ -752,7 +752,7 @@ /* Read and write the MII management registers using software-generated serial MDIO protocol. 
Note that the command bits and data bits are - send out seperately */ + send out separately */ #define mdio_delay() inl(mdio_addr) static void mdio_idle(long mdio_addr) @@ -777,7 +777,7 @@ } /** - * mdio_read: - read MII PHY register + * mdio_read - read MII PHY register * @net_dev: the net device to read * @phy_id: the phy address to read * @location: the phy regiester id to read @@ -819,7 +819,7 @@ } /** - * mdio_write: - write MII PHY register + * mdio_write - write MII PHY register * @net_dev: the net device to write * @phy_id: the phy address to write * @location: the phy regiester id to write @@ -873,7 +873,7 @@ /** - * sis900_reset_phy: - reset sis900 mii phy. + * sis900_reset_phy - reset sis900 mii phy. * @net_dev: the net device to write * @phy_addr: default phy address * @@ -896,7 +896,7 @@ } /** - * sis900_open: - open sis900 device + * sis900_open - open sis900 device * @net_dev: the net device to open * * Do some initialization and start net interface. @@ -953,7 +953,7 @@ } /** - * sis900_init_rxfilter: - Initialize the Rx filter + * sis900_init_rxfilter - Initialize the Rx filter * @net_dev: the net device to initialize for * * Set receive filter address to our MAC address @@ -991,7 +991,7 @@ } /** - * sis900_init_tx_ring: - Initialize the Tx descriptor ring + * sis900_init_tx_ring - Initialize the Tx descriptor ring * @net_dev: the net device to initialize for * * Initialize the Tx descriptor ring, @@ -1024,7 +1024,7 @@ } /** - * sis900_init_rx_ring: - Initialize the Rx descriptor ring + * sis900_init_rx_ring - Initialize the Rx descriptor ring * @net_dev: the net device to initialize for * * Initialize the Rx descriptor ring, @@ -1078,7 +1078,7 @@ } /** - * sis630_set_eq: - set phy equalizer value for 630 LAN + * sis630_set_eq - set phy equalizer value for 630 LAN * @net_dev: the net device to set equalizer value * @revision: 630 LAN revision number * @@ -1166,7 +1166,7 @@ } /** - * sis900_timer: - sis900 timer routine + * sis900_timer - sis900 
timer routine * @data: pointer to sis900 net device * * On each timer ticks we check two things, @@ -1236,7 +1236,7 @@ } /** - * sis900_check_mode: - check the media mode for sis900 + * sis900_check_mode - check the media mode for sis900 * @net_dev: the net device to be checked * @mii_phy: the mii phy * @@ -1267,7 +1267,7 @@ } /** - * sis900_set_mode: - Set the media mode of mac register. + * sis900_set_mode - Set the media mode of mac register. * @ioaddr: the address of the device * @speed : the transmit speed to be determined * @duplex: the duplex mode to be determined @@ -1311,7 +1311,7 @@ } /** - * sis900_auto_negotiate: Set the Auto-Negotiation Enable/Reset bit. + * sis900_auto_negotiate - Set the Auto-Negotiation Enable/Reset bit. * @net_dev: the net device to read mode for * @phy_addr: mii phy address * @@ -1345,7 +1345,7 @@ /** - * sis900_read_mode: - read media mode for sis900 internal phy + * sis900_read_mode - read media mode for sis900 internal phy * @net_dev: the net device to read mode for * @speed : the transmit speed to be determined * @duplex : the duplex mode to be determined @@ -1402,7 +1402,7 @@ } /** - * sis900_tx_timeout: - sis900 transmit timeout routine + * sis900_tx_timeout - sis900 transmit timeout routine * @net_dev: the net device to transmit * * print transmit timeout status @@ -1457,7 +1457,7 @@ } /** - * sis900_start_xmit: - sis900 start transmit routine + * sis900_start_xmit - sis900 start transmit routine * @skb: socket buffer pointer to put the data being transmitted * @net_dev: the net device to transmit with * @@ -1527,7 +1527,7 @@ } /** - * sis900_interrupt: - sis900 interrupt handler + * sis900_interrupt - sis900 interrupt handler * @irq: the irq number * @dev_instance: the client data object * @regs: snapshot of processor context @@ -1586,7 +1586,7 @@ } /** - * sis900_rx: - sis900 receive routine + * sis900_rx - sis900 receive routine * @net_dev: the net device which receives data * * Process receive interrupt events, @@ 
-1725,7 +1725,7 @@ } /** - * sis900_finish_xmit: - finish up transmission of packets + * sis900_finish_xmit - finish up transmission of packets * @net_dev: the net device to be transmitted on * * Check for error condition and free socket buffer etc @@ -1795,7 +1795,7 @@ } /** - * sis900_close: - close sis900 device + * sis900_close - close sis900 device * @net_dev: the net device to be closed * * Disable interrupts, stop the Tx and Rx Status Machine @@ -1851,7 +1851,7 @@ } /** - * netdev_ethtool_ioctl: - For the basic support of ethtool + * netdev_ethtool_ioctl - For the basic support of ethtool * @net_dev: the net device to command for * @useraddr: start address of interface request * @@ -1885,7 +1885,7 @@ } /** - * mii_ioctl: - process MII i/o control command + * mii_ioctl - process MII i/o control command * @net_dev: the net device to command for * @rq: parameter for command * @cmd: the i/o command @@ -1924,7 +1924,7 @@ } /** - * sis900_get_stats: - Get sis900 read/write statistics + * sis900_get_stats - Get sis900 read/write statistics * @net_dev: the net device to get statistics for * * get tx/rx statistics for sis900 @@ -1939,7 +1939,7 @@ } /** - * sis900_set_config: - Set media type by net_device.set_config + * sis900_set_config - Set media type by net_device.set_config * @dev: the net device for media type change * @map: ifmap passed by ifconfig * @@ -1976,7 +1976,7 @@ status = mdio_read(dev, mii_phy->phy_addr, MII_CONTROL); /* enable auto negotiation and reset the negotioation - (I dont really know what the auto negatiotiation reset + (I don't really know what the auto negatiotiation reset really means, but it sounds for me right to do one here)*/ mdio_write(dev, mii_phy->phy_addr, MII_CONTROL, status | MII_CNTL_AUTO | MII_CNTL_RST_AUTO); @@ -2036,7 +2036,7 @@ } /** - * sis900_compute_hashtable_index: - compute hashtable index + * sis900_mcast_bitnr - compute hashtable index * @addr: multicast address * @revision: revision id of chip * @@ -2046,7 +2046,7 
@@ * multicast hash table. */ -static u16 sis900_compute_hashtable_index(u8 *addr, u8 revision) +static inline u16 sis900_mcast_bitnr(u8 *addr, u8 revision) { u32 crc = ether_crc(6, addr); @@ -2059,7 +2059,7 @@ } /** - * set_rx_mode: - Set SiS900 receive mode + * set_rx_mode - Set SiS900 receive mode * @net_dev: the net device to be set * * Set SiS900 receive mode for promiscuous, multicast, or broadcast mode. @@ -2100,9 +2100,11 @@ struct dev_mc_list *mclist; rx_mode = RFAAB; for (i = 0, mclist = net_dev->mc_list; mclist && i < net_dev->mc_count; - i++, mclist = mclist->next) - set_bit(sis900_compute_hashtable_index(mclist->dmi_addr, revision), - mc_filter); + i++, mclist = mclist->next) { + unsigned int bit_nr = + sis900_mcast_bitnr(mclist->dmi_addr, revision); + mc_filter[bit_nr >> 4] |= (1 << bit_nr); + } } /* update Multicast Hash Table in Receive Filter */ @@ -2131,7 +2133,7 @@ } /** - * sis900_reset: - Reset sis900 MAC + * sis900_reset - Reset sis900 MAC * @net_dev: the net device to reset * * reset sis900 MAC and wait until finished @@ -2166,7 +2168,7 @@ } /** - * sis900_remove: - Remove sis900 device + * sis900_remove - Remove sis900 device * @pci_dev: the pci device to be removed * * remove and release SiS900 net device @@ -2195,10 +2197,10 @@ } static struct pci_driver sis900_pci_driver = { - name: SIS900_MODULE_NAME, - id_table: sis900_pci_tbl, - probe: sis900_probe, - remove: __devexit_p(sis900_remove), + .name = SIS900_MODULE_NAME, + .id_table = sis900_pci_tbl, + .probe = sis900_probe, + .remove = __devexit_p(sis900_remove), }; static int __init sis900_init_module(void) diff -Nru a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c --- a/drivers/net/sk98lin/skge.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/sk98lin/skge.c Thu Jun 19 23:46:52 2003 @@ -498,7 +498,7 @@ } /* Configure DMA attributes. 
*/ - if (pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff) && + if (pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL) && pci_set_dma_mask(pdev, (u64) 0xffffffff)) continue; diff -Nru a/drivers/net/sundance.c b/drivers/net/sundance.c --- a/drivers/net/sundance.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/sundance.c Thu Jun 19 23:46:52 2003 @@ -71,19 +71,32 @@ Versin LK1.06b (D-Link): - New tx scheme, adaptive tx_coalesce - + + Version LK1.07 (D-Link): + - Fix tx bugs in big-endian machines + - Remove unused max_interrupt_work module parameter, the new + NAPI-like rx scheme doesn't need it. + - Remove redundancy get_stats() in intr_handler(), those + I/O access could affect performance in ARM-based system + - Add Linux software VLAN support + + Version LK1.08 (D-Link): + - Fix bug of custom mac address + (StationAddr register only accept word write) + + Version LK1.09 (D-Link): + - Fix the flowctrl bug. + - Set Pause bit in MII ANAR if flow control enabled. */ #define DRV_NAME "sundance" -#define DRV_VERSION "1.01+LK1.06b" -#define DRV_RELDATE "6-Nov-2002" +#define DRV_VERSION "1.01+LK1.09a" +#define DRV_RELDATE "16-May-2003" /* The user-configurable values. These may be modified when a driver module is loaded.*/ static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ -/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ -static int max_interrupt_work = 0; /* Maximum number of multicast addresses to filter (vs. rx-all-multicast). Typical is a 64 element hash table based on the Ethernet CRC. */ static int multicast_filter_limit = 32; @@ -129,8 +142,7 @@ /* Operational parameters that usually are not changed. */ /* Time in jiffies before concluding the transmitter is hung. 
*/ #define TX_TIMEOUT (4*HZ) - -#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ +#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ #ifndef __KERNEL__ #define __KERNEL__ @@ -181,12 +193,10 @@ MODULE_DESCRIPTION("Sundance Alta Ethernet driver"); MODULE_LICENSE("GPL"); -MODULE_PARM(max_interrupt_work, "i"); MODULE_PARM(debug, "i"); MODULE_PARM(rx_copybreak, "i"); MODULE_PARM(media, "1-" __MODULE_STRING(MAX_UNITS) "s"); MODULE_PARM(flowctrl, "i"); -MODULE_PARM_DESC(max_interrupt_work, "Sundance Alta maximum events handled per interrupt"); MODULE_PARM_DESC(debug, "Sundance Alta debug level (0-5)"); MODULE_PARM_DESC(rx_copybreak, "Sundance Alta copy breakpoint for copy-only-tiny-frames"); MODULE_PARM_DESC(flowctrl, "Sundance Alta flow control [0|1]"); @@ -502,6 +512,7 @@ static void netdev_error(struct net_device *dev, int intr_status); static void netdev_error(struct net_device *dev, int intr_status); static void set_rx_mode(struct net_device *dev); +static int __set_mac_addr(struct net_device *dev); static struct net_device_stats *get_stats(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int netdev_close(struct net_device *dev); @@ -662,8 +673,8 @@ np->an_enable = 1; } } - if (flowctrl == 0) - np->flowctrl = 0; + if (flowctrl == 1) + np->flowctrl = 1; } /* Fibre PHY? */ @@ -678,6 +689,9 @@ /* Reset PHY */ mdio_write (dev, np->phys[0], MII_BMCR, BMCR_RESET); mdelay (300); + /* If flow control enabled, we need to advertise it.*/ + if (np->flowctrl) + mdio_write (dev, np->phys[0], MII_ADVERTISE, np->mii_if.advertising | 0x0400); mdio_write (dev, np->phys[0], MII_BMCR, BMCR_ANENABLE|BMCR_ANRESTART); /* Force media type */ if (!np->an_enable) { @@ -847,17 +861,18 @@ if (netif_msg_ifup(np)) printk(KERN_DEBUG "%s: netdev_open() irq %d.\n", dev->name, dev->irq); - init_ring(dev); writel(np->rx_ring_dma, ioaddr + RxListPtr); /* The Tx list pointer is written as packets are queued. 
*/ - for (i = 0; i < 6; i++) - writeb(dev->dev_addr[i], ioaddr + StationAddr + i); - /* Initialize other registers. */ + __set_mac_addr(dev); +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + writew(dev->mtu + 18, ioaddr + MaxFrameSize); +#else writew(dev->mtu + 14, ioaddr + MaxFrameSize); +#endif if (dev->mtu > 2047) writel(readl(ioaddr + ASICCtrl) | 0x0C, ioaddr + ASICCtrl); @@ -879,7 +894,7 @@ writeb(0x01, ioaddr + DebugCtrl1); netif_start_queue(dev); - writew(StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1); + writew (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1); if (netif_msg_ifup(np)) printk(KERN_DEBUG "%s: Done netdev_open(), status: Rx %x Tx %x " @@ -925,7 +940,7 @@ printk(KERN_INFO "%s: Setting %s-duplex based on MII #%d " "negotiated capability %4.4x.\n", dev->name, duplex ? "full" : "half", np->phys[0], negotiated); - writew(duplex ? 0x20 : 0, ioaddr + MACCtrl0); + writew(readw(ioaddr + MACCtrl0) | duplex ? 0x20 : 0, ioaddr + MACCtrl0); } } @@ -951,7 +966,7 @@ { struct netdev_private *np = dev->priv; long ioaddr = dev->base_addr; - long flag; + unsigned long flag; netif_stop_queue(dev); tasklet_disable(&np->tx_tasklet); @@ -966,11 +981,11 @@ for (i=0; itx_ring_dma + i*sizeof(*np->tx_ring), - np->tx_ring[i].next_desc, - np->tx_ring[i].status, - (np->tx_ring[i].status >> 2) & 0xff, - np->tx_ring[i].frag[0].addr, - np->tx_ring[i].frag[0].length); + le32_to_cpu(np->tx_ring[i].next_desc), + le32_to_cpu(np->tx_ring[i].status), + (le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff, + le32_to_cpu(np->tx_ring[i].frag[0].addr), + le32_to_cpu(np->tx_ring[i].frag[0].length)); } printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n", readl(dev->base_addr + TxListPtr), @@ -1157,7 +1172,6 @@ struct net_device *dev = (struct net_device *)dev_instance; struct netdev_private *np; long ioaddr; - int boguscnt = max_interrupt_work; int hw_frame_id; int tx_cnt; int tx_status; @@ -1226,11 +1240,14 @@ int entry = np->dirty_tx % TX_RING_SIZE; 
struct sk_buff *skb; int sw_frame_id; - sw_frame_id = (np->tx_ring[entry].status >> 2) & 0xff; - if (sw_frame_id == hw_frame_id && - !(np->tx_ring[entry].status & 0x00010000)) + sw_frame_id = (le32_to_cpu( + np->tx_ring[entry].status) >> 2) & 0xff; + if (sw_frame_id == hw_frame_id && + !(le32_to_cpu(np->tx_ring[entry].status) + & 0x00010000)) break; - if (sw_frame_id == (hw_frame_id + 1) % TX_RING_SIZE) + if (sw_frame_id == (hw_frame_id + 1) % + TX_RING_SIZE) break; skb = np->tx_skbuff[entry]; /* Free the original skb. */ @@ -1248,7 +1265,8 @@ for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { int entry = np->dirty_tx % TX_RING_SIZE; struct sk_buff *skb; - if (!(np->tx_ring[entry].status & 0x00010000)) + if (!(le32_to_cpu(np->tx_ring[entry].status) + & 0x00010000)) break; skb = np->tx_skbuff[entry]; /* Free the original skb. */ @@ -1271,15 +1289,7 @@ /* Abnormal error summary/uncommon events handlers. */ if (intr_status & (IntrPCIErr | LinkChange | StatsMax)) netdev_error(dev, intr_status); - if (--boguscnt < 0) { - get_stats(dev); - if (netif_msg_hw(np)) - printk(KERN_WARNING "%s: Too much work at interrupt, " - "status=0x%4.4x / 0x%4.4x.\n", - dev->name, intr_status, readw(ioaddr + IntrClear)); - break; - } - } while (1); + } while (0); if (netif_msg_intr(np)) printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", dev->name, readw(ioaddr + IntrStatus)); @@ -1447,9 +1457,12 @@ "full" : "half"); } check_duplex (dev); - if (np->flowctrl == 0) - writew(readw(ioaddr + MACCtrl0) & ~EnbFlowCtrl, + if (np->flowctrl && np->mii_if.full_duplex) { + writew(readw(ioaddr + MulticastFilter1+2) | 0x0200, + ioaddr + MulticastFilter1+2); + writew(readw(ioaddr + MACCtrl0) | EnbFlowCtrl, ioaddr + MACCtrl0); + } } if (intr_status & StatsMax) { get_stats(dev); @@ -1463,8 +1476,8 @@ static struct net_device_stats *get_stats(struct net_device *dev) { - long ioaddr = dev->base_addr; struct netdev_private *np = dev->priv; + long ioaddr = dev->base_addr; int i; /* We should lock 
this segment of code for SMP eventually, although @@ -1477,7 +1490,7 @@ np->stats.collisions += readb(ioaddr + StatsLateColl); np->stats.collisions += readb(ioaddr + StatsMultiColl); np->stats.collisions += readb(ioaddr + StatsOneColl); - readb(ioaddr + StatsCarrierError); + np->stats.tx_carrier_errors += readb(ioaddr + StatsCarrierError); readb(ioaddr + StatsTxDefer); for (i = StatsTxDefer; i <= StatsMcastRx; i++) readb(ioaddr + i); @@ -1492,6 +1505,7 @@ static void set_rx_mode(struct net_device *dev) { long ioaddr = dev->base_addr; + struct netdev_private *np = dev->priv; u16 mc_filter[4]; /* Multicast hash filter */ u32 rx_mode; int i; @@ -1524,11 +1538,28 @@ writeb(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode); return; } + if (np->mii_if.full_duplex && np->flowctrl) + mc_filter[3] |= 0x0200; + for (i = 0; i < 4; i++) writew(mc_filter[i], ioaddr + MulticastFilter0 + i*2); writeb(rx_mode, ioaddr + RxMode); } +static int __set_mac_addr(struct net_device *dev) +{ + u16 addr16; + + addr16 = (dev->dev_addr[0] | (dev->dev_addr[1] << 8)); + writew(addr16, dev->base_addr + StationAddr); + addr16 = (dev->dev_addr[2] | (dev->dev_addr[3] << 8)); + writew(addr16, dev->base_addr + StationAddr+2); + addr16 = (dev->dev_addr[4] | (dev->dev_addr[5] << 8)); + writew(addr16, dev->base_addr + StationAddr+4); + return 0; +} + + static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr) { struct netdev_private *np = dev->priv; @@ -1615,6 +1646,7 @@ struct mii_ioctl_data *data = (struct mii_ioctl_data *) & rq->ifr_data; int rc; int i; + long ioaddr = dev->base_addr; if (!netif_running(dev)) return -EINVAL; @@ -1632,11 +1664,12 @@ for (i=0; itx_ring_dma + i*sizeof(*np->tx_ring), - np->tx_ring[i].next_desc, - np->tx_ring[i].status, - (np->tx_ring[i].status >> 2) & 0xff, - np->tx_ring[i].frag[0].addr, - np->tx_ring[i].frag[0].length); + le32_to_cpu(np->tx_ring[i].next_desc), + le32_to_cpu(np->tx_ring[i].status), + (le32_to_cpu(np->tx_ring[i].status) >> 2) + & 0xff, + 
le32_to_cpu(np->tx_ring[i].frag[0].addr), + le32_to_cpu(np->tx_ring[i].frag[0].length)); } printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n", readl(dev->base_addr + TxListPtr), @@ -1646,6 +1679,7 @@ np->dirty_tx, np->dirty_tx % TX_RING_SIZE); printk(KERN_DEBUG "cur_rx=%d dirty_rx=%d\n", np->cur_rx, np->dirty_rx); printk(KERN_DEBUG "cur_task=%d\n", np->cur_task); + printk(KERN_DEBUG "TxStatus=%04x\n", readw(ioaddr + TxStatus)); return 0; } @@ -1753,10 +1787,10 @@ } static struct pci_driver sundance_driver = { - name: DRV_NAME, - id_table: sundance_pci_tbl, - probe: sundance_probe1, - remove: __devexit_p(sundance_remove1), + .name = DRV_NAME, + .id_table = sundance_pci_tbl, + .probe = sundance_probe1, + .remove = __devexit_p(sundance_remove1), }; static int __init sundance_init(void) diff -Nru a/drivers/net/tg3.c b/drivers/net/tg3.c --- a/drivers/net/tg3.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/tg3.c Thu Jun 19 23:46:52 2003 @@ -6703,7 +6703,7 @@ } /* Configure DMA attributes. 
*/ - if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff)) { + if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL)) { pci_using_dac = 1; } else { err = pci_set_dma_mask(pdev, (u64) 0xffffffff); diff -Nru a/drivers/net/tlan.c b/drivers/net/tlan.c --- a/drivers/net/tlan.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/tlan.c Thu Jun 19 23:46:52 2003 @@ -166,19 +166,18 @@ * Thanks to Gunnar Eikman *******************************************************************************/ - #include - -#include "tlan.h" - #include #include #include +#include #include #include #include +#include #include +#include "tlan.h" typedef u32 (TLanIntVectorFunc)( struct net_device *, u16 ); @@ -208,7 +207,6 @@ MODULE_PARM_DESC(speed, "ThunderLAN port speen setting(s) (0,10,100)"); MODULE_PARM_DESC(debug, "ThunderLAN debug mask"); MODULE_PARM_DESC(bbuf, "ThunderLAN use big buffer (0-1)"); -EXPORT_NO_SYMBOLS; /* Define this to enable Link beat monitoring */ #undef MONITOR @@ -218,10 +216,11 @@ static int bbuf; static u8 *TLanPadBuffer; +static dma_addr_t TLanPadBufferDMA; static char TLanSignature[] = "TLAN"; -static const char tlan_banner[] = "ThunderLAN driver v1.15\n"; -static int tlan_have_pci; -static int tlan_have_eisa; +static const char tlan_banner[] = "ThunderLAN driver v1.15\n"; +static int tlan_have_pci; +static int tlan_have_eisa; const char *media[] = { "10BaseT-HD ", "10BaseT-FD ","100baseTx-HD ", @@ -347,6 +346,27 @@ static int TLan_EeReadByte( struct net_device *, u8, u8 * ); +static void +TLan_StoreSKB( struct tlan_list_tag *tag, struct sk_buff *skb) +{ + unsigned long addr = (unsigned long)skb; + tag->buffer[9].address = (u32)addr; + addr >>= 31; /* >>= 32 is undefined for 32bit arch, stupid C */ + addr >>= 1; + tag->buffer[8].address = (u32)addr; +} + +static struct sk_buff * +TLan_GetSKB( struct tlan_list_tag *tag) +{ + unsigned long addr = tag->buffer[8].address; + addr <<= 31; + addr <<= 1; + addr |= tag->buffer[9].address; + return (struct sk_buff *) addr; +} + + 
static TLanIntVectorFunc *TLanIntVector[TLAN_INT_NUMBER_OF_INTS] = { TLan_HandleInvalid, TLan_HandleTxEOF, @@ -422,10 +442,10 @@ unregister_netdev( dev ); if ( priv->dmaStorage ) { - kfree( priv->dmaStorage ); + pci_free_consistent(priv->pciDev, priv->dmaSize, priv->dmaStorage, priv->dmaStorageDMA ); } - release_region( dev->base_addr, 0x10 ); + pci_release_regions(pdev); kfree( dev ); @@ -433,10 +453,10 @@ } static struct pci_driver tlan_driver = { - name: "tlan", - id_table: tlan_pci_tbl, - probe: tlan_init_one, - remove: __devexit_p(tlan_remove_one), + .name = "tlan", + .id_table = tlan_pci_tbl, + .probe = tlan_init_one, + .remove = __devexit_p(tlan_remove_one), }; static int __init tlan_probe(void) @@ -445,8 +465,7 @@ printk(KERN_INFO "%s", tlan_banner); - TLanPadBuffer = (u8 *) kmalloc(TLAN_MIN_FRAME_SIZE, - GFP_KERNEL); + TLanPadBuffer = (u8 *) pci_alloc_consistent(NULL, TLAN_MIN_FRAME_SIZE, &TLanPadBufferDMA); if (TLanPadBuffer == NULL) { printk(KERN_ERR "TLAN: Could not allocate memory for pad buffer.\n"); @@ -471,7 +490,7 @@ if (TLanDevicesInstalled == 0) { pci_unregister_driver(&tlan_driver); - kfree(TLanPadBuffer); + pci_free_consistent(NULL, TLAN_MIN_FRAME_SIZE, TLanPadBuffer, TLanPadBufferDMA); return -ENODEV; } return 0; @@ -512,26 +531,44 @@ TLanPrivateInfo *priv; u8 pci_rev; u16 device_id; - int reg; + int reg, rc = -ENODEV; + + if (pdev) { + rc = pci_enable_device(pdev); + if (rc) + return rc; - if (pdev && pci_enable_device(pdev)) - return -EIO; + rc = pci_request_regions(pdev, TLanSignature); + if (rc) { + printk(KERN_ERR "TLAN: Could not reserve IO regions\n"); + goto err_out; + } + } - dev = init_etherdev(NULL, sizeof(TLanPrivateInfo)); + dev = alloc_etherdev(sizeof(TLanPrivateInfo)); if (dev == NULL) { printk(KERN_ERR "TLAN: Could not allocate memory for device.\n"); - return -ENOMEM; + rc = -ENOMEM; + goto err_out_regions; } SET_MODULE_OWNER(dev); priv = dev->priv; + priv->pciDev = pdev; + /* Is this a PCI device? 
*/ if (pdev) { u32 pci_io_base = 0; priv->adapter = &board_info[ent->driver_data]; + rc = pci_set_dma_mask(pdev, 0xFFFFFFFF); + if (rc) { + printk(KERN_ERR "TLAN: No suitable PCI mapping available.\n"); + goto err_out_free_dev; + } + pci_read_config_byte ( pdev, PCI_REVISION_ID, &pci_rev); for ( reg= 0; reg <= 5; reg ++ ) { @@ -544,9 +581,8 @@ } if (!pci_io_base) { printk(KERN_ERR "TLAN: No IO mappings available\n"); - unregister_netdev(dev); - kfree(dev); - return -ENODEV; + rc = -EIO; + goto err_out_free_dev; } dev->base_addr = pci_io_base; @@ -592,19 +628,22 @@ /* This will be used when we get an adapter error from * within our irq handler */ - INIT_LIST_HEAD(&priv->tlan_tqueue.list); - priv->tlan_tqueue.sync = 0; - priv->tlan_tqueue.routine = (void *)(void*)TLan_tx_timeout; - priv->tlan_tqueue.data = dev; + INIT_TQUEUE(&priv->tlan_tqueue, (void *)(void*)TLan_tx_timeout, dev); spin_lock_init(&priv->lock); - if (TLan_Init(dev)) { + rc = TLan_Init(dev); + if (rc) { + printk(KERN_ERR "TLAN: Could not set up device.\n"); + goto err_out_free_dev; + } + + rc = register_netdev(dev); + if (rc) { printk(KERN_ERR "TLAN: Could not register device.\n"); - unregister_netdev(dev); - kfree(dev); - return -EAGAIN; - } else { + goto err_out_uninit; + } + TLanDevicesInstalled++; boards_found++; @@ -625,8 +664,19 @@ priv->adapter->deviceLabel, priv->adapterRev); return 0; - } +err_out_uninit: + pci_free_consistent(priv->pciDev, priv->dmaSize, priv->dmaStorage, + priv->dmaStorageDMA ); +err_out_free_dev: + kfree(dev); +err_out_regions: + if (pdev) + pci_release_regions(pdev); +err_out: + if (pdev) + pci_disable_device(pdev); + return rc; } @@ -639,7 +689,7 @@ dev = TLan_Eisa_Devices; priv = dev->priv; if (priv->dmaStorage) { - kfree(priv->dmaStorage); + pci_free_consistent(priv->pciDev, priv->dmaSize, priv->dmaStorage, priv->dmaStorageDMA ); } release_region( dev->base_addr, 0x10); unregister_netdev( dev ); @@ -657,7 +707,7 @@ if (tlan_have_eisa) TLan_Eisa_Cleanup(); - kfree( 
TLanPadBuffer ); + pci_free_consistent(NULL, TLAN_MIN_FRAME_SIZE, TLanPadBuffer, TLanPadBufferDMA); } @@ -792,15 +842,6 @@ priv = dev->priv; - if (!priv->is_eisa) /* EISA devices have already requested IO */ - if (!request_region( dev->base_addr, 0x10, TLanSignature )) { - printk(KERN_ERR "TLAN: %s: IO port region 0x%lx size 0x%x in use.\n", - dev->name, - dev->base_addr, - 0x10 ); - return -EIO; - } - if ( bbuf ) { dma_size = ( TLAN_NUM_RX_LISTS + TLAN_NUM_TX_LISTS ) * ( sizeof(TLanList) + TLAN_MAX_FRAME_SIZE ); @@ -808,21 +849,25 @@ dma_size = ( TLAN_NUM_RX_LISTS + TLAN_NUM_TX_LISTS ) * ( sizeof(TLanList) ); } - priv->dmaStorage = kmalloc(dma_size, GFP_KERNEL | GFP_DMA); + priv->dmaStorage = pci_alloc_consistent(priv->pciDev, dma_size, &priv->dmaStorageDMA); + priv->dmaSize = dma_size; + if ( priv->dmaStorage == NULL ) { printk(KERN_ERR "TLAN: Could not allocate lists and buffers for %s.\n", dev->name ); - release_region( dev->base_addr, 0x10 ); return -ENOMEM; } memset( priv->dmaStorage, 0, dma_size ); priv->rxList = (TLanList *) ( ( ( (u32) priv->dmaStorage ) + 7 ) & 0xFFFFFFF8 ); + priv->rxListDMA = ( ( ( (u32) priv->dmaStorageDMA ) + 7 ) & 0xFFFFFFF8 ); priv->txList = priv->rxList + TLAN_NUM_RX_LISTS; + priv->txListDMA = priv->rxListDMA + sizeof(TLanList) * TLAN_NUM_RX_LISTS; if ( bbuf ) { priv->rxBuffer = (u8 *) ( priv->txList + TLAN_NUM_TX_LISTS ); - priv->txBuffer = priv->rxBuffer - + ( TLAN_NUM_RX_LISTS * TLAN_MAX_FRAME_SIZE ); + priv->rxBufferDMA =priv->txListDMA + sizeof(TLanList) * TLAN_NUM_TX_LISTS; + priv->txBuffer = priv->rxBuffer + ( TLAN_NUM_RX_LISTS * TLAN_MAX_FRAME_SIZE ); + priv->txBufferDMA = priv->rxBufferDMA + ( TLAN_NUM_RX_LISTS * TLAN_MAX_FRAME_SIZE ); } err = 0; @@ -1003,6 +1048,7 @@ { TLanPrivateInfo *priv = dev->priv; TLanList *tail_list; + dma_addr_t tail_list_phys; u8 *tail_buffer; int pad; unsigned long flags; @@ -1014,6 +1060,7 @@ } tail_list = priv->txList + priv->txTail; + tail_list_phys = priv->txListDMA + sizeof(TLanList) * 
priv->txTail; if ( tail_list->cStat != TLAN_CSTAT_UNUSED ) { TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: %s is busy (Head=%d Tail=%d)\n", dev->name, priv->txHead, priv->txTail ); @@ -1028,8 +1075,8 @@ tail_buffer = priv->txBuffer + ( priv->txTail * TLAN_MAX_FRAME_SIZE ); memcpy( tail_buffer, skb->data, skb->len ); } else { - tail_list->buffer[0].address = virt_to_bus( skb->data ); - tail_list->buffer[9].address = (u32) skb; + tail_list->buffer[0].address = pci_map_single(priv->pciDev, skb->data, skb->len, PCI_DMA_TODEVICE); + TLan_StoreSKB(tail_list, skb); } pad = TLAN_MIN_FRAME_SIZE - skb->len; @@ -1038,7 +1085,7 @@ tail_list->frameSize = (u16) skb->len + pad; tail_list->buffer[0].count = (u32) skb->len; tail_list->buffer[1].count = TLAN_LAST_BUFFER | (u32) pad; - tail_list->buffer[1].address = virt_to_bus( TLanPadBuffer ); + tail_list->buffer[1].address = TLanPadBufferDMA; } else { tail_list->frameSize = (u16) skb->len; tail_list->buffer[0].count = TLAN_LAST_BUFFER | (u32) skb->len; @@ -1051,14 +1098,14 @@ if ( ! 
priv->txInProgress ) { priv->txInProgress = 1; TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Starting TX on buffer %d\n", priv->txTail ); - outl( virt_to_bus( tail_list ), dev->base_addr + TLAN_CH_PARM ); + outl( tail_list_phys, dev->base_addr + TLAN_CH_PARM ); outl( TLAN_HC_GO, dev->base_addr + TLAN_HOST_CMD ); } else { TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Adding buffer %d to TX channel\n", priv->txTail ); if ( priv->txTail == 0 ) { - ( priv->txList + ( TLAN_NUM_TX_LISTS - 1 ) )->forward = virt_to_bus( tail_list ); + ( priv->txList + ( TLAN_NUM_TX_LISTS - 1 ) )->forward = tail_list_phys; } else { - ( priv->txList + ( priv->txTail - 1 ) )->forward = virt_to_bus( tail_list ); + ( priv->txList + ( priv->txTail - 1 ) )->forward = tail_list_phys; } } spin_unlock_irqrestore(&priv->lock, flags); @@ -1344,6 +1391,7 @@ TLanPrivateInfo *priv = dev->priv; int eoc = 0; TLanList *head_list; + dma_addr_t head_list_phys; u32 ack = 0; u16 tmpCStat; @@ -1353,7 +1401,10 @@ while (((tmpCStat = head_list->cStat ) & TLAN_CSTAT_FRM_CMP) && (ack < 255)) { ack++; if ( ! 
bbuf ) { - dev_kfree_skb_any( (struct sk_buff *) head_list->buffer[9].address ); + struct sk_buff *skb = TLan_GetSKB(head_list); + pci_unmap_single(priv->pciDev, head_list->buffer[0].address, skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb_any(skb); + head_list->buffer[8].address = 0; head_list->buffer[9].address = 0; } @@ -1374,8 +1425,9 @@ if ( eoc ) { TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Handling TX EOC (Head=%d Tail=%d)\n", priv->txHead, priv->txTail ); head_list = priv->txList + priv->txHead; + head_list_phys = priv->txListDMA + sizeof(TLanList) * priv->txHead; if ( ( head_list->cStat & TLAN_CSTAT_READY ) == TLAN_CSTAT_READY ) { - outl( virt_to_bus( head_list ), dev->base_addr + TLAN_CH_PARM ); + outl(head_list_phys, dev->base_addr + TLAN_CH_PARM ); ack |= TLAN_HC_GO; } else { priv->txInProgress = 0; @@ -1468,9 +1520,11 @@ void *t; u32 frameSize; u16 tmpCStat; + dma_addr_t head_list_phys; TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: Handling RX EOF (Head=%d Tail=%d)\n", priv->rxHead, priv->rxTail ); head_list = priv->rxList + priv->rxHead; + head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead; while (((tmpCStat = head_list->cStat) & TLAN_CSTAT_FRM_CMP) && (ack < 255)) { frameSize = head_list->frameSize; @@ -1498,17 +1552,16 @@ struct sk_buff *new_skb; /* - * I changed the algorithm here. What we now do - * is allocate the new frame. If this fails we - * simply recycle the frame. - */ + * I changed the algorithm here. What we now do + * is allocate the new frame. If this fails we + * simply recycle the frame. 
+ */ new_skb = dev_alloc_skb( TLAN_MAX_FRAME_SIZE + 7 ); if ( new_skb != NULL ) { - /* If this ever happened it would be a problem */ - /* not any more - ac */ - skb = (struct sk_buff *) head_list->buffer[9].address; + skb = TLan_GetSKB(head_list); + pci_unmap_single(priv->pciDev, head_list->buffer[0].address, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); skb_trim( skb, frameSize ); priv->stats.rx_bytes += frameSize; @@ -1519,9 +1572,9 @@ new_skb->dev = dev; skb_reserve( new_skb, 2 ); t = (void *) skb_put( new_skb, TLAN_MAX_FRAME_SIZE ); - head_list->buffer[0].address = virt_to_bus( t ); + head_list->buffer[0].address = pci_map_single(priv->pciDev, new_skb->data, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); head_list->buffer[8].address = (u32) t; - head_list->buffer[9].address = (u32) new_skb; + TLan_StoreSKB(head_list, new_skb); } else printk(KERN_WARNING "TLAN: Couldn't allocate memory for received data.\n" ); } @@ -1529,11 +1582,12 @@ head_list->forward = 0; head_list->cStat = 0; tail_list = priv->rxList + priv->rxTail; - tail_list->forward = virt_to_bus( head_list ); + tail_list->forward = head_list_phys; CIRC_INC( priv->rxHead, TLAN_NUM_RX_LISTS ); CIRC_INC( priv->rxTail, TLAN_NUM_RX_LISTS ); head_list = priv->rxList + priv->rxHead; + head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead; } if (!ack) @@ -1545,7 +1599,8 @@ if ( eoc ) { TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: Handling RX EOC (Head=%d Tail=%d)\n", priv->rxHead, priv->rxTail ); head_list = priv->rxList + priv->rxHead; - outl( virt_to_bus( head_list ), dev->base_addr + TLAN_CH_PARM ); + head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead; + outl(head_list_phys, dev->base_addr + TLAN_CH_PARM ); ack |= TLAN_HC_GO | TLAN_HC_RT; priv->rxEocCount++; } @@ -1611,7 +1666,7 @@ * host_int The contents of the HOST_INT * port. 
* - * This driver is structured to determine EOC occurances by + * This driver is structured to determine EOC occurrences by * reading the CSTAT member of the list structure. Tx EOC * interrupts are disabled via the DIO INTDIS register. * However, TLAN chips before revision 3.0 didn't have this @@ -1624,15 +1679,17 @@ { TLanPrivateInfo *priv = dev->priv; TLanList *head_list; + dma_addr_t head_list_phys; u32 ack = 1; host_int = 0; if ( priv->tlanRev < 0x30 ) { TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Handling TX EOC (Head=%d Tail=%d) -- IRQ\n", priv->txHead, priv->txTail ); head_list = priv->txList + priv->txHead; + head_list_phys = priv->txListDMA + sizeof(TLanList) * priv->txHead; if ( ( head_list->cStat & TLAN_CSTAT_READY ) == TLAN_CSTAT_READY ) { netif_stop_queue(dev); - outl( virt_to_bus( head_list ), dev->base_addr + TLAN_CH_PARM ); + outl( head_list_phys, dev->base_addr + TLAN_CH_PARM ); ack |= TLAN_HC_GO; } else { priv->txInProgress = 0; @@ -1683,10 +1740,9 @@ printk( "TLAN: %s: Adaptor Error = 0x%x\n", dev->name, error ); TLan_ReadAndClearStats( dev, TLAN_RECORD ); outl( TLAN_HC_AD_RST, dev->base_addr + TLAN_HOST_CMD ); - - queue_task(&priv->tlan_tqueue, &tq_immediate); - mark_bh(IMMEDIATE_BH); - + + schedule_task(&priv->tlan_tqueue); + netif_wake_queue(dev); ack = 0; } else { @@ -1733,7 +1789,7 @@ * host_int The contents of the HOST_INT * port. * - * This driver is structured to determine EOC occurances by + * This driver is structured to determine EOC occurrences by * reading the CSTAT member of the list structure. Rx EOC * interrupts are disabled via the DIO INTDIS register. 
* However, TLAN chips before revision 3.0 didn't have this @@ -1745,13 +1801,13 @@ u32 TLan_HandleRxEOC( struct net_device *dev, u16 host_int ) { TLanPrivateInfo *priv = dev->priv; - TLanList *head_list; + dma_addr_t head_list_phys; u32 ack = 1; if ( priv->tlanRev < 0x30 ) { TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: Handling RX EOC (Head=%d Tail=%d) -- IRQ\n", priv->rxHead, priv->rxTail ); - head_list = priv->rxList + priv->rxHead; - outl( virt_to_bus( head_list ), dev->base_addr + TLAN_CH_PARM ); + head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead; + outl( head_list_phys, dev->base_addr + TLAN_CH_PARM ); ack |= TLAN_HC_GO | TLAN_HC_RT; priv->rxEocCount++; } @@ -1888,6 +1944,7 @@ TLanPrivateInfo *priv = dev->priv; int i; TLanList *list; + dma_addr_t list_phys; struct sk_buff *skb; void *t = NULL; @@ -1897,12 +1954,13 @@ list = priv->txList + i; list->cStat = TLAN_CSTAT_UNUSED; if ( bbuf ) { - list->buffer[0].address = virt_to_bus( priv->txBuffer + ( i * TLAN_MAX_FRAME_SIZE ) ); + list->buffer[0].address = priv->txBufferDMA + ( i * TLAN_MAX_FRAME_SIZE ); } else { list->buffer[0].address = 0; } list->buffer[2].count = 0; list->buffer[2].address = 0; + list->buffer[8].address = 0; list->buffer[9].address = 0; } @@ -1910,11 +1968,12 @@ priv->rxTail = TLAN_NUM_RX_LISTS - 1; for ( i = 0; i < TLAN_NUM_RX_LISTS; i++ ) { list = priv->rxList + i; + list_phys = priv->rxListDMA + sizeof(TLanList) * i; list->cStat = TLAN_CSTAT_READY; list->frameSize = TLAN_MAX_FRAME_SIZE; list->buffer[0].count = TLAN_MAX_FRAME_SIZE | TLAN_LAST_BUFFER; if ( bbuf ) { - list->buffer[0].address = virt_to_bus( priv->rxBuffer + ( i * TLAN_MAX_FRAME_SIZE ) ); + list->buffer[0].address = priv->rxBufferDMA + ( i * TLAN_MAX_FRAME_SIZE ); } else { skb = dev_alloc_skb( TLAN_MAX_FRAME_SIZE + 7 ); if ( skb == NULL ) { @@ -1925,14 +1984,14 @@ skb_reserve( skb, 2 ); t = (void *) skb_put( skb, TLAN_MAX_FRAME_SIZE ); } - list->buffer[0].address = virt_to_bus( t ); + list->buffer[0].address = 
pci_map_single(priv->pciDev, t, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); list->buffer[8].address = (u32) t; - list->buffer[9].address = (u32) skb; + TLan_StoreSKB(list, skb); } list->buffer[1].count = 0; list->buffer[1].address = 0; if ( i < TLAN_NUM_RX_LISTS - 1 ) - list->forward = virt_to_bus( list + 1 ); + list->forward = list_phys + sizeof(TLanList); else list->forward = 0; } @@ -1950,23 +2009,26 @@ if ( ! bbuf ) { for ( i = 0; i < TLAN_NUM_TX_LISTS; i++ ) { list = priv->txList + i; - skb = (struct sk_buff *) list->buffer[9].address; + skb = TLan_GetSKB(list); if ( skb ) { + pci_unmap_single(priv->pciDev, list->buffer[0].address, skb->len, PCI_DMA_TODEVICE); dev_kfree_skb_any( skb ); + list->buffer[8].address = 0; list->buffer[9].address = 0; } } for ( i = 0; i < TLAN_NUM_RX_LISTS; i++ ) { list = priv->rxList + i; - skb = (struct sk_buff *) list->buffer[9].address; + skb = TLan_GetSKB(list); if ( skb ) { + pci_unmap_single(priv->pciDev, list->buffer[0].address, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); dev_kfree_skb_any( skb ); + list->buffer[8].address = 0; list->buffer[9].address = 0; } } } - } /* TLan_FreeLists */ @@ -2271,8 +2333,8 @@ printk("TLAN: Partner capability: "); for (i = 5; i <= 10; i++) if (partner & (1<base_addr, TLAN_LED_REG, TLAN_LED_LINK ); @@ -2304,7 +2366,7 @@ if ( debug >= 1 && debug != TLAN_DEBUG_PROBE ) { outb( ( TLAN_HC_REQ_INT >> 8 ), dev->base_addr + TLAN_HOST_CMD + 1 ); } - outl( virt_to_bus( priv->rxList ), dev->base_addr + TLAN_CH_PARM ); + outl( priv->rxListDMA, dev->base_addr + TLAN_CH_PARM ); outl( TLAN_HC_GO | TLAN_HC_RT, dev->base_addr + TLAN_HOST_CMD ); } else { printk( "TLAN: %s: Link inactive, will retry in 10 secs...\n", dev->name ); @@ -2376,7 +2438,7 @@ * dev A pointer to the device structure of the * TLAN device having the PHYs to be detailed. * - * This function prints the registers a PHY (aka tranceiver). + * This function prints the registers a PHY (aka transceiver). 
* ********************************************************************/ @@ -2492,7 +2554,7 @@ /* Wait for 50 ms and powerup * This is abitrary. It is intended to make sure the - * tranceiver settles. + * transceiver settles. */ TLan_SetTimer( dev, (HZ/20), TLAN_TIMER_PHY_PUP ); @@ -2512,7 +2574,7 @@ TLan_MiiWriteReg( dev, priv->phy[priv->phyNum], MII_GEN_CTL, value ); TLan_MiiSync(dev->base_addr); /* Wait for 500 ms and reset the - * tranceiver. The TLAN docs say both 50 ms and + * transceiver. The TLAN docs say both 50 ms and * 500 ms, so do the longer, just in case. */ TLan_SetTimer( dev, (HZ/20), TLAN_TIMER_PHY_RESET ); @@ -2627,7 +2689,7 @@ TLan_MiiWriteReg( dev, phy, TLAN_TLPHY_CTL, tctl ); } - /* Wait for 2 sec to give the tranceiver time + /* Wait for 2 sec to give the transceiver time * to establish link. */ TLan_SetTimer( dev, (4*HZ), TLAN_TIMER_FINISH_RESET ); diff -Nru a/drivers/net/tlan.h b/drivers/net/tlan.h --- a/drivers/net/tlan.h Thu Jun 19 23:46:52 2003 +++ b/drivers/net/tlan.h Thu Jun 19 23:46:52 2003 @@ -169,15 +169,22 @@ typedef struct tlan_private_tag { struct net_device *nextDevice; + struct pci_dev *pciDev; void *dmaStorage; + dma_addr_t dmaStorageDMA; + unsigned int dmaSize; u8 *padBuffer; TLanList *rxList; + dma_addr_t rxListDMA; u8 *rxBuffer; + dma_addr_t rxBufferDMA; u32 rxHead; u32 rxTail; u32 rxEocCount; TLanList *txList; + dma_addr_t txListDMA; u8 *txBuffer; + dma_addr_t txBufferDMA; u32 txHead; u32 txInProgress; u32 txTail; diff -Nru a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c --- a/drivers/net/tulip/tulip_core.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/tulip/tulip_core.c Thu Jun 19 23:46:52 2003 @@ -231,6 +231,7 @@ { 0x1737, 0xAB09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, { 0x17B3, 0xAB08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, { 0x14f1, 0x1803, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CONEXANT }, + { 0x10b9, 0x5261, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, /* ALi 1563 integrated ethernet */ { } /* terminate list */ }; 
MODULE_DEVICE_TABLE(pci, tulip_pci_tbl); @@ -1403,12 +1404,14 @@ csr0 &= ~0xfff10000; /* zero reserved bits 31:20, 16 */ /* DM9102A has troubles with MRM & clear reserved bits 24:22, 20, 16, 7:1 */ - if (pdev->vendor == 0x1282 && pdev->device == 0x9102) + if ((pdev->vendor == 0x1282 && pdev->device == 0x9102) + || (pdev->vendor == 0x10b9 && pdev->device == 0x5261)) csr0 &= ~0x01f100ff; #if defined(__sparc__) /* DM9102A needs 32-dword alignment/burst length on sparc - chip bug? */ - if (pdev->vendor == 0x1282 && pdev->device == 0x9102) + if ((pdev->vendor == 0x1282 && pdev->device == 0x9102) + || (pdev->vendor == 0x10b9 && pdev->device == 0x5261)) csr0 = (csr0 & ~0xff00) | 0xe000; #endif diff -Nru a/drivers/net/typhoon.c b/drivers/net/typhoon.c --- a/drivers/net/typhoon.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/typhoon.c Thu Jun 19 23:46:52 2003 @@ -2134,7 +2134,7 @@ return 0; } -#if CONFIG_PM +#ifdef CONFIG_PM static int typhoon_resume(struct pci_dev *pdev) { @@ -2482,7 +2482,7 @@ .id_table = typhoon_pci_tbl, .probe = typhoon_init_one, .remove = __devexit_p(typhoon_remove_one), -#if CONFIG_PM +#ifdef CONFIG_PM .suspend = typhoon_suspend, .resume = typhoon_resume, .enable_wake = typhoon_enable_wake, diff -Nru a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c --- a/drivers/net/via-rhine.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/via-rhine.c Thu Jun 19 23:46:52 2003 @@ -1690,6 +1690,8 @@ /* Unconditionally log net taps. */ printk(KERN_NOTICE "%s: Promiscuous mode enabled.\n", dev->name); rx_mode = 0x1C; + writel(0xffffffff, ioaddr + MulticastFilter0); + writel(0xffffffff, ioaddr + MulticastFilter1); } else if ((dev->mc_count > multicast_filter_limit) || (dev->flags & IFF_ALLMULTI)) { /* Too many to match, or accept all multicasts. 
*/ diff -Nru a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c --- a/drivers/net/wireless/airo.c Thu Jun 19 23:46:52 2003 +++ b/drivers/net/wireless/airo.c Thu Jun 19 23:46:52 2003 @@ -3161,7 +3161,7 @@ readStatsRid(apriv, &stats, rid); j = 0; - for(i=0; (int)statsLabels[i]!=-1 && + for(i=0; statsLabels[i]!=(char *)-1 && i*44096) { diff -Nru a/include/linux/ethtool.h b/include/linux/ethtool.h --- a/include/linux/ethtool.h Thu Jun 19 23:46:52 2003 +++ b/include/linux/ethtool.h Thu Jun 19 23:46:52 2003 @@ -252,23 +252,23 @@ /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ -#define ETHTOOL_SSET 0x00000002 /* Set settings, privileged. */ +#define ETHTOOL_SSET 0x00000002 /* Set settings. */ #define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */ -#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers, privileged. */ +#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers. */ #define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */ -#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options, priv. */ +#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options. */ #define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ -#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ -#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv. */ +#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level. */ +#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation. */ #define ETHTOOL_GLINK 0x0000000a /* Get link status (ethtool_value) */ #define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ -#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data, priv. */ +#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data. */ #define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ -#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config, priv. */ +#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. 
*/ #define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ -#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ +#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters. */ #define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ -#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters, priv. */ +#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ #define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ #define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ #define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ @@ -276,8 +276,8 @@ #define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable * (ethtool_value) */ #define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable - * (ethtool_value), priv. */ -#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */ + * (ethtool_value). */ +#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test. */ #define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ #define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ #define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ @@ -299,6 +299,7 @@ #define SUPPORTED_MII (1 << 9) #define SUPPORTED_FIBRE (1 << 10) #define SUPPORTED_BNC (1 << 11) +#define SUPPORTED_10000baseT_Full (1 << 12) /* Indicates what features are advertised by the interface. */ #define ADVERTISED_10baseT_Half (1 << 0) @@ -313,6 +314,7 @@ #define ADVERTISED_MII (1 << 9) #define ADVERTISED_FIBRE (1 << 10) #define ADVERTISED_BNC (1 << 11) +#define ADVERTISED_10000baseT_Full (1 << 12) /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the @@ -320,10 +322,11 @@ * it was foced up into this mode or autonegotiated. */ -/* The forced speed, 10Mb, 100Mb, gigabit. */ +/* The forced speed, 10Mb, 100Mb, gigabit, 10GbE. 
*/ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 +#define SPEED_10000 10000 /* Duplex, half or full. */ #define DUPLEX_HALF 0x00 diff -Nru a/include/linux/if_arcnet.h b/include/linux/if_arcnet.h --- a/include/linux/if_arcnet.h Thu Jun 19 23:46:52 2003 +++ b/include/linux/if_arcnet.h Thu Jun 19 23:46:52 2003 @@ -25,6 +25,7 @@ /* RFC1201 Protocol ID's */ #define ARC_P_IP 212 /* 0xD4 */ +#define ARC_P_IPV6 196 /* 0xC4: RFC2497 */ #define ARC_P_ARP 213 /* 0xD5 */ #define ARC_P_RARP 214 /* 0xD6 */ #define ARC_P_IPX 250 /* 0xFA */ @@ -44,6 +45,9 @@ #define ARC_P_POWERLAN_BEACON2 243 /* 0xF3 */ #define ARC_P_LANSOFT 251 /* 0xFB - what is this? */ #define ARC_P_ATALK 0xDD + +/* Hardware address length */ +#define ARCNET_ALEN 1 /* * The RFC1201-specific components of an arcnet packet header. diff -Nru a/include/linux/if_bonding.h b/include/linux/if_bonding.h --- a/include/linux/if_bonding.h Thu Jun 19 23:46:52 2003 +++ b/include/linux/if_bonding.h Thu Jun 19 23:46:52 2003 @@ -11,18 +11,38 @@ * This software may be used and distributed according to the terms * of the GNU Public License, incorporated herein by reference. * + * 2003/03/18 - Amir Noam + * - Added support for getting slave's speed and duplex via ethtool. + * Needed for 802.3ad and other future modes. + * + * 2003/03/18 - Tsippy Mendelson and + * Shmulik Hen + * - Enable support of modes that need to use the unique mac address of + * each slave. + * + * 2003/03/18 - Tsippy Mendelson and + * Amir Noam + * - Moved driver's private data types to bonding.h + * + * 2003/03/18 - Amir Noam , + * Tsippy Mendelson and + * Shmulik Hen + * - Added support for IEEE 802.3ad Dynamic link aggregation mode. + * + * 2003/05/01 - Amir Noam + * - Added ABI version control to restore compatibility between + * new/old ifenslave and new/old bonding. 
*/ #ifndef _LINUX_IF_BONDING_H #define _LINUX_IF_BONDING_H -#ifdef __KERNEL__ -#include #include -#include -#endif /* __KERNEL__ */ - #include +#include + +/* userland - kernel ABI version (2003/05/08) */ +#define BOND_ABI_VERSION 1 /* * We can remove these ioctl definitions in 2.5. People should use the @@ -41,6 +61,9 @@ #define BOND_MODE_ACTIVEBACKUP 1 #define BOND_MODE_XOR 2 #define BOND_MODE_BROADCAST 3 +#define BOND_MODE_8023AD 4 +#define BOND_MODE_TLB 5 +#define BOND_MODE_ALB 6 /* TLB + RLB (receive load balancing) */ /* each slave's link has 4 states */ #define BOND_LINK_UP 0 /* link is up and running */ @@ -58,11 +81,6 @@ #define BOND_MULTICAST_ACTIVE 1 #define BOND_MULTICAST_ALL 2 -struct bond_parm_tbl { - char *modename; - int mode; -}; - typedef struct ifbond { __s32 bond_mode; __s32 num_slaves; @@ -78,52 +96,15 @@ __u32 link_failure_count; } ifslave; -#ifdef __KERNEL__ -typedef struct slave { - struct slave *next; - struct slave *prev; - struct net_device *dev; - short delay; - unsigned long jiffies; - char link; /* one of BOND_LINK_XXXX */ - char state; /* one of BOND_STATE_XXXX */ - unsigned short original_flags; - u32 link_failure_count; -} slave_t; - -/* - * Here are the locking policies for the two bonding locks: - * - * 1) Get bond->lock when reading/writing slave list. - * 2) Get bond->ptrlock when reading/writing bond->current_slave. - * (It is unnecessary when the write-lock is put with bond->lock.) - * 3) When we lock with bond->ptrlock, we must lock with bond->lock - * beforehand. 
- */ -typedef struct bonding { - slave_t *next; - slave_t *prev; - slave_t *current_slave; - slave_t *primary_slave; - slave_t *current_arp_slave; - __s32 slave_cnt; - rwlock_t lock; - rwlock_t ptrlock; - struct timer_list mii_timer; - struct timer_list arp_timer; - struct net_device_stats *stats; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *bond_proc_dir; - struct proc_dir_entry *bond_proc_info_file; -#endif /* CONFIG_PROC_FS */ - struct bonding *next_bond; - struct net_device *device; - struct dev_mc_list *mc_list; - unsigned short flags; -} bonding_t; -#endif /* __KERNEL__ */ +struct ad_info { + __u16 aggregator_id; + __u16 ports; + __u16 actor_key; + __u16 partner_key; + __u8 partner_system[ETH_ALEN]; +}; -#endif /* _LINUX_BOND_H */ +#endif /* _LINUX_IF_BONDING_H */ /* * Local variables: diff -Nru a/include/linux/if_vlan.h b/include/linux/if_vlan.h --- a/include/linux/if_vlan.h Thu Jun 19 23:46:52 2003 +++ b/include/linux/if_vlan.h Thu Jun 19 23:46:52 2003 @@ -148,6 +148,7 @@ { struct net_device_stats *stats; + skb->real_dev = skb->dev; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { kfree_skb(skb); diff -Nru a/include/linux/skbuff.h b/include/linux/skbuff.h --- a/include/linux/skbuff.h Thu Jun 19 23:46:51 2003 +++ b/include/linux/skbuff.h Thu Jun 19 23:46:51 2003 @@ -135,6 +135,10 @@ struct sock *sk; /* Socket we are owned by */ struct timeval stamp; /* Time we arrived */ struct net_device *dev; /* Device we arrived on/are leaving by */ + struct net_device *real_dev; /* For support of point to point protocols + (e.g. 802.3ad) over bonding, we must save the + physical device that got the packet before + replacing skb->dev with the virtual device. 
*/ /* Transport layer header */ union diff -Nru a/include/net/if_inet6.h b/include/net/if_inet6.h --- a/include/net/if_inet6.h Thu Jun 19 23:46:53 2003 +++ b/include/net/if_inet6.h Thu Jun 19 23:46:53 2003 @@ -195,5 +195,10 @@ buf[5]=0x00; } } + +static inline void ipv6_arcnet_mc_map(const struct in6_addr *addr, char *buf) +{ + buf[0] = 0x00; +} #endif #endif diff -Nru a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h --- a/include/net/irda/irlan_common.h Thu Jun 19 23:46:52 2003 +++ b/include/net/irda/irlan_common.h Thu Jun 19 23:46:52 2003 @@ -195,8 +195,6 @@ struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr); void irlan_close(struct irlan_cb *self); void irlan_close_tsaps(struct irlan_cb *self); -void irlan_mod_inc_use_count(void); -void irlan_mod_dec_use_count(void); int irlan_register_netdev(struct irlan_cb *self); void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel); diff -Nru a/net/core/dev.c b/net/core/dev.c --- a/net/core/dev.c Thu Jun 19 23:46:52 2003 +++ b/net/core/dev.c Thu Jun 19 23:46:52 2003 @@ -1372,8 +1372,10 @@ { struct net_device *dev = skb->dev; - if (dev->master) + if (dev->master) { + skb->real_dev = skb->dev; skb->dev = dev->master; + } } static void net_tx_action(struct softirq_action *h) diff -Nru a/net/core/skbuff.c b/net/core/skbuff.c --- a/net/core/skbuff.c Thu Jun 19 23:46:52 2003 +++ b/net/core/skbuff.c Thu Jun 19 23:46:52 2003 @@ -231,6 +231,7 @@ skb->sk = NULL; skb->stamp.tv_sec=0; /* No idea about time */ skb->dev = NULL; + skb->real_dev = NULL; skb->dst = NULL; memset(skb->cb, 0, sizeof(skb->cb)); skb->pkt_type = PACKET_HOST; /* Default type */ @@ -362,6 +363,7 @@ n->sk = NULL; C(stamp); C(dev); + C(real_dev); C(h); C(nh); C(mac); @@ -417,6 +419,7 @@ new->list=NULL; new->sk=NULL; new->dev=old->dev; + new->real_dev=old->real_dev; new->priority=old->priority; new->protocol=old->protocol; new->dst=dst_clone(old->dst); diff -Nru a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c --- a/net/ipv6/addrconf.c Thu Jun 
19 23:46:52 2003 +++ b/net/ipv6/addrconf.c Thu Jun 19 23:46:52 2003 @@ -30,6 +30,7 @@ * address validation timer. * Yuji SEKIYA @USAGI : Don't assign a same IPv6 * address on a same interface. + * YOSHIFUJI Hideaki @USAGI : ARCnet support */ #include @@ -42,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -827,6 +829,13 @@ eui[4] = 0xFE; eui[0] ^= 2; return 0; + case ARPHRD_ARCNET: + /* XXX: inherit EUI-64 fro mother interface -- yoshfuji */ + if (dev->addr_len != ARCNET_ALEN) + return -1; + memset(eui, 0, 7); + eui[7] = *(u8*)dev->dev_addr; + return 0; } return -1; } @@ -1355,7 +1364,8 @@ if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_FDDI) && - (dev->type != ARPHRD_IEEE802_TR)) { + (dev->type != ARPHRD_IEEE802_TR) && + (dev->type != ARPHRD_ARCNET)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -2188,6 +2198,7 @@ case ARPHRD_ETHER: case ARPHRD_FDDI: case ARPHRD_IEEE802_TR: + case ARPHRD_ARCNET: addrconf_dev_config(dev); break; default:; diff -Nru a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c --- a/net/ipv6/ndisc.c Thu Jun 19 23:46:51 2003 +++ b/net/ipv6/ndisc.c Thu Jun 19 23:46:51 2003 @@ -226,6 +226,9 @@ case ARPHRD_IEEE802_TR: ipv6_tr_mc_map(addr,buf); return 0; + case ARPHRD_ARCNET: + ipv6_arcnet_mc_map(addr, buf); + return 0; default: if (dir) { memcpy(buf, dev->broadcast, dev->addr_len); diff -Nru a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c --- a/net/irda/irlan/irlan_eth.c Thu Jun 19 23:46:52 2003 +++ b/net/irda/irlan/irlan_eth.c Thu Jun 19 23:46:52 2003 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -61,6 +62,7 @@ dev->hard_start_xmit = irlan_eth_xmit; dev->get_stats = irlan_eth_get_stats; dev->set_multicast_list = irlan_eth_set_multicast_list; + SET_MODULE_OWNER(dev); /* NETIF_F_DYNALLOC feature was set by irlan_eth_init() and would * cause the unregister_netdev() to do asynch completion _and_ @@ -122,8 +124,6 @@ self->disconnect_reason = 0; 
irlan_client_wakeup(self, self->saddr, self->daddr); - irlan_mod_inc_use_count(); - /* Make sure we have a hardware address before we return, so DHCP clients gets happy */ interruptible_sleep_on(&self->open_wait); @@ -148,8 +148,6 @@ /* Stop device */ netif_stop_queue(dev); - irlan_mod_dec_use_count(); - irlan_close_data_channel(self); irlan_close_tsaps(self);