diff -u --recursive --new-file v1.1.19/linux/Makefile linux/Makefile --- v1.1.19/linux/Makefile Fri Jun 17 15:20:04 1994 +++ linux/Makefile Thu Jun 16 10:31:59 1994 @@ -1,6 +1,6 @@ VERSION = 1 PATCHLEVEL = 1 -SUBLEVEL = 19 +SUBLEVEL = 20 all: Version zImage diff -u --recursive --new-file v1.1.19/linux/config.in linux/config.in --- v1.1.19/linux/config.in Thu Jun 2 13:50:54 1994 +++ linux/config.in Fri Jun 17 14:11:05 1994 @@ -80,7 +80,7 @@ # bool ' SLIP debugging on' SL_DUMP y fi bool 'PPP (point-to-point) support' CONFIG_PPP n -bool 'Load balancing support (very experimental)' CONFIG_SLAVE_BALANCING n +bool 'Load balancing support (experimental)' CONFIG_SLAVE_BALANCING n bool 'PLIP (parallel port) support' CONFIG_PLIP n bool 'NE2000/NE1000 support' CONFIG_NE2000 n bool 'WD80*3 support' CONFIG_WD80x3 n @@ -95,6 +95,7 @@ bool 'AT1700 support' CONFIG_AT1700 n #bool 'Zenith Z-Note support' CONFIG_ZNET n #bool 'EtherExpress support' CONFIG_EEXPRESS n +#bool 'Apricot Xen-II on board ethernet' CONFIG_APRICOT n bool 'DEPCA support' CONFIG_DEPCA n #bool 'NI52EE support' CONFIG_NI52 n #bool 'NI65EE support' CONFIG_NI65 n @@ -107,7 +108,7 @@ comment 'CD-ROM drivers' -bool 'Sony CDU31A CDROM driver support' CONFIG_CDU31A n +bool 'Sony CDU31A/CDU33A CDROM driver support' CONFIG_CDU31A n bool 'Mitsumi CDROM driver support' CONFIG_MCD n bool 'Matsushita/Panasonic CDROM driver support' CONFIG_SBPCD n diff -u --recursive --new-file v1.1.19/linux/drivers/block/cdu31a.c linux/drivers/block/cdu31a.c --- v1.1.19/linux/drivers/block/cdu31a.c Mon Jan 10 17:57:05 1994 +++ linux/drivers/block/cdu31a.c Fri Jun 10 17:52:16 1994 @@ -35,6 +35,12 @@ * asyncronous events from the drive informing the driver that a disk * has been inserted, removed, etc. * + * NEWS FLASH - The driver now supports interrupts and DMA, but they are + * turned off by default. Use of interrupts is highly encouraged, it + * cuts CPU usage down to a reasonable level. 
For a single-speed drive, + * DMA is ok, but the 8-bit DMA cannot keep up with the double speed + * drives. + * * One thing about these drives: They talk in MSF (Minute Second Frame) format. * There are 75 frames a second, 60 seconds a minute, and up to 75 minutes on a * disk. The funny thing is that these are sent to the drive in BCD, but the @@ -58,8 +64,6 @@ * */ - - #include #include #include @@ -69,10 +73,12 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -82,17 +88,30 @@ #define CDU31A_MAX_CONSECUTIVE_ATTENTIONS 10 -static unsigned short cdu31a_addresses[] = -{ - 0x340, /* Standard configuration Sony Interface */ - 0x1f88, /* Fusion CD-16 */ - 0x230, /* SoundBlaster 16 card */ - 0x360, /* Secondary standard Sony Interface */ - 0x320, /* Secondary standard Sony Interface */ - 0x330, /* Secondary standard Sony Interface */ - 0 -}; +/* Define the following if you have data corruption problems. */ +#undef SONY_POLL_EACH_BYTE +/* +** Edit the following data to change interrupts, DMA channels, etc. +** Default is polled and no DMA. DMA is not recommended for double-speed +** drives. +*/ +static struct +{ + unsigned short base; /* I/O Base Address */ + short dma_num; /* DMA Number (-1 means no DMA) */ + short int_num; /* Interrupt Number (-1 means scan for it, + 0 means don't use) */ +} cdu31a_addresses[] = +{ + { 0x340, -1, 0 }, /* Standard configuration Sony Interface */ + { 0x1f88, -1, 0 }, /* Fusion CD-16 */ + { 0x230, -1, 0 }, /* SoundBlaster 16 card */ + { 0x360, -1, 0 }, /* Secondary standard Sony Interface */ + { 0x320, -1, 0 }, /* Secondary standard Sony Interface */ + { 0x330, -1, 0 }, /* Secondary standard Sony Interface */ + { 0 } +}; static int handle_sony_cd_attention(void); static int read_subcode(void); @@ -157,6 +176,8 @@ static struct task_struct *has_cd_task = NULL; /* The task that is currently using the CDROM drive, or NULL if none. */ +static int is_double_speed = 0; /* Is the drive a CDU33A? 
*/ + /* * The audio status uses the values from read subchannel data as specified * in include/linux/cdrom.h. @@ -170,9 +191,16 @@ * position during a pause so a resume can restart it. It uses the * audio status variable above to tell if it is paused. */ -unsigned volatile char cur_pos_msf[3] = { 0, 0, 0 }; -unsigned volatile char final_pos_msf[3] = { 0, 0, 0 }; +static unsigned volatile char cur_pos_msf[3] = { 0, 0, 0 }; +static unsigned volatile char final_pos_msf[3] = { 0, 0, 0 }; + +static int irq_used = -1; +static int dma_channel = -1; +static struct wait_queue *cdu31a_irq_wait = NULL; +static int curr_control_reg = 0; /* Current value of the control register */ + + /* * This routine returns 1 if the disk has been changed since the last * check or 0 if it hasn't. Setting flag to 0 resets the changed flag. @@ -199,6 +227,30 @@ return retval; } +static inline void +enable_interrupts(void) +{ + curr_control_reg |= ( SONY_ATTN_INT_EN_BIT + | SONY_RES_RDY_INT_EN_BIT + | SONY_DATA_RDY_INT_EN_BIT); + outb(curr_control_reg, sony_cd_control_reg); +} + +static inline void +disable_interrupts(void) +{ + curr_control_reg &= ~( SONY_ATTN_INT_EN_BIT + | SONY_RES_RDY_INT_EN_BIT + | SONY_DATA_RDY_INT_EN_BIT); + outb(curr_control_reg, sony_cd_control_reg); +} + +static void +cdu31a_interrupt(int unused) +{ + disable_interrupts(); + wake_up(&cdu31a_irq_wait); +} /* * Wait a little while (used for polling the drive). 
If in initialization, @@ -207,9 +259,19 @@ static inline void sony_sleep(void) { - current->state = TASK_INTERRUPTIBLE; - current->timeout = jiffies; - schedule(); + if (irq_used <= 0) + { + current->state = TASK_INTERRUPTIBLE; + current->timeout = jiffies; + schedule(); + } + else /* Interrupt driven */ + { + cli(); + enable_interrupts(); + interruptible_sleep_on(&cdu31a_irq_wait); + sti(); + } } @@ -256,31 +318,32 @@ static inline void reset_drive(void) { + curr_control_reg = 0; outb(SONY_DRIVE_RESET_BIT, sony_cd_control_reg); } static inline void clear_attention(void) { - outb(SONY_ATTN_CLR_BIT, sony_cd_control_reg); + outb(curr_control_reg | SONY_ATTN_CLR_BIT, sony_cd_control_reg); } static inline void clear_result_ready(void) { - outb(SONY_RES_RDY_CLR_BIT, sony_cd_control_reg); + outb(curr_control_reg | SONY_RES_RDY_CLR_BIT, sony_cd_control_reg); } static inline void clear_data_ready(void) { - outb(SONY_DATA_RDY_CLR_BIT, sony_cd_control_reg); + outb(curr_control_reg | SONY_DATA_RDY_CLR_BIT, sony_cd_control_reg); } static inline void clear_param_reg(void) { - outb(SONY_PARAM_CLR_BIT, sony_cd_control_reg); + outb(curr_control_reg | SONY_PARAM_CLR_BIT, sony_cd_control_reg); } static inline unsigned char @@ -310,8 +373,8 @@ static inline void write_cmd(unsigned char cmd) { + outb(curr_control_reg | SONY_RES_RDY_INT_EN_BIT, sony_cd_control_reg); outb(cmd, sony_cd_cmd_reg); - outb(SONY_RES_RDY_INT_EN_BIT, sony_cd_control_reg); } /* @@ -327,7 +390,11 @@ params[0] = SONY_SD_MECH_CONTROL; - params[1] = 0x03; + params[1] = 0x03; /* Set auto spin up and auto eject */ + if (is_double_speed) + { + params[1] |= 0x04; /* Set the drive to double speed if possible */ + } do_sony_cd_cmd(SONY_SET_DRIVE_PARAM_CMD, params, 2, @@ -532,18 +599,57 @@ } } +static void +read_data_dma(unsigned char *data, + unsigned int data_size, + unsigned char *result_buffer, + unsigned int *result_size) +{ + unsigned int retry_count; + + + cli(); + disable_dma(dma_channel); + 
clear_dma_ff(dma_channel); + set_dma_mode(dma_channel, DMA_MODE_READ); + set_dma_addr(dma_channel, (int) data); + set_dma_count(dma_channel, data_size); + enable_dma(dma_channel); + sti(); + + retry_count = jiffies + SONY_JIFFIES_TIMEOUT; + while ( (retry_count > jiffies) + && (!is_data_ready()) + && (!is_result_ready())) + { + while (handle_sony_cd_attention()) + ; + + sony_sleep(); + } + if (!is_data_requested()) + { + result_buffer[0] = 0x20; + result_buffer[1] = SONY_TIMEOUT_OP_ERR; + *result_size = 2; + return; + } +} + /* * Read in a 2048 byte block of data. */ static void read_data_block(unsigned char *data, + unsigned int data_size, unsigned char *result_buffer, unsigned int *result_size) { +#ifdef SONY_POLL_EACH_BYTE int i; unsigned int retry_count; - for (i=0; i<2048; i++) + for (i=0; i jiffies) && (!is_data_requested())) @@ -564,6 +670,9 @@ *data = read_data_register(); data++; } +#else + insb(sony_cd_read_reg, data, data_size); +#endif } /* @@ -596,22 +705,6 @@ unsigned int data_size = orig_data_size; - cli(); - while (sony_inuse) - { - interruptible_sleep_on(&sony_wait); - if (current->signal & ~current->blocked) - { - result_buffer[0] = 0x20; - result_buffer[1] = SONY_SIGNAL_OP_ERR; - *result_size = 2; - return 0; - } - } - sony_inuse = 1; - has_cd_task = current; - sti(); - num_retries = 0; retry_data_operation: result_buffer[0] = 0; @@ -679,14 +772,28 @@ result_read = 1; get_result(result_buffer, result_size); } - else /* Handle data next */ + /* Handle data next */ + else if (dma_channel > 0) + { + clear_data_ready(); + read_data_dma(data, 2048, result_buffer, result_size); + data += 2048; + data_size -= 2048; + cur_offset = cur_offset + 2048; + num_sectors_read++; + } + else { /* * The drive has to be polled for status on a byte-by-byte basis - * to know if the data is ready. Yuck. I really wish I could use DMA. + * to know if the data is ready. Yuck. I really wish I could use + * DMA all the time. 
+ * + * NEWS FLASH - I am no longer polling on a byte-by-byte basis. + * It seems to work ok, but the spec says you shouldn't. */ clear_data_ready(); - read_data_block(data, result_buffer, result_size); + read_data_block(data, 2048, result_buffer, result_size); data += 2048; data_size -= 2048; cur_offset = cur_offset + 2048; @@ -734,10 +841,6 @@ goto retry_data_operation; } - has_cd_task = NULL; - sony_inuse = 0; - wake_up_interruptible(&sony_wait); - return(num_sectors_read); } @@ -989,6 +1092,23 @@ unsigned int read_size; + /* + * Make sure no one else is using the driver; wait for them + * to finish if it is so. + */ + cli(); + while (sony_inuse) + { + interruptible_sleep_on(&sony_wait); + if (current->signal & ~current->blocked) + { + return; + } + } + sony_inuse = 1; + has_cd_task = current; + sti(); + if (!sony_spun_up) { scd_open (NULL,NULL); @@ -1003,7 +1123,7 @@ */ if (!(CURRENT) || CURRENT->dev < 0) { - return; + goto end_do_cdu31a_request; } INIT_REQUEST; @@ -1111,6 +1231,11 @@ panic("Unkown SONY CD cmd"); } } + +end_do_cdu31a_request: + has_cd_task = NULL; + sony_inuse = 0; + wake_up_interruptible(&sony_wait); } @@ -1771,6 +1896,15 @@ } +static struct sigaction cdu31a_sigaction = { + cdu31a_interrupt, + 0, + SA_INTERRUPT, + NULL +}; + +static int cdu31a_block_size; + /* * Initialize the driver. 
*/ @@ -1781,6 +1915,7 @@ unsigned int res_size; int i; int drive_found; + int tmp_irq; /* @@ -1795,20 +1930,20 @@ i = 0; drive_found = 0; - while ( (cdu31a_addresses[i] != 0) + while ( (cdu31a_addresses[i].base != 0) && (!drive_found)) { - if (check_region(cdu31a_addresses[i], 4)) { + if (check_region(cdu31a_addresses[i].base, 4)) { i++; continue; } - get_drive_configuration(cdu31a_addresses[i], + get_drive_configuration(cdu31a_addresses[i].base, drive_config.exec_status, &res_size); if ((res_size > 2) && ((drive_config.exec_status[0] & 0x20) == 0x00)) { drive_found = 1; - snarf_region(cdu31a_addresses[i], 4); + snarf_region(cdu31a_addresses[i].base, 4); if (register_blkdev(MAJOR_NR,"cdu31a",&scd_fops)) { @@ -1816,6 +1951,49 @@ return mem_start; } + if (SONY_HWC_DOUBLE_SPEED(drive_config)) + { + is_double_speed = 1; + } + + tmp_irq = cdu31a_addresses[i].int_num; + if (tmp_irq < 0) + { + autoirq_setup(0); + enable_interrupts(); + reset_drive(); + tmp_irq = autoirq_report(10); + disable_interrupts(); + + set_drive_params(); + irq_used = tmp_irq; + } + else + { + set_drive_params(); + irq_used = tmp_irq; + } + + if (irq_used > 0) + { + if (irqaction(irq_used,&cdu31a_sigaction)) + { + irq_used = 0; + printk("Unable to grab IRQ%d for the CDU31A driver\n", irq_used); + } + } + + dma_channel = cdu31a_addresses[i].dma_num; + if (dma_channel > 0) + { + if (request_dma(dma_channel)) + { + dma_channel = -1; + printk("Unable to grab DMA%d for the CDU31A driver\n", + dma_channel); + } + } + sony_buffer_size = mem_size[SONY_HWC_GET_BUF_MEM_SIZE(drive_config)]; sony_buffer_sectors = sony_buffer_size / 2048; @@ -1827,19 +2005,59 @@ printk(" using %d byte buffer", sony_buffer_size); if (SONY_HWC_AUDIO_PLAYBACK(drive_config)) { - printk(", capable of audio playback"); + printk(", audio"); + } + if (SONY_HWC_EJECT(drive_config)) + { + printk(", eject"); + } + if (SONY_HWC_LED_SUPPORT(drive_config)) + { + printk(", LED"); + } + if (SONY_HWC_ELECTRIC_VOLUME(drive_config)) + { + 
printk(", elec. Vol"); } + if (SONY_HWC_ELECTRIC_VOLUME_CTL(drive_config)) + { + printk(", sep. Vol"); + } + if (is_double_speed) + { + printk(", double speed"); + } + if (irq_used > 0) + { + printk(", irq %d", irq_used); + } + if (dma_channel > 0) + { + printk(", drq %d", dma_channel); + } printk("\n"); - set_drive_params(); - blk_dev[MAJOR_NR].request_fn = DEVICE_REQUEST; - read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ + read_ahead[MAJOR_NR] = 32; /* 32 sector (16kB) read-ahead */ + cdu31a_block_size = 2048; /* 2kB block size */ + /* use 'mount -o block=2048' */ + blksize_size[MAJOR_NR] = &cdu31a_block_size; sony_toc = (struct s_sony_toc *) mem_start; mem_start += sizeof(*sony_toc); last_sony_subcode = (struct s_sony_subcode *) mem_start; mem_start += sizeof(*last_sony_subcode); + + /* If memory will not fit into the current 64KB block, align it + so the block will not cross a 64KB boundary. This is + because DMA cannot cross 64KB boundaries. */ + if ( (dma_channel > 0) + && ( ((mem_start) & (~0xffff)) + != (((mem_start) + sony_buffer_size) & (~0xffff)))) + { + mem_start = (((int)mem_start) + 0x10000) & (~0xffff); + } + sony_buffer = (unsigned char *) mem_start; mem_start += sony_buffer_size; } diff -u --recursive --new-file v1.1.19/linux/drivers/char/mem.c linux/drivers/char/mem.c --- v1.1.19/linux/drivers/char/mem.c Sat May 7 14:54:02 1994 +++ linux/drivers/char/mem.c Thu Jun 16 13:03:19 1994 @@ -103,6 +103,7 @@ mpnt->vm_start = addr; mpnt->vm_end = addr + len; mpnt->vm_page_prot = prot; + mpnt->vm_flags = 0; mpnt->vm_share = NULL; mpnt->vm_inode = inode; inode->i_count++; @@ -197,6 +198,7 @@ mpnt->vm_start = addr; mpnt->vm_end = addr + len; mpnt->vm_page_prot = prot; + mpnt->vm_flags = 0; mpnt->vm_share = NULL; mpnt->vm_inode = NULL; mpnt->vm_offset = off; diff -u --recursive --new-file v1.1.19/linux/drivers/net/3c501.c linux/drivers/net/3c501.c --- v1.1.19/linux/drivers/net/3c501.c Tue May 31 12:48:17 1994 +++ linux/drivers/net/3c501.c Fri Jun 17 
07:53:51 1994 @@ -291,8 +291,7 @@ if (el_debug > 2) printk(" queued xmit.\n"); - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); return 0; } @@ -514,9 +513,9 @@ #ifdef MODULE char kernel_version[] = UTS_RELEASE; static struct device dev_3c501 = { - "" /*"3c501"*/, + " " /*"3c501"*/, 0, 0, 0, 0, - 0x280, 7, + 0x280, 5, 0, 0, 0, NULL, el1_probe }; int diff -u --recursive --new-file v1.1.19/linux/drivers/net/3c507.c linux/drivers/net/3c507.c --- v1.1.19/linux/drivers/net/3c507.c Tue May 24 00:34:50 1994 +++ linux/drivers/net/3c507.c Fri Jun 17 07:53:52 1994 @@ -479,8 +479,7 @@ outb(0x84, ioaddr + MISC_CTRL); } - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); /* You might need to clean up and record Tx statistics here. */ diff -u --recursive --new-file v1.1.19/linux/drivers/net/3c509.c linux/drivers/net/3c509.c --- v1.1.19/linux/drivers/net/3c509.c Tue May 24 00:34:50 1994 +++ linux/drivers/net/3c509.c Fri Jun 17 07:53:52 1994 @@ -388,8 +388,7 @@ outw(0x9000 + 1536, ioaddr + EL3_CMD); } - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); /* Clear the Tx status stack. */ { @@ -669,7 +668,7 @@ #ifdef MODULE char kernel_version[] = UTS_RELEASE; static struct device dev_3c509 = { - "" /*"3c509"*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, el3_probe }; + " " /*"3c509"*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, el3_probe }; int init_module(void) diff -u --recursive --new-file v1.1.19/linux/drivers/net/8390.c linux/drivers/net/8390.c --- v1.1.19/linux/drivers/net/8390.c Tue May 24 00:34:50 1994 +++ linux/drivers/net/8390.c Fri Jun 17 07:53:52 1994 @@ -230,8 +230,7 @@ ei_local->irqlock = 0; outb_p(ENISR_ALL, e8390_base + EN0_IMR); - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); return 0; } @@ -556,7 +555,7 @@ rely on higher-level filtering for now. 
*/ outb_p(E8390_RXCONFIG | 0x08, ioaddr + EN0_RXCR); } else if (num_addrs < 0) - outb_p(E8390_RXCONFIG | 0x10, ioaddr + EN0_RXCR); + outb_p(E8390_RXCONFIG | 0x18, ioaddr + EN0_RXCR); else outb_p(E8390_RXCONFIG, ioaddr + EN0_RXCR); } diff -u --recursive --new-file v1.1.19/linux/drivers/net/Makefile linux/drivers/net/Makefile --- v1.1.19/linux/drivers/net/Makefile Thu Jun 2 13:50:54 1994 +++ linux/drivers/net/Makefile Fri Jun 17 07:53:52 1994 @@ -136,6 +136,9 @@ NETDRV_OBJS := $(NETDRV_OBJS) net.a(ac3200.o) CONFIG_8390 = CONFIG_8390 endif +ifdef CONFIG_APRICOT +NETDRV_OBJS := $(NETDRV_OBJS) net.a(apricot.o) +endif ifdef CONFIG_8390 NETDRV_OBJS := $(NETDRV_OBJS) net.a(8390.o) diff -u --recursive --new-file v1.1.19/linux/drivers/net/apricot.c linux/drivers/net/apricot.c --- v1.1.19/linux/drivers/net/apricot.c Thu Jan 1 02:00:00 1970 +++ linux/drivers/net/apricot.c Fri Jun 17 07:53:52 1994 @@ -0,0 +1,930 @@ +/* apricot.c: An Apricot 82596 ethernet driver for linux. */ +/* + Apricot + Written 1994 by Mark Evans. + This driver is for the Apricot 82596 bus-master interface + + Driver skeleton + Written 1993 by Donald Becker. + Copyright 1993 United States Government as represented by the Director, + National Security Agency. This software may only be used and distributed + according to the terms of the GNU Public License as modified by SRC, + incorporated herein by reference. 
+ + The author may be reached as becker@super.org or + C/O Supercomputing Research Ctr., 17100 Science Dr., Bowie MD 20715 + + +*/ + +static char *version = "apricot.c:v0.02 19/05/94\n"; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifndef HAVE_PORTRESERVE +#define check_region(addr, size) 0 +#define snarf_region(addr, size) do ; while(0) +#endif + +#ifndef HAVE_ALLOC_SKB +#define alloc_skb(size, priority) (struct sk_buff *) kmalloc(size,priority) +#define kfree_skbmem(buff, size) kfree_s(buff,size) +#endif + +struct device *init_etherdev(struct device *dev, int sizeof_private, + unsigned long *mem_start); + +#define APRICOT_DEBUG 1 + +#ifdef APRICOT_DEBUG +int i596_debug = APRICOT_DEBUG; +#else +int i596_debug = 1; +#endif + +#define APRICOT_TOTAL_SIZE 17 + +#define CMD_EOL 0x8000 /* The last command of the list, stop. */ +#define CMD_SUSP 0x4000 /* Suspend after doing cmd. */ +#define CMD_INTR 0x2000 /* Interrupt after doing cmd. 
*/ + +#define CMD_FLEX 0x0008 /* Enable flexable memory model */ + +enum commands { + CmdNOp = 0, CmdSASetup = 1, CmdConfigure = 2, CmdMulticastList = 3, + CmdTx = 4, CmdTDR = 5, CmdDump = 6, CmdDiagnose = 7}; + +#define STAT_C 0x8000 /* Set to 0 after execution */ +#define STAT_B 0x4000 /* Command being executed */ +#define STAT_OK 0x2000 /* Command executed ok */ +#define STAT_A 0x1000 /* Command aborted */ + +#define CUC_START 0x0100 +#define CUC_RESUME 0x0200 +#define CUC_SUSPEND 0x0300 +#define CUC_ABORT 0x0400 +#define RX_START 0x0010 +#define RX_RESUME 0x0020 +#define RX_SUSPEND 0x0030 +#define RX_ABORT 0x0040 + +struct i596_cmd { + unsigned short status; + unsigned short command; + struct i596_cmd *next; +}; + +#define EOF 0x8000 +#define SIZE_MASK 0x3fff + +struct i596_tbd { + unsigned short size; + unsigned short pad; + struct i596_tbd *next; + char *data; +}; + +struct tx_cmd { + struct i596_cmd cmd; + struct i596_tbd *tbd; + unsigned short size; + unsigned short pad; +}; + +struct i596_rfd { + unsigned short stat; + unsigned short cmd; + struct i596_rfd *next; + long rbd; + unsigned short count; + unsigned short size; + char data[1532]; +}; + +#define RX_RING_SIZE 16 + +struct i596_scb { + unsigned short status; + unsigned short command; + struct i596_cmd *cmd; + struct i596_rfd *rfd; + unsigned long crc_err; + unsigned long align_err; + unsigned long resource_err; + unsigned long over_err; + unsigned long rcvdt_err; + unsigned long short_err; + unsigned short t_on; + unsigned short t_off; +}; + +struct i596_iscp { + unsigned long stat; + struct i596_scb *scb; +}; + +struct i596_scp { + unsigned long sysbus; + unsigned long pad; + struct i596_iscp *iscp; +}; + +struct i596_private { + struct i596_scp scp; + struct i596_iscp iscp; + struct i596_scb scb; + struct i596_cmd set_add; + char eth_addr[8]; + struct i596_cmd set_conf; + char i596_config[16]; + struct i596_cmd tdr; + unsigned long stat; + struct i596_rfd rx[RX_RING_SIZE]; + int last_restart; + 
struct i596_rfd *rx_tail; + struct i596_cmd *cmd_tail; + struct i596_cmd *cmd_head; + int cmd_backlog; + unsigned long last_cmd; + struct enet_statistics stats; +}; + +char init_setup[] = { + 0x8E, /* length, prefetch on */ + 0xC8, /* fifo to 8, monitor off */ + 0x80, /* don't save bad frames */ + 0x2E, /* No source address insertion, 8 byte preamble */ + 0x00, /* priority and backoff defaults */ + 0x60, /* interframe spacing */ + 0x00, /* slot time LSB */ + 0xf2, /* slot time and retries */ + 0x00, /* promiscuous mode */ + 0x00, /* collision detect */ + 0x40, /* minimum frame length */ + 0xff, + 0x00, + 0x7f /* *multi IA */ }; + +char adds[] = {0x00, 0x00, 0x49, 0x20, 0x54, 0xDA, 0x80, 0x00, 0x4e, 0x02, 0xb7, 0xb8}; + +static int i596_open(struct device *dev); +static int i596_start_xmit(struct sk_buff *skb, struct device *dev); +static void i596_interrupt(int reg_ptr); +static int i596_close(struct device *dev); +static struct enet_statistics *i596_get_stats(struct device *dev); +static void i596_add_cmd(struct device *dev, struct i596_cmd *cmd); +static void i596_cleanup_cmd(struct i596_private *lp); +static void print_eth(char *); +#ifdef HAVE_MULTICAST +static void set_multicast_list(struct device *dev, int num_addrs, void *addrs); +#endif + + + +static inline void +init_rx_bufs(struct device *dev) +{ + struct i596_private *lp = (struct i596_private *)dev->priv; + int i; + int boguscnt = 50; + short ioaddr = dev->base_addr; + + if (i596_debug > 1) printk ("%s: init_rx_bufs.\n", dev->name); + + for (i = 0; i < RX_RING_SIZE; i++) + { + if (i == 0) + { + lp->scb.rfd = &lp->rx[0]; + } + if (i == (RX_RING_SIZE - 1)) + { + lp->rx_tail = &(lp->rx[i]); + lp->rx[i].next = &lp->rx[0]; + lp->rx[i].cmd = CMD_EOL; + } + else + { + lp->rx[i].next = &lp->rx[i+1]; + lp->rx[i].cmd = 0x0000; + } + lp->rx[i].stat = 0x0000; + lp->rx[i].rbd = 0xffffffff; + lp->rx[i].count = 0; + lp->rx[i].size = 1532; + } + + while (lp->scb.status, lp->scb.command) + if (--boguscnt == 0) + { + 
printk("%s: init_rx_bufs timed out with status %4.4x, cmd %4.4x.\n", + dev->name, lp->scb.status, lp->scb.command); + break; + } + + lp->scb.command = RX_START; + outw(0, ioaddr+4); + + return; + +} + +static inline void +init_i596_mem(struct device *dev) +{ + struct i596_private *lp = (struct i596_private *)dev->priv; + short ioaddr = dev->base_addr; + int boguscnt = 50; + + /* change the scp address */ + outw(0, ioaddr); + outw(0, ioaddr); + outb(4, ioaddr+0xf); + outw(((((int)&lp->scp) & 0xffff) | 2), ioaddr); + outw((((int)&lp->scp)>>16) & 0xffff, ioaddr); + + lp->last_cmd=jiffies; + + lp->scp.sysbus = 0x00440000; + lp->scp.iscp = &(lp->iscp); + lp->iscp.scb = &(lp->scb); + lp->iscp.stat = 0x0001; + lp->cmd_backlog = 0; + + lp->cmd_head = lp->scb.cmd = (struct i596_cmd *) -1; + + if (i596_debug > 2) printk("%s: starting i82596.\n", dev->name); + + (void) inb (ioaddr+0x10); + outb(4, ioaddr+0xf); + outw(0, ioaddr+4); + + while (lp->iscp.stat) + if (--boguscnt == 0) + { + printk("%s: i82596 initialization timed out with status %4.4x, cmd %4.4x.\n", + dev->name, lp->scb.status, lp->scb.command); + break; + } + + memcpy (lp->i596_config, init_setup, 14); + lp->set_conf.command = CmdConfigure; + i596_add_cmd(dev, &lp->set_conf); + + memcpy (lp->eth_addr, dev->dev_addr, 6); + lp->set_add.command = CmdSASetup; + i596_add_cmd(dev, &lp->set_add); + + lp->tdr.command = CmdTDR; + i596_add_cmd(dev, &lp->tdr); + + init_rx_bufs(dev); + + return; + +} + +static inline int +i596_rx(struct device *dev) +{ + struct i596_private *lp = (struct i596_private *)dev->priv; + int frames=0; + + if (i596_debug > 3) printk ("i596_rx()\n"); + + while ((lp->scb.rfd->stat) & STAT_C) + { + if (i596_debug >2) print_eth(lp->scb.rfd->data); + + if ((lp->scb.rfd->stat) & STAT_OK) + { + /* a good frame */ + int pkt_len = lp->scb.rfd->count & 0x3fff; + struct sk_buff *skb = alloc_skb(pkt_len, GFP_ATOMIC); + + frames++; + + if (skb == NULL) + { + printk ("%s: i596_rx Memory squeeze, dropping 
packet.\n", dev->name); + lp->stats.rx_dropped++; + break; + } + + skb->len = pkt_len; + skb->dev=dev; + memcpy(skb->data, lp->scb.rfd->data, pkt_len); + + netif_rx(skb); + lp->stats.rx_packets++; + + if (i596_debug > 4) print_eth(skb->data); + } + else + { + lp->stats.rx_errors++; + if ((lp->scb.rfd->stat) & 0x0001) lp->stats.collisions++; + if ((lp->scb.rfd->stat) & 0x0080) lp->stats.rx_length_errors++; + if ((lp->scb.rfd->stat) & 0x0100) lp->stats.rx_over_errors++; + if ((lp->scb.rfd->stat) & 0x0200) lp->stats.rx_fifo_errors++; + if ((lp->scb.rfd->stat) & 0x0400) lp->stats.rx_frame_errors++; + if ((lp->scb.rfd->stat) & 0x0800) lp->stats.rx_crc_errors++; + if ((lp->scb.rfd->stat) & 0x1000) lp->stats.rx_length_errors++; + } + + lp->scb.rfd->stat=0; + lp->rx_tail->cmd=0; + lp->rx_tail=lp->scb.rfd; + lp->scb.rfd=lp->scb.rfd->next; + lp->rx_tail->count=0; + lp->rx_tail->cmd=CMD_EOL; + + } + + if (i596_debug > 3) printk ("frames %d\n", frames); + + return 0; +} + + +static void i596_add_cmd(struct device *dev, struct i596_cmd *cmd) +{ + struct i596_private *lp = (struct i596_private *)dev->priv; + int ioaddr = dev->base_addr; + unsigned long flags; + int boguscnt = 50; + + if (i596_debug > 4) printk ("i596_add_cmd\n"); + + cmd->status = 0; + cmd->command |= (CMD_EOL|CMD_INTR); + cmd->next = (struct i596_cmd *) -1; + + save_flags(flags); + cli(); + if (lp->cmd_head != (struct i596_cmd *) -1) + lp->cmd_tail->next = cmd; + else + { + lp->cmd_head=cmd; + while (lp->scb.status, lp->scb.command) + if (--boguscnt == 0) + { + printk("i596_add_cmd timed out with status %4.4x, cmd %4.4x.\n", + lp->scb.status, lp->scb.command); + break; + } + + lp->scb.cmd = cmd; + lp->scb.command = CUC_START; + outw (0, ioaddr+4); + } + lp->cmd_tail=cmd; + lp->cmd_backlog++; + + lp->cmd_head=lp->scb.cmd; + restore_flags(flags); + + if (lp->cmd_backlog > 8) + { + int tickssofar = jiffies - lp->last_cmd; + if (tickssofar < 10) + return; + printk("%s: command unit timed out, status resetting.\n", 
+ dev->name); + + boguscnt = 50; + while (lp->scb.status, lp->scb.command) + if (--boguscnt == 0) + { + printk("i596_add_cmd timed out with status %4.4x, cmd %4.4x.\n", + lp->scb.status, lp->scb.command); + break; + } + lp->scb.command=CUC_ABORT|RX_ABORT; + outw(0, ioaddr+4); + + i596_cleanup_cmd(lp); + i596_rx(dev); + init_i596_mem(dev); + } + +} + + + +static void i596_cleanup_cmd(struct i596_private *lp) +{ + struct i596_cmd *ptr; + int boguscnt = 50; + + if (i596_debug > 4) printk ("i596_cleanup_cmd\n"); + + while (lp->cmd_head != (struct i596_cmd *) -1) + { + ptr = lp->cmd_head; + + lp->cmd_head = lp->cmd_head->next; + lp->cmd_backlog--; + + switch ((ptr->command) & 0x7) + { + case CmdTx: + { + struct tx_cmd *tx_cmd = (struct tx_cmd *) ptr; + struct sk_buff *skb = ((struct sk_buff *)(tx_cmd->tbd->data)) -1; + + dev_kfree_skb(skb, FREE_WRITE); + + lp->stats.tx_errors++; + lp->stats.tx_aborted_errors++; + + ptr->next = (struct i596_cmd * ) -1; + kfree_s((unsigned char *)tx_cmd, (sizeof (struct tx_cmd) + sizeof (struct i596_tbd))); + break; + } + case CmdMulticastList: + { + unsigned short count = *((unsigned short *) (ptr + 1)); + + ptr->next = (struct i596_cmd * ) -1; + kfree_s((unsigned char *)ptr, (sizeof (struct i596_cmd) + count + 2)); + break; + } + default: + ptr->next = (struct i596_cmd * ) -1; + } + } + + while (lp->scb.status, lp->scb.command) + if (--boguscnt == 0) + { + printk("i596_cleanup_cmd timed out with status %4.4x, cmd %4.4x.\n", + lp->scb.status, lp->scb.command); + break; + } + + lp->scb.cmd = lp->cmd_head; +} + + + + +static int +i596_open(struct device *dev) +{ + if (request_irq(dev->irq, &i596_interrupt)) { + return -EAGAIN; + } + + irq2dev_map[dev->irq] = dev; + + if (i596_debug > 1) + printk("%s: i596_open() irq %d.\n", + dev->name, dev->irq); + + dev->tbusy = 0; + dev->interrupt = 0; + dev->start = 1; + + /* Initialize the 82596 memory */ + init_i596_mem(dev); + + return 0; /* Always succeed */ +} + +static int +i596_start_xmit(struct 
sk_buff *skb, struct device *dev) +{ + struct i596_private *lp = (struct i596_private *)dev->priv; + int ioaddr = dev->base_addr; + struct tx_cmd *tx_cmd; + + if (i596_debug > 2) printk ("%s: Apricot start xmit\n", dev->name); + + /* Transmitter timeout, serious problems. */ + if (dev->tbusy) { + int tickssofar = jiffies - dev->trans_start; + if (tickssofar < 5) + return 1; + printk("%s: transmit timed out, status resetting.\n", + dev->name); + lp->stats.tx_errors++; + /* Try to restart the adaptor */ + if (lp->last_restart == lp->stats.tx_packets) { + if (i596_debug > 1) printk ("Resetting board.\n"); + /* Shutdown and restart */ + + lp->scb.command=CUC_ABORT|RX_ABORT; + outw(0, ioaddr+4); + + i596_cleanup_cmd(lp); + init_i596_mem(dev); + } else { + /* Issue a channel attention signal */ + if (i596_debug > 1) printk ("Kicking board.\n"); + + lp->scb.command=CUC_START|RX_START; + outw(0, ioaddr+4); + + lp->last_restart = lp->stats.tx_packets; + } + dev->tbusy = 0; + dev->trans_start = jiffies; + } + + /* If some higher level thinks we've misses a tx-done interrupt + we are passed NULL. n.b. dev_tint handles the cli()/sti() + itself. */ + if (skb == NULL) { + dev_tint(dev); + return 0; + } + + /* shouldn't happen */ + if (skb->len <= 0) return 0; + + if (i596_debug > 3) printk("%s: i596_start_xmit() called\n", dev->name); + + /* Block a timer-based transmit from overlapping. This could better be + done with atomic_swap(1, dev->tbusy), but set_bit() works as well. */ + if (set_bit(0, (void*)&dev->tbusy) != 0) + printk("%s: Transmitter access conflict.\n", dev->name); + else + { + short length = ETH_ZLEN < skb->len ? 
skb->len : ETH_ZLEN; + dev->trans_start=jiffies; + + tx_cmd = (struct tx_cmd *) kmalloc ((sizeof (struct tx_cmd) + sizeof (struct i596_tbd)), GFP_ATOMIC); + if (tx_cmd == NULL) + { + printk ("%s: i596_xmit Memory squeeze, dropping packet.\n", dev->name); + lp->stats.tx_dropped++; + + dev_kfree_skb(skb, FREE_WRITE); + } + else + { + tx_cmd->tbd = (struct i596_tbd *) (tx_cmd + 1); + tx_cmd->tbd->next = (struct i596_tbd *) -1; + + tx_cmd->cmd.command = CMD_FLEX|CmdTx; + + tx_cmd->pad = 0; + tx_cmd->size = 0; + tx_cmd->tbd->pad = 0; + tx_cmd->tbd->size = EOF | length; + + tx_cmd->tbd->data = skb->data; + + if (i596_debug > 3) print_eth(skb->data); + + i596_add_cmd(dev, (struct i596_cmd *)tx_cmd); + + lp->stats.tx_packets++; + } + } + + dev->tbusy = 0; + + return 0; +} + + + +static void print_eth(char *add) +{ + int i; + + printk ("Dest "); + for (i = 0; i < 6; i++) + printk(" %2.2X", (unsigned char)add[i]); + printk ("\n"); + + printk ("Source"); + for (i = 0; i < 6; i++) + printk(" %2.2X", (unsigned char)add[i+6]); + printk ("\n"); + printk ("type %2.2X%2.2X\n", (unsigned char)add[12], (unsigned char)add[13]); +} + +unsigned long apricot_init(unsigned long mem_start, unsigned long mem_end) +{ + struct device *dev; + int i; + int checksum = 0; + int ioaddr = 0x300; + + /* this is easy the ethernet interface can only be at 0x300 */ + /* first check nothing is already registered here */ + + if (check_region(ioaddr, APRICOT_TOTAL_SIZE)) + return mem_start; + + /* very similar to the SMC card except that the checksum is 0x200 */ + for (i = 0; i < 8; i++) + checksum += inb(ioaddr + 8 + i); + + if (checksum != 0x200) return mem_start; + + dev = init_etherdev(0, (sizeof (struct i596_private) + 0xf), &mem_start); + + printk("%s: Apricot 82596 at %#3x,", dev->name, ioaddr); + + for (i = 0; i < 6; i++) + printk(" %2.2X", dev->dev_addr[i] = inb(ioaddr +8 + i)); + + dev->base_addr = ioaddr; + dev->irq = 10; + printk(" IRQ %d.\n", dev->irq); + + snarf_region(ioaddr, 
APRICOT_TOTAL_SIZE); + + if (i596_debug > 0) + printk(version); + + /* The APRICOT-specific entries in the device structure. */ + dev->open = &i596_open; + dev->stop = &i596_close; + dev->hard_start_xmit = &i596_start_xmit; + dev->get_stats = &i596_get_stats; +#ifdef HAVE_MULTICAST + dev->set_multicast_list = &set_multicast_list; +#endif + + /* align for scp */ + dev->priv = (void *)(((int) dev->priv + 0xf) & 0xfffffff0); + + return mem_start; +} + + +static void +i596_interrupt(int reg_ptr) +{ + int irq = -(((struct pt_regs *)reg_ptr)->orig_eax+2); + struct device *dev = (struct device *)(irq2dev_map[irq]); + struct i596_private *lp; + short ioaddr; + int boguscnt = 100; + unsigned short status, ack_cmd=0; + + if (dev == NULL) { + printk ("i596_interrupt(): irq %d for unknown device.\n", irq); + return; + } + + if (i596_debug > 3) printk ("%s: i596_interrupt(): irq %d\n",dev->name, irq); + + if (dev->interrupt) + printk("%s: Re-entering the interrupt handler.\n", dev->name); + + dev->interrupt = 1; + + ioaddr = dev->base_addr; + + lp = (struct i596_private *)dev->priv; + + while (lp->scb.status, lp->scb.command) + if (--boguscnt == 0) + { + printk("%s: i596 interrupt, timeout status %4.4x command %4.4x.\n", dev->name, lp->scb.status, lp->scb.command); + break; + } + status = lp->scb.status; + + if (i596_debug > 4) + printk("%s: i596 interrupt, status %4.4x.\n", dev->name, status); + + ack_cmd = status & 0xf000; + + if ((status & 0x8000) || (status & 0x2000)) + { + struct i596_cmd *ptr; + + if ((i596_debug > 4) && (status & 0x8000)) + printk("%s: i596 interrupt completed command.\n", dev->name); + if ((i596_debug > 4) && (status & 0x2000)) + printk("%s: i596 interrupt command unit inactive %x.\n", dev->name, status & 0x0700); + + while ((lp->cmd_head != (struct i596_cmd *) -1) && (lp->cmd_head->status & STAT_C)) + { + ptr = lp->cmd_head; + + lp->cmd_head = lp->cmd_head->next; + lp->cmd_backlog--; + + switch ((ptr->command) & 0x7) + { + case CmdTx: + { + struct 
tx_cmd *tx_cmd = (struct tx_cmd *) ptr; + struct sk_buff *skb = ((struct sk_buff *)(tx_cmd->tbd->data)) -1; + + dev_kfree_skb(skb, FREE_WRITE); + + if ((ptr->status) & STAT_OK) + { + if (i596_debug >2) print_eth(skb->data); + } + else + { + lp->stats.tx_errors++; + if ((ptr->status) & 0x0020) lp->stats.collisions++; + if (!((ptr->status) & 0x0040)) lp->stats.tx_heartbeat_errors++; + if ((ptr->status) & 0x0400) lp->stats.tx_carrier_errors++; + if ((ptr->status) & 0x0800) lp->stats.collisions++; + if ((ptr->status) & 0x1000) lp->stats.tx_aborted_errors++; + } + + + ptr->next = (struct i596_cmd * ) -1; + kfree_s((unsigned char *)tx_cmd, (sizeof (struct tx_cmd) + sizeof (struct i596_tbd))); + break; + } + case CmdMulticastList: + { + unsigned short count = *((unsigned short *) (ptr + 1)); + + ptr->next = (struct i596_cmd * ) -1; + kfree_s((unsigned char *)ptr, (sizeof (struct i596_cmd) + count + 2)); + break; + } + case CmdTDR: + { + unsigned long status = *((unsigned long *) (ptr + 1)); + + if (status & 0x8000) + { + if (i596_debug > 3) + printk("%s: link ok.\n", dev->name); + } + else + { + if (status & 0x4000) + printk("%s: Transceiver problem.\n", dev->name); + if (status & 0x2000) + printk("%s: Termination problem.\n", dev->name); + if (status & 0x1000) + printk("%s: Short circuit.\n", dev->name); + + printk("%s: Time %ld.\n", dev->name, status & 0x07ff); + } + } + default: + ptr->next = (struct i596_cmd * ) -1; + + lp->last_cmd=jiffies; + } + } + + ptr = lp->cmd_head; + while ((ptr != (struct i596_cmd *) -1) && (ptr != lp->cmd_tail)) + { + ptr->command &= 0x1fff; + ptr = ptr->next; + } + + if ((lp->cmd_head != (struct i596_cmd *) -1) && (dev->start)) ack_cmd |= CUC_START; + lp->scb.cmd = lp->cmd_head; + } + + if ((status & 0x1000) || (status & 0x4000)) + { + if ((i596_debug > 4) && (status & 0x4000)) + printk("%s: i596 interrupt received a frame.\n", dev->name); + if ((i596_debug > 4) && (status & 0x1000)) + printk("%s: i596 interrupt receive unit inactive 
%x.\n", dev->name, status & 0x0070); + + i596_rx(dev); + + if (dev->start) ack_cmd |= RX_START; + } + + /* acknowledge the interrupt */ + +/* + if ((lp->scb.cmd != (struct i596_cmd *) -1) && (dev->start)) ack_cmd |= CUC_START; +*/ + boguscnt = 100; + while (lp->scb.status, lp->scb.command) + if (--boguscnt == 0) + { + printk("%s: i596 interrupt, timeout status %4.4x command %4.4x.\n", dev->name, lp->scb.status, lp->scb.command); + break; + } + lp->scb.command = ack_cmd; + + (void) inb (ioaddr+0x10); + outb (4, ioaddr+0xf); + outw (0, ioaddr+4); + + if (i596_debug > 4) + printk("%s: exiting interrupt.\n", dev->name); + + dev->interrupt = 0; + return; +} + +static int +i596_close(struct device *dev) +{ + int ioaddr = dev->base_addr; + struct i596_private *lp = (struct i596_private *)dev->priv; + + dev->start = 0; + dev->tbusy = 1; + + if (i596_debug > 1) + printk("%s: Shutting down ethercard, status was %4.4x.\n", + dev->name, lp->scb.status); + + lp->scb.command = CUC_ABORT|RX_ABORT; + outw(0, ioaddr+4); + + i596_cleanup_cmd(lp); + + free_irq(dev->irq); + irq2dev_map[dev->irq] = 0; + + return 0; +} + +static struct enet_statistics * +i596_get_stats(struct device *dev) +{ + struct i596_private *lp = (struct i596_private *)dev->priv; + + return &lp->stats; +} + +#ifdef HAVE_MULTICAST +/* Set or clear the multicast filter for this adaptor. + num_addrs == -1 Promiscuous mode, receive all packets + num_addrs == 0 Normal mode, clear multicast list + num_addrs > 0 Multicast mode, receive normal and MC packets, and do + best-effort filtering. 
+ */ +static void +set_multicast_list(struct device *dev, int num_addrs, void *addrs) +{ + struct i596_private *lp = (struct i596_private *)dev->priv; + struct i596_cmd *cmd; + + if (i596_debug > 1) + printk ("%s: set multicast list %d\n", dev->name, num_addrs); + + if (num_addrs > 0) { + cmd = (struct i596_cmd *) kmalloc(sizeof(struct i596_cmd)+2+num_addrs*6, GFP_ATOMIC); + if (cmd == NULL) + { + printk ("%s: set_multicast Memory squeeze.\n", dev->name); + return; + } + + cmd->command = CmdMulticastList; + *((unsigned short *) (cmd + 1)) = num_addrs * 6; + memcpy (((char *)(cmd + 1))+2, addrs, num_addrs * 6); + print_eth (((char *)(cmd + 1)) + 2); + + i596_add_cmd(dev, cmd); + } else + { + if (lp->set_conf.next != (struct i596_cmd * ) -1) return; + if (num_addrs == 0) + lp->i596_config[8] &= ~0x01; + else + lp->i596_config[8] |= 0x01; + + i596_add_cmd(dev, &lp->set_conf); + } + +} +#endif + +#ifdef HAVE_DEVLIST +static unsigned int apricot_portlist[] = {0x300, 0}; +struct netdev_entry apricot_drv = +{"apricot", apricot_init, APRICOT_TOTAL_SIZE, apricot_portlist}; +#endif + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/net/inet -Wall -Wstrict-prototypes -O6 -m486 -c apricot.c" + * End: + */ diff -u --recursive --new-file v1.1.19/linux/drivers/net/at1700.c linux/drivers/net/at1700.c --- v1.1.19/linux/drivers/net/at1700.c Tue May 24 00:34:51 1994 +++ linux/drivers/net/at1700.c Fri Jun 17 07:53:53 1994 @@ -406,8 +406,7 @@ /* Turn on Tx interrupts back on. 
*/ outb(0x82, ioaddr + TX_INTR); } - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); return 0; } diff -u --recursive --new-file v1.1.19/linux/drivers/net/atp.c linux/drivers/net/atp.c --- v1.1.19/linux/drivers/net/atp.c Tue May 24 00:34:51 1994 +++ linux/drivers/net/atp.c Fri Jun 17 07:53:53 1994 @@ -471,8 +471,7 @@ write_reg_high(ioaddr, IMR, ISRh_RxErr); } - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); return 0; } diff -u --recursive --new-file v1.1.19/linux/drivers/net/de600.c linux/drivers/net/de600.c --- v1.1.19/linux/drivers/net/de600.c Tue May 24 00:34:51 1994 +++ linux/drivers/net/de600.c Fri Jun 17 07:53:53 1994 @@ -453,8 +453,7 @@ sti(); /* interrupts back on */ - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); return 0; } @@ -751,7 +750,7 @@ #ifdef MODULE char kernel_version[] = UTS_RELEASE; static struct device de600_dev = { - "" /*"de600"*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, de600_probe }; + " " /*"de600"*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, de600_probe }; int init_module(void) diff -u --recursive --new-file v1.1.19/linux/drivers/net/depca.c linux/drivers/net/depca.c --- v1.1.19/linux/drivers/net/depca.c Tue Apr 19 22:20:34 1994 +++ linux/drivers/net/depca.c Fri Jun 17 07:53:53 1994 @@ -914,9 +914,7 @@ dev->tbusy=0; } - if (skb->free) { - kfree_skb (skb, FREE_WRITE); - } + dev_kfree_skb (skb, FREE_WRITE); } return 0; diff -u --recursive --new-file v1.1.19/linux/drivers/net/dummy.c linux/drivers/net/dummy.c --- v1.1.19/linux/drivers/net/dummy.c Thu Jun 2 13:50:54 1994 +++ linux/drivers/net/dummy.c Fri Jun 17 07:53:53 1994 @@ -89,8 +89,7 @@ if (skb == NULL || dev == NULL) return 0; - if (skb->free) - kfree_skb(skb, FREE_WRITE); + dev_kfree_skb(skb, FREE_WRITE); #if DUMMY_STATS stats = (struct enet_statistics *)dev->priv; diff -u --recursive --new-file v1.1.19/linux/drivers/net/eexpress.c linux/drivers/net/eexpress.c --- v1.1.19/linux/drivers/net/eexpress.c 
Tue May 24 00:34:51 1994 +++ linux/drivers/net/eexpress.c Fri Jun 17 07:53:54 1994 @@ -494,8 +494,7 @@ outb(0x08 | irqrmap[dev->irq], ioaddr + SET_IRQ); } - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); /* You might need to clean up and record Tx statistics here. */ lp->stats.tx_aborted_errors++; diff -u --recursive --new-file v1.1.19/linux/drivers/net/lance.c linux/drivers/net/lance.c --- v1.1.19/linux/drivers/net/lance.c Tue May 24 00:34:51 1994 +++ linux/drivers/net/lance.c Fri Jun 17 07:53:54 1994 @@ -555,14 +555,8 @@ memcpy(&lp->tx_bounce_buffs[entry], skb->data, skb->len); lp->tx_ring[entry].base = (int)(lp->tx_bounce_buffs + entry) | 0x83000000; - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); } else { - /* We can't free the packet yet, so we inform the memory management - code that we are still using it. */ - - skb_kept_by_device(skb); - lp->tx_ring[entry].base = (int)(skb->data) | 0x83000000; } lp->cur_tx++; @@ -650,7 +644,7 @@ if (databuff >= (void*)(&lp->tx_bounce_buffs[TX_RING_SIZE]) || databuff < (void*)(lp->tx_bounce_buffs)) { struct sk_buff *skb = ((struct sk_buff *)databuff) - 1; - skb_device_release(skb,FREE_WRITE); + dev_kfree_skb(skb,FREE_WRITE); /* Warning: skb may well vanish at the point you call device_release! 
*/ diff -u --recursive --new-file v1.1.19/linux/drivers/net/loopback.c linux/drivers/net/loopback.c --- v1.1.19/linux/drivers/net/loopback.c Tue May 24 00:34:51 1994 +++ linux/drivers/net/loopback.c Fri Jun 17 07:53:54 1994 @@ -56,9 +56,12 @@ } dev->tbusy = 1; sti(); + + /* FIXME: Optimise so buffers with skb->free=1 are not copied but + instead are lobbed from tx queue to rx queue */ done = dev_rint(skb->data, skb->len, 0, dev); - if (skb->free) kfree_skb(skb, FREE_WRITE); + dev_kfree_skb(skb, FREE_WRITE); while (done != 1) { done = dev_rint(NULL, 0, 0, dev); diff -u --recursive --new-file v1.1.19/linux/drivers/net/net_init.c linux/drivers/net/net_init.c --- v1.1.19/linux/drivers/net/net_init.c Thu Jun 2 13:50:54 1994 +++ linux/drivers/net/net_init.c Fri Jun 17 07:53:54 1994 @@ -52,7 +52,10 @@ static struct device *ethdev_index[MAX_ETH_CARDS]; unsigned long lance_init(unsigned long mem_start, unsigned long mem_end); +unsigned long pi_init(unsigned long mem_start, unsigned long mem_end); +unsigned long apricot_init(unsigned long mem_start, unsigned long mem_end); + /* net_dev_init() is our network device initialization routine. It's called from init/main.c with the start and end of free memory, @@ -67,6 +70,9 @@ #endif #if defined(CONFIG_PI) mem_start = pi_init(mem_start, mem_end); +#endif +#if defined(CONFIG_APRICOT) + mem_start = apricot_init(mem_start, mem_end); #endif return mem_start; } diff -u --recursive --new-file v1.1.19/linux/drivers/net/plip.c linux/drivers/net/plip.c --- v1.1.19/linux/drivers/net/plip.c Thu Jun 2 13:50:55 1994 +++ linux/drivers/net/plip.c Fri Jun 17 07:53:55 1994 @@ -1,56 +1,29 @@ +/* plip.c: A parallel port "network" driver for linux. */ +/* This driver is for parallel port with 5-bit cable (LapLink (R) cable). */ /* - * Plip.c: A parallel port "network" driver for linux. - */ - -/* - * Developement History: - * - * Original version and the name 'PLIP' from Donald Becker - * inspired by Russ Nelson's parallel port packet driver. 
- * Further development by Tommy Thorn - * Some changes by Tanabe Hiroyasu - * Upgraded for PL12 by Donald Becker - * Minor hacks by Alan Cox to get it working - * more reliably (Ha!) - * Changes even more Peter Bauer (100136.3530@compuserve.com) - * Protocol changed back to original plip as in crynwr's packet-drivers. - * Tested this against ncsa-telnet 2.3 and pcip_pkt using plip.com (which - * contains "version equ 0" and ";History:562,1" in the firts 2 - * source-lines 28-Mar-94 - * - * Modularised it (Alan Cox). Will upgrade to Niibe's PLIP once its settled - * down better. - * + * Authors: Donald Becker, + * Tommy Thorn, + * Tanabe Hiroyasu, + * Alan Cox, + * Peter Bauer, <100136.3530@compuserve.com> + * Niibe Yutaka, * - * This is parallel port packet pusher. It's actually more general - * than the "IP" in its name suggests -- but 'plip' is just such a - * great name! - * - * - * Bugs: Please read this: The PLIP driver is a nasty hack and like all nasty hacks - * has some 'features'. + * This is the all improved state based PLIP that Niibe Yutaka has contributed. * - * Can lock machines solid if one end goes down or crashes, or due to cable faults. - * Can lock both machines solid on a broadcast collision. - * Some laptops don't have all the wires we use. - * Doesn't match the original Russ Nelson protocol so won't talk to Amiga or PC drivers. - * Waits far too long with interrupts off [X is unbearable, forget action games, xntp is a joke] - * Doesn't work on some fast 486DX machines + * Modularization by Alan Cox. I also added the plipconfig program to tune the timeouts + * and ifmap support for funny serial port settings or setting odd values using the + * modular plip. I also took the panic() calls out. I don't like panic - especially when + * it can be avoided. * - * If it works be thankful, if not fix it! - * - * Info: - * I got 15K/second NFS throughput (about 20-25K second IP). I also got some ethernet cards - * so don't ask me for help. 
This code needs a real major rewrite. Any volunteers ? - * - ***** So we can all compare loads of different PLIP drivers for a bit I've modularised this beastie too. - ***** In addition a seperate bidirectional plip module can be done. - * - * WARNING: The PRE 1.1.16 plip will NOT work with this PLIP driver. We - * can't avoid this due to an error in the old plip module. If you must - * mix PLIP's you'll need to fix the _OLD_ one to use 0xFC 0xFC as its - * MAC header not 0xFD. - * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Original version and the name 'PLIP' from Donald Becker + * inspired by Russ Nelson's parallel port packet driver. */ static char *version = @@ -58,29 +31,28 @@ #ifdef MODULE "MODULAR " #endif - "PLIP.010 (from plip.c:v0.15 for 0.99pl12+, 8/11/93)\n"; + "PLIP.010+ gniibe@mri.co.jp\n"; #include /* Sources: - Ideas and protocols came from Russ Nelson's (nelson@crynwr.com) + Ideas and protocols came from Russ Nelson's "parallel.asm" parallel port packet driver. - TANABE Hiroyasu changes the protocol. + The "Crynwr" parallel port standard specifies the following protocol: send header nibble '8' - type octet '0xfd' or '0xfc' count-low octet count-high octet ... data octets checksum octet -Each octet is sent as + Each octet is sent as >4)&0x0F)> The cable used is a de facto standard parallel null cable -- sold as a "LapLink" cable by various places. You'll need a 10-conductor cable to make one yourself. The wiring is: - INIT 16 - 16 SLCTIN 17 - 17 + SLCTIN 17 - 17 GROUND 25 - 25 D0->ERROR 2 - 15 15 - 2 D1->SLCT 3 - 13 13 - 3 @@ -89,7 +61,7 @@ D4->BUSY 6 - 11 11 - 6 Do not connect the other pins. 
They are D5,D6,D7 are 7,8,9 - STROBE is 1, FEED is 14 + STROBE is 1, FEED is 14, INIT is 16 extra grounds are 18,19,20,21,22,23,24 */ @@ -105,157 +77,298 @@ #include #include #include +#include #include #include #include +#include +#include +#include +#include +#include + #ifdef MODULE #include #include "../../tools/version.h" #endif -#ifdef PRINTK -#undef PRINTK -#endif -#ifdef PRINTK2 -#undef PRINTK2 +/* use 0 for production, 1 for verification, >2 for debug */ +#ifndef NET_DEBUG +#define NET_DEBUG 3 #endif +static unsigned int net_debug = NET_DEBUG; -#define PLIP_DEBUG /* debugging */ -#undef PLIP_DEBUG2 /* debugging with more varbose report */ +/* constants */ +#define PLIP_MTU 1500 -#ifdef PLIP_DEBUG -#define PRINTK(x) printk x -#else -#define PRINTK(x) /**/ -#endif -#ifdef PLIP_DEBUG2 -#define PRINTK2(x) printk x -#else -#define PRINTK2(x) /**/ -#endif +/* In micro second */ +#define PLIP_DELAY_UNIT 1 -/* The map from IRQ number (as passed to the interrupt handler) to - 'struct device'. */ -extern struct device *irq2dev_map[16]; +/* Connection time out = PLIP_TRIGGER_WAIT * PLIP_DELAY_UNIT usec */ +#define PLIP_TRIGGER_WAIT 500 -/* Network statistics, with the same names as 'struct enet_statistics'. */ -#define netstats enet_statistics +/* Nibble time out = PLIP_NIBBLE_WAIT * PLIP_DELAY_UNIT usec */ +#define PLIP_NIBBLE_WAIT 3000 -/* constants */ -#define PAR_DATA 0 -#define PAR_STATUS 1 -#define PAR_CONTROL 2 -#define PLIP_MTU 1600 -#define PLIP_HEADER_TYPE1 0xfd -#define PLIP_HEADER_TYPE2 0xfc +#define PAR_DATA(dev) (dev->base_addr+0) +#define PAR_STATUS(dev) (dev->base_addr+1) +#define PAR_CONTROL(dev) (dev->base_addr+2) /* Index to functions, as function prototypes. 
*/ -extern int plip_probe(int ioaddr, struct device *dev); +static int plip_tx_packet(struct sk_buff *skb, struct device *dev); static int plip_open(struct device *dev); static int plip_close(struct device *dev); -static int plip_tx_packet(struct sk_buff *skb, struct device *dev); -static int plip_header (unsigned char *buff, struct device *dev, - unsigned short type, void *dest, - void *source, unsigned len, struct sk_buff *skb); - -/* variables used internally. */ -#define INITIALTIMEOUTFACTOR 4 -#define MAXTIMEOUTFACTOR 20 -static int timeoutfactor = INITIALTIMEOUTFACTOR; +static int plip_header(unsigned char *buff, struct device *dev, + unsigned short type, void *dest, + void *source, unsigned len, struct sk_buff *skb); +static struct enet_statistics *plip_get_stats(struct device *dev); +static int plip_rebuild_header(void *buff, struct device *dev, + unsigned long raddr, struct sk_buff *skb); + +enum plip_state { + PLIP_ST_DONE=0, + PLIP_ST_TRANSMIT_BEGIN, + PLIP_ST_TRIGGER, + PLIP_ST_LENGTH_LSB, + PLIP_ST_LENGTH_MSB, + PLIP_ST_DATA, + PLIP_ST_CHECKSUM, + PLIP_ST_ERROR +}; + +enum plip_nibble_state { + PLIP_NST_BEGIN, + PLIP_NST_1, + PLIP_NST_2, + PLIP_NST_END +}; + +#define PLIP_STATE_STRING(x) \ + (((x) == PLIP_ST_DONE)?"0":\ + ((x) == PLIP_ST_TRANSMIT_BEGIN)?"b":\ + ((x) == PLIP_ST_TRIGGER)?"t":\ + ((x) == PLIP_ST_LENGTH_LSB)?"l":\ + ((x) == PLIP_ST_LENGTH_MSB)?"m":\ + ((x) == PLIP_ST_DATA)?"d":\ + ((x) == PLIP_ST_CHECKSUM)?"s":"B") + +struct plip_local { + enum plip_state state; + enum plip_nibble_state nibble; + unsigned short length; + unsigned short count; + unsigned short byte; + unsigned char checksum; + unsigned char data; + struct sk_buff *skb; +}; + +struct net_local { + struct enet_statistics e; + struct timer_list tl; + struct plip_local snd_data; + struct plip_local rcv_data; + unsigned long trigger_us; + unsigned long nibble_us; +}; /* Routines used internally. 
*/ static void plip_device_clear(struct device *dev); -static void plip_receiver_error(struct device *dev); -static void plip_set_physicaladdr(struct device *dev, unsigned long ipaddr); -static int plip_addrcmp(struct ethhdr *eth); -static void cold_sleep(int tics); -static void plip_interrupt(int reg_ptr); /* Dispatch from interrupts. */ -static int plip_receive_packet(struct device *dev); -static int plip_send_packet(struct device *dev, unsigned char *buf, int length); -static int plip_send_start(struct device *dev, struct ethhdr *eth); -static void double_timeoutfactor(void); -static struct enet_statistics *plip_get_stats(struct device *dev); +static void plip_error(struct device *dev); +static int plip_receive(struct device *dev, enum plip_nibble_state *ns_p, + unsigned char *data_p); +static void plip_receive_packet(struct device *dev); +static void plip_interrupt(int reg_ptr); +static int plip_send(struct device *dev, enum plip_nibble_state *ns_p, + unsigned char data); +static void plip_send_packet(struct device *dev); +static int plip_ioctl(struct device *dev, struct ifreq *ifr); +static int plip_config(struct device *dev, struct ifmap *map); + int plip_init(struct device *dev) { - int port_base = dev->base_addr; int i; + struct net_local *pl; /* Check that there is something at base_addr. */ - outb(0x00, port_base + PAR_CONTROL); - outb(0x55, port_base + PAR_DATA); - if (inb(port_base + PAR_DATA) != 0x55) + outb(0x00, PAR_CONTROL(dev)); + outb(0x00, PAR_DATA(dev)); + if (inb(PAR_DATA(dev)) != 0x00) return -ENODEV; /* Alpha testers must have the version number to report bugs. */ -#ifdef PLIP_DEBUG - { - static int version_shown = 0; - if (! 
version_shown) - printk(version), version_shown++; + if (net_debug) + printk(version); + + if (dev->irq) { + printk("%s: configured for parallel port at %#3x, IRQ %d.\n", + dev->name, dev->base_addr, dev->irq); + } else { + printk("%s: configured for parallel port at %#3x", + dev->name, dev->base_addr); + autoirq_setup(0); + outb(0x00, PAR_CONTROL(dev)); + outb(0x10, PAR_CONTROL(dev)); + outb(0x00, PAR_CONTROL(dev)); + dev->irq = autoirq_report(1); + if (dev->irq) + printk(", probed IRQ %d.\n", dev->irq); + else { + printk(", failed to detect IRQ line.\n"); + return -ENODEV; + } } -#endif /* Initialize the device structure. */ - dev->priv = kmalloc(sizeof(struct netstats), GFP_KERNEL); - memset(dev->priv, 0, sizeof(struct netstats)); + dev->rmem_end = (unsigned long) NULL; + dev->rmem_start = (unsigned long) NULL; + dev->mem_end = (unsigned long) NULL; + dev->mem_start = (unsigned long) NULL; + + dev->priv = kmalloc(sizeof (struct net_local), GFP_KERNEL); + memset(dev->priv, 0, sizeof(struct net_local)); + pl=dev->priv; + + pl->trigger_us = PLIP_TRIGGER_WAIT; + pl->nibble_us = PLIP_NIBBLE_WAIT; + + dev->mtu = PLIP_MTU; + dev->hard_start_xmit = plip_tx_packet; + dev->open = plip_open; + dev->stop = plip_close; + dev->hard_header = plip_header; + dev->type_trans = eth_type_trans; + dev->get_stats = plip_get_stats; + dev->set_config = plip_config; + dev->do_ioctl = plip_ioctl; + + dev->hard_header_len = ETH_HLEN; + dev->addr_len = ETH_ALEN; + dev->type = ARPHRD_ETHER; + dev->rebuild_header = plip_rebuild_header; + for (i = 0; i < DEV_NUMBUFFS; i++) skb_queue_head_init(&dev->buffs[i]); - dev->hard_header = &plip_header; - dev->rebuild_header = eth_rebuild_header; - dev->type_trans = eth_type_trans; - - dev->open = &plip_open; - dev->stop = &plip_close; - dev->hard_start_xmit = &plip_tx_packet; - dev->get_stats = &plip_get_stats; - - /* These are ethernet specific. 
*/ - dev->type = ARPHRD_ETHER; - dev->hard_header_len = ETH_HLEN; - dev->mtu = PLIP_MTU; /* PLIP may later negotiate max pkt size */ - dev->addr_len = ETH_ALEN; for (i = 0; i < dev->addr_len; i++) { dev->broadcast[i]=0xff; dev->dev_addr[i] = 0; } - printk("%s: configured for parallel port at %#3x, IRQ %d.\n", - dev->name, dev->base_addr, dev->irq); + + /* New-style flags. */ + dev->flags = 0; + dev->family = AF_INET; + dev->pa_addr = 0; + dev->pa_brdaddr = 0; + dev->pa_dstaddr = 0; + dev->pa_mask = 0; + dev->pa_alen = sizeof(unsigned long); - /* initialize internal value */ - timeoutfactor = INITIALTIMEOUTFACTOR; return 0; } +static int +plip_tx_packet (struct sk_buff *skb, struct device *dev) +{ + struct net_local *lp = (struct net_local *)dev->priv; + struct plip_local *snd = &lp->snd_data; + + if (dev->tbusy) { + /* it is sending a packet now */ + int tickssofar = jiffies - dev->trans_start; + if (tickssofar < 100) /* please try later, again */ + return 1; + + /* something wrong... force to reset */ + printk("%s: transmit timed out, cable problem??\n", dev->name); + plip_device_clear(dev); + } + + /* If some higher layer thinks we've missed an tx-done interrupt + we are passed NULL. Caution: dev_tint() handles the cli()/sti() + itself. 
*/ + if (skb == NULL) { + dev_tint(dev); + return 0; + } + + cli(); + if (set_bit(0, (void *)&dev->tbusy) != 0) { + sti(); + printk("%s: Transmitter access conflict.\n", dev->name); + return 1; + } + if (dev->interrupt) { + sti(); + return 1; + } + snd->state = PLIP_ST_TRANSMIT_BEGIN; + sti(); + + dev->trans_start = jiffies; + if (net_debug > 4) + printk("Ss"); + + if (skb->len > dev->mtu) { + printk("%s: packet too big, %d.\n", dev->name, (int)skb->len); + return 0; + } + + snd->skb = skb; + snd->length = skb->len; + snd->count = 0; + + cli(); + if (dev->interrupt == 0) { + /* set timer */ + lp->tl.expires = 0; + lp->tl.data = (unsigned long)dev; + lp->tl.function = (void (*)(unsigned long))plip_send_packet; + add_timer(&lp->tl); + mark_bh(TIMER_BH); + } + snd->state = PLIP_ST_TRIGGER; + sti(); + + return 0; +} + /* Open/initialize the board. This is called (in the current kernel) - sometime after booting when the 'config name>' program is + sometime after booting when the 'ifconfig' program is run. This routine gets exclusive access to the parallel port by allocating its IRQ line. */ - -static int plip_open(struct device *dev) +static int +plip_open(struct device *dev) { - if (dev->irq == 0) - dev->irq = 7; + struct net_local *lp = (struct net_local *)dev->priv; + struct plip_local *rcv = &lp->rcv_data; + + rcv->skb = alloc_skb(dev->mtu, GFP_KERNEL); + if (rcv->skb == NULL) { + printk("%s: couldn't get memory for receiving packet.\n", dev->name); + return -EAGAIN; + } + rcv->skb->len = dev->mtu; + rcv->skb->dev = dev; cli(); - if (request_irq(dev->irq , &plip_interrupt) != 0) { - sti(); - PRINTK(("%s: couldn't get IRQ %d.\n", dev->name, dev->irq)); + if (request_irq(dev->irq , plip_interrupt) != 0) { + sti(); + printk("%s: couldn't get IRQ %d.\n", dev->name, dev->irq); return -EAGAIN; } - irq2dev_map[dev->irq] = dev; sti(); + /* enable rx interrupt. 
*/ + outb(0x10, PAR_CONTROL(dev)); plip_device_clear(dev); - dev->tbusy = 0; - dev->interrupt = 0; dev->start = 1; #ifdef MODULE MOD_INC_USE_COUNT; @@ -263,516 +376,590 @@ return 0; } -/* The inverse routine to plip_open(). */ +/* The inverse routine to plip_open (). */ static int plip_close(struct device *dev) { + struct net_local *lp = (struct net_local *)dev->priv; + dev->tbusy = 1; dev->start = 0; cli(); free_irq(dev->irq); irq2dev_map[dev->irq] = NULL; sti(); - outb(0x00, dev->base_addr); /* Release the interrupt. */ + outb(0x00, PAR_DATA(dev)); + /* make sure that we don't register the timer */ + del_timer(&lp->tl); + /* release the interrupt. */ + outb(0x00, PAR_CONTROL(dev)); #ifdef MODULE MOD_DEC_USE_COUNT; #endif return 0; } +/* Fill in the MAC-level header. */ static int -plip_tx_packet(struct sk_buff *skb, struct device *dev) +plip_header(unsigned char *buff, struct device *dev, + unsigned short type, void *daddr, + void *saddr, unsigned len, struct sk_buff *skb) { - int ret_val; + int i; - if (dev->tbusy || dev->interrupt) { /* Do timeouts, to avoid hangs. */ - int tickssofar = jiffies - dev->trans_start; - if (tickssofar < 50) - return 1; - printk("%s: transmit timed out\n", dev->name); - /* Try to restart the adaptor. */ - plip_device_clear(dev); - return 0; + if (dev->dev_addr[0] == 0) { + for (i=0; i < ETH_ALEN - sizeof(unsigned long); i++) + dev->dev_addr[i] = 0xfc; + memcpy(&(dev->dev_addr[i]), &dev->pa_addr, sizeof(unsigned long)); } - /* If some higher layer thinks we've missed an tx-done interrupt - we are passed NULL. Caution: dev_tint() handles the cli()/sti() - itself. 
*/ - if (skb == NULL) { - dev_tint(dev); - return 0; - } + return eth_header(buff, dev, type, daddr, saddr, len, skb); +} - dev->trans_start = jiffies; - ret_val = plip_send_packet(dev, skb->data, skb->len); - if (skb->free) - kfree_skb (skb, FREE_WRITE); - dev->tbusy = 0; - mark_bh (NET_BH); - return 0/*ret_val*/; +static struct enet_statistics * +plip_get_stats(struct device *dev) +{ + struct enet_statistics *localstats = (struct enet_statistics*)dev->priv; + return localstats; } +/* We don't need to send arp, for plip is point-to-point. */ static int -plip_header (unsigned char *buff, struct device *dev, - unsigned short type, void *daddr , - void *saddr, unsigned len, struct sk_buff *skb) +plip_rebuild_header(void *buff, struct device *dev, unsigned long dst, + struct sk_buff *skb) { - if (dev->dev_addr[0] == 0) { - /* set physical address */ - plip_set_physicaladdr(dev, dev->pa_addr); + struct ethhdr *eth = (struct ethhdr *)buff; + int i; + + if (eth->h_proto != htons(ETH_P_IP)) { + printk("plip_rebuild_header: Don't know how to resolve type %d addreses?\n",(int)eth->h_proto); + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + return 0; } - return eth_header(buff, dev, type, daddr, saddr, len, skb); + + for (i=0; i < ETH_ALEN - sizeof(unsigned long); i++) + eth->h_dest[i] = 0xfc; + memcpy(&(eth->h_dest[i]), &dst, sizeof(unsigned long)); + return 0; } static void - plip_device_clear(struct device *dev) +plip_device_clear(struct device *dev) { - dev->interrupt = 0; - dev->tbusy = 0; - outb(0x00, dev->base_addr + PAR_DATA); - outb(0x10, dev->base_addr + PAR_CONTROL); /* Enable the rx interrupt. 
*/ -} + struct net_local *lp = (struct net_local *)dev->priv; -static void - plip_receiver_error(struct device *dev) -{ - dev->interrupt = 0; + outb (0x00, PAR_DATA(dev)); + lp->snd_data.state = PLIP_ST_DONE; + lp->rcv_data.state = PLIP_ST_DONE; + cli(); dev->tbusy = 0; - outb(0x02, dev->base_addr + PAR_DATA); - outb(0x10, dev->base_addr + PAR_CONTROL); /* Enable the rx interrupt. */ -} - -static int - get_byte(struct device *dev) -{ - unsigned char val, oldval; - unsigned char low_nibble; - int timeout; - int error = 0; - val = inb(dev->base_addr + PAR_STATUS); - timeout = jiffies + timeoutfactor * 2; - do { - oldval = val; - val = inb(dev->base_addr + PAR_STATUS); - if ( oldval != val ) continue; /* it's unstable */ - if ( timeout < jiffies ) { - error++; - break; - } - } while ( (val & 0x80) ); - val = inb(dev->base_addr + PAR_STATUS); - low_nibble = (val >> 3) & 0x0f; - outb(0x10, dev->base_addr + PAR_DATA); - timeout = jiffies + timeoutfactor * 2; - do { - oldval = val; - val = inb(dev->base_addr + PAR_STATUS); - if (oldval != val) continue; /* it's unstable */ - if ( timeout < jiffies ) { - error++; - break; - } - } while ( !(val & 0x80) ); - val = inb(dev->base_addr + PAR_STATUS); - PRINTK2(("%02x %s ", low_nibble | ((val << 1) & 0xf0), - error ? "t":"")); - outb(0x00, dev->base_addr + PAR_DATA); - if (error) { - /* timeout error */ - double_timeoutfactor(); - return -1; - } - return low_nibble | ((val << 1) & 0xf0); + dev->interrupt = 0; + /* make sure that we don't register the timer */ + del_timer(&lp->tl); + sti(); + enable_irq(dev->irq); } -/* The typical workload of the driver: - Handle the parallel port interrupts. 
*/ static void - plip_interrupt(int reg_ptr) +plip_error(struct device *dev) { - int irq = -(((struct pt_regs *)reg_ptr)->orig_eax+2); - struct device *dev = irq2dev_map[irq]; - struct netstats *localstats; + struct net_local *lp = (struct net_local *)dev->priv; + struct plip_local *snd = &((struct net_local *)dev->priv)->snd_data; + struct plip_local *rcv = &lp->rcv_data; + unsigned char status; - if (dev == NULL) { - PRINTK(("plip_interrupt(): irq %d for unknown device.\n", irq)); - return; - } - localstats = (struct netstats*) dev->priv; - if (dev->tbusy || dev->interrupt) return; - dev->interrupt = 1; - outb(0x00, dev->base_addr + PAR_CONTROL); /* Disable the rx interrupt. */ - sti(); /* Allow other interrupts. */ - PRINTK2(("%s: interrupt. ", dev->name)); - - { - /* check whether the interrupt is valid or not.*/ - int timeout = jiffies + timeoutfactor; - while ((inb(dev->base_addr + PAR_STATUS) & 0xf8) != 0xc0) { - if ( timeout < jiffies ) { - PRINTK2(("%s: No interrupt (status=%#02x)!\n", - dev->name, inb(dev->base_addr + PAR_STATUS))); - plip_device_clear(dev); - return; - } + outb(0x00, PAR_DATA(dev)); + cli(); + del_timer(&lp->tl); + snd->state = PLIP_ST_ERROR; + sti(); + if (rcv->skb == NULL) { + rcv->skb = alloc_skb(dev->mtu, GFP_ATOMIC); + if (rcv->skb == NULL) { + printk("%s: couldn't get memory.\n", dev->name); + goto again; } + rcv->skb->len = dev->mtu; + rcv->skb->dev = dev; } - if (plip_receive_packet(dev)) { - /* get some error while receiving data */ - localstats->rx_errors++; - plip_receiver_error(dev); - } else { + + status = inb(PAR_STATUS(dev)); + if ((status & 0xf8) == 0x80) { plip_device_clear(dev); + mark_bh(NET_BH); + } else { + again: + lp->tl.expires = 1; + lp->tl.data = (unsigned long)dev; + lp->tl.function = (void (*)(unsigned long))plip_error; + add_timer(&lp->tl); } } - + +/* PLIP_RECEIVE --- receive a byte(two nibbles) + Return 0 on success, return 1 on failure */ static int -plip_receive_packet(struct device *dev) 
+plip_receive(struct device *dev, enum plip_nibble_state *ns_p, + unsigned char *data_p) { - unsigned length; - int checksum = 0; - struct sk_buff *skb; - struct netstats *localstats; - struct ethhdr eth; + unsigned char c0, c1; + unsigned int cx; + struct net_local *nl=(struct net_local *)dev->priv; + + while (1) + switch (*ns_p) { + case PLIP_NST_BEGIN: + cx = nl->nibble_us; + while (1) { + c0 = inb(PAR_STATUS(dev)); + udelay(PLIP_DELAY_UNIT); + if ((c0 & 0x80) == 0) { + c1 = inb(PAR_STATUS(dev)); + if (c0 == c1) + break; + } + if (--cx == 0) + return 1; + } + *data_p = (c0 >> 3) & 0x0f; + outb(0x10, PAR_DATA(dev)); /* send ACK */ + *ns_p = PLIP_NST_1; + break; - localstats = (struct netstats*) dev->priv; - - outb(1, dev->base_addr + PAR_DATA); /* Ack: 'Ready' */ + case PLIP_NST_1: + cx = nl->nibble_us; + while (1) { + c0 = inb(PAR_STATUS(dev)); + udelay(PLIP_DELAY_UNIT); + if (c0 & 0x80) { + c1 = inb(PAR_STATUS(dev)); + if (c0 == c1) + break; + } + if (--cx == 0) + return 1; + } + *data_p |= (c0 << 1) & 0xf0; + outb(0x00, PAR_DATA(dev)); /* send ACK */ + *ns_p = PLIP_NST_2; + return 0; + break; - { - /* get header octet and length of packet */ - - length = get_byte(dev); - length |= get_byte(dev) << 8; - { - int i; - unsigned char *eth_p = (unsigned char*)&eth; - for ( i = 0; i < sizeof(eth); i++, eth_p++) { - *eth_p = get_byte(dev); - } - } - PRINTK2(("length = %d\n", length)); - if (length > dev->mtu || length < 8) { - PRINTK2(("%s: bogus packet size %d.\n", dev->name, length)); - return 1; - } - } - { - /* get skb area from kernel and - * set appropriate values to skb - */ - skb = alloc_skb(length, GFP_ATOMIC); - if (skb == NULL) { - PRINTK(("%s: Couldn't allocate a sk_buff of size %d.\n", - dev->name,length)); - return 1; - } - skb->lock = 0; - } - { - /* phase of receiving the data */ - /* 'skb->data' points to the start of sk_buff data area.
*/ - unsigned char *buf = skb->data; - unsigned char *eth_p = (unsigned char *)&eth; - int i; - for ( i = 0; i < sizeof(eth); i++) { - checksum += *eth_p; - *buf++ = *eth_p++; - } - for ( i = 0; i < length - sizeof(eth); i++) { - unsigned char new_byte = get_byte(dev); - checksum += new_byte; - *buf++ = new_byte; - } - checksum &= 0xff; - if (checksum != get_byte(dev)) { - localstats->rx_crc_errors++; - PRINTK(("checksum error\n")); - return 1; - } else if(dev_rint((unsigned char *)skb, length, IN_SKBUFF, dev)) { - printk("%s: rcv buff full.\n", dev->name); - localstats->rx_dropped++; + default: + printk("plip:receive state error\n"); + *ns_p = PLIP_NST_2; return 1; - } - } - { - /* phase of terminating this connection */ - int timeout; - - timeout = jiffies + length * timeoutfactor / 16; - outb(0x00, dev->base_addr + PAR_DATA); - /* Wait for the remote end to reset. */ - while ( (inb(dev->base_addr + PAR_STATUS) & 0xf8) != 0x80 ) { - if (timeout < jiffies ) { - double_timeoutfactor(); - PRINTK(("Remote has not reset.\n")); - break; - } + break; } - } - localstats->rx_packets++; - return 0; } - -static int send_byte(struct device *dev, unsigned char val) +static void +plip_receive_packet(struct device *dev) { - int timeout; - int error = 0; - PRINTK2((" S%02x", val)); - outb((val & 0xf), dev->base_addr); /* this makes data bits more stable */ - /* (especially the &0xf :-> PB ) */ - outb(0x10 | (val & 0xf), dev->base_addr); - timeout = jiffies + timeoutfactor; - while( inb(dev->base_addr+PAR_STATUS) & 0x80 ) - if ( timeout < jiffies ) { - error++; - break; - } - outb(0x10 | (val >> 4), dev->base_addr); - outb(val >> 4, dev->base_addr); - timeout = jiffies + timeoutfactor; - while( (inb(dev->base_addr+PAR_STATUS) & 0x80) == 0 ) - if ( timeout < jiffies ) { - error++; - break; - } - if (error) { - /* timeout error */ - double_timeoutfactor(); - PRINTK2(("t")); - return -1; - } - return 0; -} -/* - * plip_send_start - * trigger remoto rx interrupt and establish a
connection. - * - * return value - * 0 : establish the connection - * -1 : connection failed. - */ -static int -plip_send_start(struct device *dev, struct ethhdr *eth) -{ - int timeout; - int status; - int lasttrigger; - struct netstats *localstats = (struct netstats*) dev->priv; - - /* This starts the packet protocol by triggering a remote IRQ. */ - timeout = jiffies + timeoutfactor * 16; - lasttrigger = jiffies; - while ( ((status = inb(dev->base_addr+PAR_STATUS)) & 0x08) == 0 ) { - dev->tbusy = 1; - outb(0x00, dev->base_addr + PAR_CONTROL); /* Disable my rx intr. */ - outb(0x08, dev->base_addr + PAR_DATA); /* Trigger remote rx intr. */ - if (status & 0x40) { - /* The remote end is also trying to send a packet. - * Only one end may go to the receiving phase, - * so we use the "ethernet" address (set from the IP address) - * to determine which end dominates. - */ - if ( plip_addrcmp(eth) > 0 ) { - localstats->collisions++; - PRINTK2(("both ends are trying to send a packet.\n")); - if (plip_receive_packet(dev)) { - /* get some error while receiving data */ - localstats->rx_errors++; - outb(0x02, dev->base_addr + PAR_DATA); - } else { - outb(0x00, dev->base_addr + PAR_DATA); - } - cold_sleep(2); /* make sure that remote end is ready */ + struct net_local *lp = (struct net_local *)dev->priv; + struct enet_statistics *stats = (struct enet_statistics *) dev->priv; + struct plip_local *snd = &lp->snd_data; + struct plip_local *rcv = &lp->rcv_data; + unsigned char *lbuf = rcv->skb->data; + unsigned char c0; + unsigned char *s = PLIP_STATE_STRING(rcv->state); + + if (net_debug > 4) + printk("R%s",s); + + while (1) { + switch (rcv->state) { + case PLIP_ST_TRIGGER: + disable_irq(dev->irq); + rcv->state = PLIP_ST_LENGTH_LSB; + rcv->nibble = PLIP_NST_BEGIN; + break; + + case PLIP_ST_LENGTH_LSB: + if (plip_receive(dev, &rcv->nibble, (unsigned char *)&rcv->length)) + goto try_again; + + rcv->state = PLIP_ST_LENGTH_MSB; + rcv->nibble = PLIP_NST_BEGIN; + break; + + case 
PLIP_ST_LENGTH_MSB: + if (plip_receive(dev, &rcv->nibble, + (unsigned char *)&rcv->length+1)) + goto try_again; + + if (rcv->length > rcv->skb->len || rcv->length < 8) { + printk("%s: bogus packet size %d.\n", dev->name, rcv->length); + plip_error(dev); + return; } - continue; /* restart send sequence */ - } - if (lasttrigger != jiffies) { - /* trigger again */ - outb(0x00, dev->base_addr + PAR_DATA); - cold_sleep(1); - lasttrigger = jiffies; - } - if (timeout < jiffies) { - double_timeoutfactor(); + rcv->skb->len = rcv->length; + rcv->state = PLIP_ST_DATA; + rcv->nibble = PLIP_NST_BEGIN; + rcv->byte = 0; + rcv->checksum = 0; + break; + + case PLIP_ST_DATA: + if (plip_receive(dev, &rcv->nibble, &lbuf[rcv->byte])) + goto try_again; + + rcv->checksum += lbuf[rcv->byte]; + rcv->byte++; + rcv->nibble = PLIP_NST_BEGIN; + if (rcv->byte == rcv->length) + rcv->state = PLIP_ST_CHECKSUM; + break; + + case PLIP_ST_CHECKSUM: + if (plip_receive(dev, &rcv->nibble, &rcv->data)) + goto try_again; + if (rcv->data != rcv->checksum) { + stats->rx_crc_errors++; + if (net_debug) + printk("%s: checksum error\n", dev->name); + plip_error(dev); + return; + } + + rcv->state = PLIP_ST_DONE; + netif_rx(rcv->skb); + + /* Malloc up new buffer. 
*/ + rcv->skb = alloc_skb(dev->mtu, GFP_ATOMIC); + if (rcv->skb == NULL) { + printk("%s: Memory squeeze.\n", dev->name); + plip_error(dev); + return; + } + rcv->skb->len = dev->mtu; + rcv->skb->dev = dev; + stats->rx_packets++; + if (net_debug > 4) + printk("R(%4.4d)", rcv->length); + + if (snd->state == PLIP_ST_TRANSMIT_BEGIN) { + dev->interrupt = 0; + enable_irq(dev->irq); + } else if (snd->state == PLIP_ST_TRIGGER) { + cli(); + dev->interrupt = 0; + if (net_debug > 3) + printk("%%"); + lp->tl.expires = 0; + lp->tl.data = (unsigned long)dev; + lp->tl.function + = (void (*)(unsigned long))plip_send_packet; + add_timer(&lp->tl); + mark_bh(TIMER_BH); + enable_irq(dev->irq); + sti(); + } else + plip_device_clear(dev); + return; + + default: + printk("plip: bad STATE?? %04d", rcv->state); plip_device_clear(dev); - localstats->tx_errors++; - PRINTK(("%s: Connect failed in send_packet().\n", - dev->name)); - /* We failed to send the packet. To emulate the ethernet we - should pretent the send worked fine */ - return -1; + return; } } - return 0; + + try_again: + if (++rcv->count > 2) { /* timeout */ + s = PLIP_STATE_STRING(rcv->state); + c0 = inb(PAR_STATUS(dev)); + stats->rx_dropped++; + if (net_debug > 1) + printk("%s: receive timeout(%s,%02x)... reset interface.\n", + dev->name, s, (unsigned int)c0); + plip_error(dev); + } else { + s = PLIP_STATE_STRING(rcv->state); + if (net_debug > 3) + printk("r%s",s); + + /* set timer */ + lp->tl.expires = 1; + lp->tl.data = (unsigned long)dev; + lp->tl.function = (void (*)(unsigned long))plip_receive_packet; + add_timer(&lp->tl); + } } -static int -plip_send_packet(struct device *dev, unsigned char *buf, int length) + +/* Handle the parallel port interrupts. 
*/ +static void +plip_interrupt(int reg_ptr) { - int error = 0; - struct netstats *localstats; + int irq = -(((struct pt_regs *)reg_ptr)->orig_eax+2); + struct device *dev = irq2dev_map[irq]; + struct net_local *lp = (struct net_local *)dev->priv; + struct plip_local *rcv = &lp->rcv_data; + struct plip_local *snd = &lp->snd_data; + unsigned char c0; - PRINTK2(("%s: plip_send_packet(%d) %02x %02x %02x %02x %02x...", - dev->name, length, buf[0], buf[1], buf[2], buf[3], buf[4])); - if (length > dev->mtu) { - printk("%s: packet too big, %d.\n", dev->name, length); - return 0; + if (dev == NULL) { + if (net_debug) + printk ("plip_interrupt: irq %d for unknown device.\n", irq); + return; } - localstats = (struct netstats*) dev->priv; - { - /* phase of checking remote status */ - int i; - int timeout = jiffies + timeoutfactor * 8; - while ( (i = (inb(dev->base_addr+PAR_STATUS) & 0xe8)) != 0x80 ) { - if (i == 0x78) { - /* probably cable is not connected */ - /* Implementation Note: - * This status should result in 'Network unreachable'. - * but I don't know the way. - */ - return 0; - } - if (timeout < jiffies) { - /* remote end is not ready */ - double_timeoutfactor(); - localstats->tx_errors++; - PRINTK(("remote end is not ready.\n")); - return 1; /* Failed to send the packet */ - } - } + if (dev->interrupt) { + if (net_debug > 3) + printk("2"); + return; } - /* phase of making a connection */ - if (plip_send_start(dev, (struct ethhdr *)buf) < 0) - return 1; - { - /* send packet's length - the byte order has changed now and then. Today it's sent as in - the original crynwr-plip ... 
- Gruss PB - */ - send_byte(dev, length); - send_byte(dev, length >> 8); - } - { - /* phase of sending data */ - int i; - int checksum = 0; - - for ( i = 0; i < sizeof(struct ethhdr); i++ ) { - send_byte(dev, *buf); - checksum += *buf++; - } - - for (i = 0; i < length - sizeof(struct ethhdr); i++) { - checksum += buf[i]; - if (send_byte(dev, buf[i]) < 0) { - error++; - break; - } + if (dev->tbusy) { + if (snd->state > PLIP_ST_TRIGGER) { + printk("%s: rx interrupt in transmission\n", dev->name); + return; } - send_byte(dev, checksum & 0xff); + if (net_debug > 3) + printk("3"); } - { - /* phase of terminating this connection */ - int timeout; - - outb(0x00, dev->base_addr + PAR_DATA); - /* Wait for the remote end to reset. */ - timeout = jiffies + ((length * timeoutfactor) >> 4); - while ((inb(dev->base_addr + PAR_STATUS) & 0xe8) != 0x80) { - if (timeout < jiffies ) { - double_timeoutfactor(); - PRINTK(("Remote end has not reset.\n")); - error++; - break; - } - } - if (inb(dev->base_addr + PAR_STATUS) & 0x10) { - /* receiver reports error */ - error++; - } + + if (snd->state == PLIP_ST_ERROR) + return; + + c0 = inb(PAR_STATUS(dev)); + if ((c0 & 0xf8) != 0xc0) { + if (net_debug > 3) + printk("?"); + return; } - plip_device_clear(dev); - localstats->tx_packets++; - PRINTK2(("plip_send_packet(%d) done.\n", length)); - return error?1:0; + + dev->interrupt = 1; + + if (net_debug > 3) + printk("!"); + + dev->last_rx = jiffies; + outb(0x01, PAR_DATA(dev)); /* send ACK */ + rcv->state = PLIP_ST_TRIGGER; + rcv->count = 0; + + /* set timer */ + del_timer(&lp->tl); + lp->tl.expires = 0; + lp->tl.data = (unsigned long)dev; + lp->tl.function = (void (*)(unsigned long))plip_receive_packet; + add_timer(&lp->tl); + mark_bh (TIMER_BH); } -/* - * some trivial functions - */ -static void -plip_set_physicaladdr(struct device *dev, unsigned long ipaddr) +/* PLIP_SEND --- send a byte (two nibbles) + Return 0 on success, return 1 on failure */ +static int +plip_send(struct device *dev, 
enum plip_nibble_state *ns_p, unsigned char data) { - /* - * set physical address to - * 0xfc.0xfc.ipaddr - */ + unsigned char c0; + unsigned int cx; + struct net_local *nl= (struct net_local *)dev->priv; + + while (1) + switch (*ns_p) { + case PLIP_NST_BEGIN: + outb((data & 0x0f), PAR_DATA(dev)); + *ns_p = PLIP_NST_1; + break; - unsigned char *addr = dev->dev_addr; - int i; + case PLIP_NST_1: + outb(0x10 | (data & 0x0f), PAR_DATA(dev)); + cx = nl->nibble_us; + while (1) { + c0 = inb(PAR_STATUS(dev)); + if ((c0 & 0x80) == 0) + break; + if (--cx == 0) /* time out */ + return 1; + } + outb(0x10 | (data >> 4), PAR_DATA(dev)); + *ns_p = PLIP_NST_2; + break; - if ((ipaddr >> 24) == 0 || (ipaddr >> 24) == 0xff) return; - PRINTK2(("%s: set physical address to %08x\n", dev->name, ipaddr)); - for (i=0; i < ETH_ALEN - sizeof(unsigned long); i++) { - addr[i] = 0xfd; - } - memcpy(&(addr[i]), &ipaddr, sizeof(unsigned long)); -} + case PLIP_NST_2: + outb((data >> 4), PAR_DATA(dev)); + cx = nl->nibble_us; + while (1) { + c0 = inb(PAR_STATUS(dev)); + if (c0 & 0x80) + break; + if (--cx == 0) /* time out */ + return 1; + } + return 0; -static int -plip_addrcmp(struct ethhdr *eth) -{ - int i; - for ( i = ETH_ALEN - 1; i >= 0; i-- ) { - if (eth->h_dest[i] > eth->h_source[i]) return -1; - if (eth->h_dest[i] < eth->h_source[i]) return 1; - } - PRINTK2(("h_dest = %08x%04x h_source = %08x%04x\n", - *(long*)&eth->h_dest[2],*(short*)&eth->h_dest[0], - *(long*)&eth->h_source[2],*(short*)&eth->h_source[0])); - return 0; + default: + printk("plip:send state error\n"); + return 1; + } } -/* This function is evil, evil, evil. This should be a - _kernel_, rescheduling sleep!.
*/ static void -cold_sleep(int tics) +plip_send_packet(struct device *dev) { - int start = jiffies; - while(jiffies < start + tics) - ; /* do nothing */ - return; -} + struct enet_statistics *stats = (struct enet_statistics *) dev->priv; + struct net_local *lp = (struct net_local *)dev->priv; + struct plip_local *snd = &lp->snd_data; + unsigned char *lbuf = snd->skb->data; + unsigned char c0; + unsigned int cx; + unsigned char *s = PLIP_STATE_STRING(snd->state); + + if (net_debug > 4) + printk("S%s",s); + + while (1) { + switch (snd->state) { + case PLIP_ST_TRIGGER: + /* Trigger remote rx interrupt. */ + outb(0x08, PAR_DATA(dev)); + cx = lp->trigger_us; + while (1) { + if (dev->interrupt) { + stats->collisions++; + if (net_debug > 3) + printk("$"); + mark_bh(TIMER_BH); + return; + } + cli(); + c0 = inb(PAR_STATUS(dev)); + if (c0 & 0x08) { + disable_irq(dev->irq); + if (net_debug > 3) + printk("+"); + /* OK, connection established! */ + snd->state = PLIP_ST_LENGTH_LSB; + snd->nibble = PLIP_NST_BEGIN; + snd->count = 0; + sti(); + break; + } + sti(); + udelay(PLIP_DELAY_UNIT); + if (--cx == 0) { + outb(0x00, PAR_DATA(dev)); + goto try_again; + } + } + break; -static void - double_timeoutfactor() -{ - timeoutfactor *= 2; - if (timeoutfactor >= MAXTIMEOUTFACTOR) { - timeoutfactor = MAXTIMEOUTFACTOR; + case PLIP_ST_LENGTH_LSB: + if (plip_send(dev, &snd->nibble, snd->length & 0xff)) /* timeout */ + goto try_again; + + snd->state = PLIP_ST_LENGTH_MSB; + snd->nibble = PLIP_NST_BEGIN; + break; + + case PLIP_ST_LENGTH_MSB: + if (plip_send(dev, &snd->nibble, snd->length >> 8)) /* timeout */ + goto try_again; + + snd->state = PLIP_ST_DATA; + snd->nibble = PLIP_NST_BEGIN; + snd->byte = 0; + snd->checksum = 0; + break; + + case PLIP_ST_DATA: + if (plip_send(dev, &snd->nibble, lbuf[snd->byte])) /* timeout */ + goto try_again; + + snd->nibble = PLIP_NST_BEGIN; + snd->checksum += lbuf[snd->byte]; + snd->byte++; + if (snd->byte == snd->length) + snd->state = PLIP_ST_CHECKSUM; + 
break; + + case PLIP_ST_CHECKSUM: + if (plip_send(dev, &snd->nibble, snd->checksum)) /* timeout */ + goto try_again; + + mark_bh(NET_BH); + plip_device_clear(dev); + if (net_debug > 4) + printk("S(%4.4d)", snd->length); + dev_kfree_skb(snd->skb, FREE_WRITE); + stats->tx_packets++; + return; + + default: + printk("plip: BAD STATE?? %04d", snd->state); + plip_device_clear(dev); + return; + } + } + + try_again: + if (++snd->count > 3) { + /* timeout */ + s = PLIP_STATE_STRING(snd->state); + c0 = inb(PAR_STATUS(dev)); + stats->tx_errors++; + stats->tx_aborted_errors++; + if (net_debug > 1) + printk("%s: transmit timeout(%s,%02x)... reset interface.\n", + dev->name, s, (unsigned int)c0); + dev_kfree_skb(snd->skb,FREE_WRITE); + plip_error(dev); + } else { + s = PLIP_STATE_STRING(snd->state); + if (net_debug > 3) + printk("s%s",s); + + cli(); + if (dev->interrupt == 0) { + /* set timer */ + lp->tl.expires = 1; + lp->tl.data = (unsigned long)dev; + lp->tl.function = (void (*)(unsigned long))plip_send_packet; + add_timer(&lp->tl); + } + sti(); } - return; } -static struct enet_statistics * -plip_get_stats(struct device *dev) +static int plip_config(struct device *dev, struct ifmap *map) { - struct netstats *localstats = (struct netstats*) dev->priv; - return localstats; -} - + if(dev->flags&IFF_UP) + return -EBUSY; /* - * Local variables: - * compile-command: "gcc -D__KERNEL__ -Wall -O6 -fomit-frame-pointer -x c++ -c plip.c" - * version-control: t - * kept-new-versions: 5 - * End: + * We could probe this for verification, but since they told us + * to do it then they can suffer. 
*/ + if(map->base_addr!= (unsigned short)-1) + dev->base_addr=map->base_addr; + if(map->irq!= (unsigned char)-1) + dev->irq= map->irq; + return 0; +} + +static int plip_ioctl(struct device *dev, struct ifreq *rq) +{ + struct net_local *nl=(struct net_local *)dev->priv; + struct plipconf *pc=(struct plipconf *)rq->ifr_data; + + switch(pc->pcmd) + { + case PLIP_GET_TIMEOUT: + pc->trigger=nl->trigger_us; + pc->nibble=nl->nibble_us; + break; + case PLIP_SET_TIMEOUT: + nl->trigger_us=pc->trigger; + nl->nibble_us=pc->nibble; + break; + default: + return -EOPNOTSUPP; + } + return 0; +} + #ifdef MODULE char kernel_version[] = UTS_RELEASE; @@ -827,21 +1014,34 @@ unregister_netdev(&dev_plip0); if(dev_plip0.priv) { - kfree_s(dev_plip0.priv,sizeof(struct netstats)); + kfree_s(dev_plip0.priv,sizeof(struct net_local)); dev_plip0.priv=NULL; } unregister_netdev(&dev_plip1); if(dev_plip1.priv) { - kfree_s(dev_plip1.priv,sizeof(struct netstats)); + kfree_s(dev_plip1.priv,sizeof(struct net_local)); dev_plip0.priv=NULL; } unregister_netdev(&dev_plip2); if(dev_plip2.priv) { - kfree_s(dev_plip2.priv,sizeof(struct netstats)); + kfree_s(dev_plip2.priv,sizeof(struct net_local)); dev_plip2.priv=NULL; } } } #endif /* MODULE */ + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -c plip.c" + * c-indent-level: 4 + * c-continued-statement-offset: 4 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * version-control: t + * kept-new-versions: 10 + * End: + */ diff -u --recursive --new-file v1.1.19/linux/drivers/net/ppp.c linux/drivers/net/ppp.c --- v1.1.19/linux/drivers/net/ppp.c Fri Jun 17 15:20:05 1994 +++ linux/drivers/net/ppp.c Fri Jun 17 07:53:55 1994 @@ -1798,8 +1798,7 @@ ppp_kick_tty(ppp); done: - if (skb->free) - kfree_skb(skb, FREE_WRITE); + dev_kfree_skb(skb, FREE_WRITE); return 0; } diff -u --recursive --new-file v1.1.19/linux/drivers/net/skeleton.c linux/drivers/net/skeleton.c --- 
v1.1.19/linux/drivers/net/skeleton.c Tue May 24 00:34:52 1994 +++ linux/drivers/net/skeleton.c Fri Jun 17 07:53:55 1994 @@ -292,8 +292,7 @@ hardware_send_packet(ioaddr, buf, length); dev->trans_start = jiffies; } - if (skb->free) - kfree_skb (skb, FREE_WRITE); + dev_kfree_skb (skb, FREE_WRITE); /* You might need to clean up and record Tx statistics here. */ if (inw(ioaddr) == /*RU*/81) diff -u --recursive --new-file v1.1.19/linux/drivers/net/slip.c linux/drivers/net/slip.c --- v1.1.19/linux/drivers/net/slip.c Fri Jun 17 15:20:05 1994 +++ linux/drivers/net/slip.c Fri Jun 17 07:53:56 1994 @@ -464,41 +464,11 @@ /* We were not, so we are now... :-) */ if (skb != NULL) { -#if 0 -#ifdef CONFIG_AX25 - if(sl->mode & SL_MODE_AX25) - { - if(!skb->arp && dev->rebuild_header(skb->data,dev)) - { - skb->dev=dev; - arp_queue(skb); - return 0; - } - skb->arp=1; - } -#endif -#endif sl_lock(sl); size=skb->len; -#if 0 - if(!(sl->mode&SL_MODE_AX25)) - { - if(sizedata))->tot_len; - size=ntohs(size); - /* sl_hex_dump(skb->data,skb->len);*/ - } - } -#endif sl_encaps(sl, skb->data, size); - if (skb->free) - kfree_skb(skb, FREE_WRITE); + dev_kfree_skb(skb, FREE_WRITE); } return(0); } diff -u --recursive --new-file v1.1.19/linux/drivers/scsi/scsi_ioctl.c linux/drivers/scsi/scsi_ioctl.c --- v1.1.19/linux/drivers/scsi/scsi_ioctl.c Tue Apr 19 10:52:48 1994 +++ linux/drivers/scsi/scsi_ioctl.c Fri Jun 17 08:11:58 1994 @@ -147,7 +147,7 @@ Scsi_Cmnd * SCpnt; unsigned char opcode; int inlen, outlen, cmdlen; - int needed; + int needed, buf_needed; int result; if (!buffer) @@ -159,11 +159,11 @@ cmd_in = (char *) ( ((int *)buffer) + 2); opcode = get_fs_byte(cmd_in); - needed = (inlen > outlen ? inlen : outlen); - if(needed){ - needed = (needed + 511) & ~511; - if (needed > MAX_BUF) needed = MAX_BUF; - buf = (char *) scsi_malloc(needed); + needed = buf_needed = (inlen > outlen ? 
inlen : outlen); + if(buf_needed){ + buf_needed = (buf_needed + 511) & ~511; + if (buf_needed > MAX_BUF) buf_needed = MAX_BUF; + buf = (char *) scsi_malloc(buf_needed); if (!buf) return -ENOMEM; } else buf = NULL; @@ -202,7 +202,7 @@ }; result = SCpnt->result; SCpnt->request.dev = -1; /* Mark as not busy */ - if (buf) scsi_free(buf, needed); + if (buf) scsi_free(buf, buf_needed); if(scsi_devices[SCpnt->index].scsi_request_fn) (*scsi_devices[SCpnt->index].scsi_request_fn)(); diff -u --recursive --new-file v1.1.19/linux/drivers/scsi/ultrastor.c linux/drivers/scsi/ultrastor.c --- v1.1.19/linux/drivers/scsi/ultrastor.c Fri Jun 17 15:20:07 1994 +++ linux/drivers/scsi/ultrastor.c Fri Jun 17 08:11:57 1994 @@ -13,6 +13,7 @@ /* * TODO: * 1. Find out why scatter/gather is limited to 16 requests per command. + * This is fixed, at least on the 24F, as of version 1.12 - CAE. * 2. Look at command linking (mscp.command_link and * mscp.command_link_id). (Does not work with many disks, * and no performance increase. ERY). @@ -38,6 +39,15 @@ * unfinished, questionable, or wrong. */ +/* Changes from version 1.11 alpha to 1.12 + * + * Increased the size of the scatter-gather list to 33 entries for + * the 24F adapter (it was 16). I don't have the specs for the 14F + * or the 34F, so they may support larger s-g lists as well. + * + * Caleb Epstein + */ + /* Changes from version 1.9 to 1.11 * * Patches to bring this driver up to speed with the default kernel @@ -138,7 +148,7 @@ #define ULTRASTOR_DEBUG (UD_ABORT|UD_CSIR|UD_RESET) #endif -#define VERSION "1.11 alpha" +#define VERSION "1.12" #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr)[0]) @@ -183,7 +193,7 @@ the MSCP structure because they are associated with SCSI requests. 
*/ void (*done)(Scsi_Cmnd *); Scsi_Cmnd *SCint; - ultrastor_sg_list sglist[ULTRASTOR_14F_MAX_SG]; + ultrastor_sg_list sglist[ULTRASTOR_24F_MAX_SG]; /* use larger size for 24F */ }; @@ -504,6 +514,7 @@ static int ultrastor_24f_detect(int hostnum) { register int i; + struct Scsi_Host * shpnt = NULL; #if (ULTRASTOR_DEBUG & UD_DETECT) printk("US24F: detect"); @@ -580,8 +591,13 @@ config.host_number = hostnum; scsi_hosts[hostnum].this_id = config.ha_scsi_id; scsi_hosts[hostnum].unchecked_isa_dma = 0; - scsi_hosts[hostnum].sg_tablesize = ULTRASTOR_14F_MAX_SG; + scsi_hosts[hostnum].sg_tablesize = ULTRASTOR_24F_MAX_SG; + shpnt = scsi_register(hostnum, 0); + shpnt->irq = config.interrupt; + shpnt->dma_channel = config.dma_channel; + shpnt->io_port = config.port_address; + #if ULTRASTOR_MAX_CMDS > 1 config.mscp_free = ~0; #endif @@ -594,7 +610,7 @@ outb(ultrastor_bus_reset ? 0xc2 : 0x82, LCL_DOORBELL_MASK(addr+12)); outb(0x02, SYS_DOORBELL_MASK(addr+12)); printk("UltraStor driver version " VERSION ". Using %d SG lists.\n", - ULTRASTOR_14F_MAX_SG); + scsi_hosts[hostnum].sg_tablesize); return TRUE; } return FALSE; @@ -810,6 +826,10 @@ if(config.slot) return SCSI_ABORT_SNOOZE; /* Do not attempt an abort for the 24f */ + /* Simple consistency checking */ + if(!SCpnt->host_scribble) + return SCSI_ABORT_NOT_RUNNING; + mscp_index = ((struct mscp *)SCpnt->host_scribble) - config.mscp; if (mscp_index >= ULTRASTOR_MAX_CMDS) panic("Ux4F aborting invalid MSCP"); @@ -899,7 +919,7 @@ #if ULTRASTOR_DEBUG & UD_ABORT if (config.mscp[mscp_index].SCint != SCpnt) - printk("abort: command mismatch, %x != %x\n", + printk("abort: command mismatch, %p != %p\n", config.mscp[mscp_index].SCint, SCpnt); #endif if (config.mscp[mscp_index].SCint == 0) @@ -915,7 +935,6 @@ /* Need to set a timeout here in case command never completes. 
*/ return SCSI_ABORT_SUCCESS; - } int ultrastor_reset(Scsi_Cmnd * SCpnt) diff -u --recursive --new-file v1.1.19/linux/drivers/scsi/ultrastor.h linux/drivers/scsi/ultrastor.h --- v1.1.19/linux/drivers/scsi/ultrastor.h Fri May 27 10:49:12 1994 +++ linux/drivers/scsi/ultrastor.h Fri Jun 17 08:11:57 1994 @@ -21,6 +21,8 @@ int ultrastor_biosparam(int, int, int *); #define ULTRASTOR_14F_MAX_SG 16 +#define ULTRASTOR_24F_MAX_SG 33 + #define ULTRASTOR_MAX_CMDS_PER_LUN 5 #define ULTRASTOR_MAX_CMDS 16 diff -u --recursive --new-file v1.1.19/linux/fs/exec.c linux/fs/exec.c --- v1.1.19/linux/fs/exec.c Fri May 27 10:49:12 1994 +++ linux/fs/exec.c Fri Jun 17 09:13:34 1994 @@ -42,6 +42,7 @@ #include #include +#include #include #include @@ -55,12 +56,15 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(int fd); +static int aout_core_dump(long signr, struct pt_regs * regs); /* * Here are the actual binaries that will be accepted: * add more with "register_binfmt()".. */ -static struct linux_binfmt aout_format = { NULL, load_aout_binary, load_aout_library }; +static struct linux_binfmt aout_format = { + NULL, NULL, load_aout_binary, load_aout_library, aout_core_dump +}; static struct linux_binfmt *formats = &aout_format; int register_binfmt(struct linux_binfmt * fmt) @@ -154,7 +158,7 @@ * field, which also makes sure the core-dumps won't be recursive if the * dumping of the process results in another error.. 
*/ -int core_dump(long signr, struct pt_regs * regs) +static int aout_core_dump(long signr, struct pt_regs * regs) { struct inode * inode = NULL; struct file file; @@ -318,12 +322,12 @@ mpnt->vm_start = PAGE_MASK & (unsigned long) p; mpnt->vm_end = TASK_SIZE; mpnt->vm_page_prot = PAGE_PRIVATE|PAGE_DIRTY; + mpnt->vm_flags = VM_GROWSDOWN; mpnt->vm_share = NULL; mpnt->vm_inode = NULL; mpnt->vm_offset = 0; mpnt->vm_ops = NULL; insert_vm_struct(current, mpnt); - current->mm->stk_vma = mpnt; } sp = (unsigned long *) (0xfffffffc & (unsigned long) p); sp -= envc+1; @@ -529,7 +533,6 @@ mpnt = current->mm->mmap; current->mm->mmap = NULL; - current->mm->stk_vma = NULL; while (mpnt) { mpnt1 = mpnt->vm_next; if (mpnt->vm_ops && mpnt->vm_ops->close) @@ -571,9 +574,6 @@ if (last_task_used_math == current) last_task_used_math = NULL; current->used_math = 0; - current->personality = 0; - current->lcall7 = no_lcall7; - current->signal_map = current->signal_invmap = ident_map; } /* @@ -764,25 +764,6 @@ return error; } -/* - * signal mapping: this is the default identity mapping used for normal - * linux binaries (it's both the reverse and the normal map, of course) - */ -unsigned long ident_map[33] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 -}; - -/* - * default lcall7 handler.. The native linux stuff doesn't - * use it at all, so we just segfault on it. 
- */ -asmlinkage void no_lcall7(struct pt_regs * regs) -{ - send_sig(SIGSEGV, current, 1); -} - static void set_brk(unsigned long start, unsigned long end) { start = PAGE_ALIGN(start); @@ -805,6 +786,7 @@ struct file * file; int fd, error; unsigned long p = bprm->p; + unsigned long fd_offset; ex = *((struct exec *) bprm->buf); /* exec-header */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && @@ -814,17 +796,19 @@ return -ENOEXEC; } - if (N_MAGIC(ex) == ZMAGIC && - (N_TXTOFF(ex) < bprm->inode->i_sb->s_blocksize)) { - printk("N_TXTOFF < BLOCK_SIZE. Please convert binary."); + current->personality = PER_LINUX; + fd_offset = N_TXTOFF(ex); + if (N_MAGIC(ex) == ZMAGIC && fd_offset != BLOCK_SIZE) { + printk(KERN_NOTICE "N_TXTOFF != BLOCK_SIZE. See a.out.h.\n"); return -ENOEXEC; } - if (N_TXTOFF(ex) != BLOCK_SIZE && N_MAGIC(ex) == ZMAGIC) { - printk("N_TXTOFF != BLOCK_SIZE. See a.out.h."); + if (N_MAGIC(ex) == ZMAGIC && ex.a_text && + (fd_offset < bprm->inode->i_sb->s_blocksize)) { + printk(KERN_NOTICE "N_TXTOFF < BLOCK_SIZE. 
Please convert binary.\n"); return -ENOEXEC; } - + /* OK, This is the point of no return */ flush_old_exec(bprm); @@ -845,7 +829,7 @@ read_exec(bprm->inode, 32, (char *) 0, ex.a_text+ex.a_data); } else { if (ex.a_text & 0xfff || ex.a_data & 0xfff) - printk("%s: executable not page aligned\n", current->comm); + printk(KERN_NOTICE "executable not page aligned\n"); fd = open_inode(bprm->inode, O_RDONLY); @@ -857,23 +841,26 @@ do_mmap(NULL, 0, ex.a_text+ex.a_data, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, 0); - read_exec(bprm->inode, N_TXTOFF(ex), + read_exec(bprm->inode, fd_offset, (char *) N_TXTADDR(ex), ex.a_text+ex.a_data); goto beyond_if; } - error = do_mmap(file, N_TXTADDR(ex), ex.a_text, + + if (ex.a_text) { + error = do_mmap(file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_SHARED, N_TXTOFF(ex)); + MAP_FIXED | MAP_SHARED, fd_offset); - if (error != N_TXTADDR(ex)) { - sys_close(fd); - send_sig(SIGSEGV, current, 0); - return 0; - }; + if (error != N_TXTADDR(ex)) { + sys_close(fd); + send_sig(SIGSEGV, current, 0); + return 0; + }; + } error = do_mmap(file, N_TXTADDR(ex) + ex.a_text, ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE, N_TXTOFF(ex) + ex.a_text); + MAP_FIXED | MAP_PRIVATE, fd_offset + ex.a_text); sys_close(fd); if (error != N_TXTADDR(ex) + ex.a_text) { send_sig(SIGSEGV, current, 0); @@ -883,11 +870,24 @@ bprm->inode->i_count++; } beyond_if: + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)--; + if (current->binfmt && current->binfmt->use_count) + (*current->binfmt->use_count)--; + current->exec_domain = lookup_exec_domain(current->personality); + current->binfmt = &aout_format; + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)++; + if (current->binfmt && current->binfmt->use_count) + (*current->binfmt->use_count)++; + set_brk(current->mm->start_brk, current->mm->brk); p += 
change_ldt(ex.a_text,bprm->page); p -= MAX_ARG_PAGES*PAGE_SIZE; - p = (unsigned long) create_tables((char *)p,bprm->argc,bprm->envc,0); + p = (unsigned long)create_tables((char *)p, + bprm->argc, bprm->envc, + current->personality != PER_LINUX); current->mm->start_stack = p; regs->eip = ex.a_entry; /* eip, magic happens :-) */ regs->esp = p; /* stack pointer */ diff -u --recursive --new-file v1.1.19/linux/fs/isofs/inode.c linux/fs/isofs/inode.c --- v1.1.19/linux/fs/isofs/inode.c Wed Apr 27 09:46:08 1994 +++ linux/fs/isofs/inode.c Fri Jun 17 08:11:57 1994 @@ -63,46 +63,59 @@ NULL }; +struct iso9660_options{ + char map; + char rock; + char cruft; + unsigned char conversion; + unsigned int blocksize; + gid_t gid; + uid_t uid; +}; - -static int parse_options(char *options,char *map,char *conversion, char * rock, char * cruft, unsigned int * blocksize) +static int parse_options(char *options, struct iso9660_options * popt) { char *this_char,*value; - *map = 'n'; - *rock = 'y'; - *cruft = 'n'; - *conversion = 'a'; - *blocksize = 1024; + popt->map = 'n'; + popt->rock = 'y'; + popt->cruft = 'n'; + popt->conversion = 'a'; + popt->blocksize = 1024; + popt->gid = 0; + popt->uid = 0; if (!options) return 1; for (this_char = strtok(options,","); this_char; this_char = strtok(NULL,",")) { if (strncmp(this_char,"norock",6) == 0) { - *rock = 'n'; + popt->rock = 'n'; continue; }; if (strncmp(this_char,"cruft",5) == 0) { - *cruft = 'y'; + popt->cruft = 'y'; continue; }; if ((value = strchr(this_char,'=')) != NULL) *value++ = 0; if (!strcmp(this_char,"map") && value) { if (value[0] && !value[1] && strchr("on",*value)) - *map = *value; - else if (!strcmp(value,"off")) *map = 'o'; - else if (!strcmp(value,"normal")) *map = 'n'; + popt->map = *value; + else if (!strcmp(value,"off")) popt->map = 'o'; + else if (!strcmp(value,"normal")) popt->map = 'n'; else return 0; } else if (!strcmp(this_char,"conv") && value) { if (value[0] && !value[1] && strchr("bta",*value)) - *conversion = 
*value; - else if (!strcmp(value,"binary")) *conversion = 'b'; - else if (!strcmp(value,"text")) *conversion = 't'; - else if (!strcmp(value,"mtext")) *conversion = 'm'; - else if (!strcmp(value,"auto")) *conversion = 'a'; + popt->conversion = *value; + else if (!strcmp(value,"binary")) popt->conversion = 'b'; + else if (!strcmp(value,"text")) popt->conversion = 't'; + else if (!strcmp(value,"mtext")) popt->conversion = 'm'; + else if (!strcmp(value,"auto")) popt->conversion = 'a'; else return 0; } - else if (!strcmp(this_char,"block") && value) { + else if (value && + (!strcmp(this_char,"block") || + !strcmp(this_char,"uid") || + !strcmp(this_char,"gid"))) { char * vpnt = value; unsigned int ivalue; ivalue = 0; @@ -112,8 +125,18 @@ vpnt++; }; if (*vpnt) return 0; - if (ivalue != 1024 && ivalue != 2048) return 0; - *blocksize = ivalue; + switch(*this_char) { /* option names differ in their first letter */ + case 'b': /* "block=N": only 1024 or 2048 accepted */ + if (ivalue != 1024 && ivalue != 2048) return 0; + popt->blocksize = ivalue; + break; + case 'g': /* "gid=N": group id given to all files */ + popt->gid = ivalue; + break; + case 'u': /* "uid=N": user id given to all files */ + popt->uid = ivalue; + break; + } } else return 0; } @@ -125,7 +148,7 @@ { struct buffer_head *bh; int iso_blknum; - unsigned int blocksize, blocksize_bits; + unsigned int blocksize_bits; int high_sierra; int dev=s->s_dev; struct iso_volume_descriptor *vdp; @@ -136,29 +159,39 @@ struct iso_directory_record *rootp; - char map, conversion, rock, cruft; + struct iso9660_options opt; - if (!parse_options((char *) data,&map,&conversion, &rock, &cruft, &blocksize)) { + if (!parse_options((char *) data,&opt)) { s->s_dev = 0; return NULL; } +#if 0 + printk("map = %c\n", opt.map); + printk("rock = %c\n", opt.rock); + printk("cruft = %c\n", opt.cruft); + printk("conversion = %c\n", opt.conversion); + printk("blocksize = %d\n", opt.blocksize); + printk("gid = %d\n", opt.gid); + printk("uid = %d\n", opt.uid); +#endif + blocksize_bits = 0; { - int i = blocksize; + int i = opt.blocksize; while (i != 1){ blocksize_bits++; i >>=1; }; }; - set_blocksize(dev, blocksize); +
set_blocksize(dev, opt.blocksize); lock_super(s); s->u.isofs_sb.s_high_sierra = high_sierra = 0; /* default is iso9660 */ for (iso_blknum = 16; iso_blknum < 100; iso_blknum++) { - if (!(bh = bread(dev, iso_blknum << (ISOFS_BLOCK_BITS-blocksize_bits), blocksize))) { + if (!(bh = bread(dev, iso_blknum << (ISOFS_BLOCK_BITS-blocksize_bits), opt.blocksize))) { s->s_dev=0; printk("isofs_read_super: bread failed, dev 0x%x iso_blknum %d\n", dev, iso_blknum); @@ -178,7 +211,7 @@ s->u.isofs_sb.s_high_sierra = 1; high_sierra = 1; - rock = 'n'; + opt.rock = 'n'; h_pri = (struct hs_primary_descriptor *)vdp; break; }; @@ -235,7 +268,7 @@ to allow suid. (suid or devices will not show up unless we have Rock Ridge extensions) */ - s->s_flags = MS_RDONLY /* | MS_NODEV | MS_NOSUID */; + s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; if(s->u.isofs_sb.s_log_zone_size != (1 << ISOFS_BLOCK_BITS)) { printk("1 <s_dev = dev; s->s_op = &isofs_sops; - s->u.isofs_sb.s_mapping = map; - s->u.isofs_sb.s_rock = (rock == 'y' ? 1 : 0); - s->u.isofs_sb.s_conversion = conversion; - s->u.isofs_sb.s_cruft = cruft; - s->s_blocksize = blocksize; + s->u.isofs_sb.s_mapping = opt.map; + s->u.isofs_sb.s_rock = (opt.rock == 'y' ? 
1 : 0); + s->u.isofs_sb.s_conversion = opt.conversion; + s->u.isofs_sb.s_cruft = opt.cruft; + s->u.isofs_sb.s_uid = opt.uid; + s->u.isofs_sb.s_gid = opt.gid; + s->s_blocksize = opt.blocksize; s->s_blocksize_bits = blocksize_bits; s->s_mounted = iget(s, isonum_733 (rootp->extent) << ISOFS_BLOCK_BITS); unlock_super(s); @@ -389,8 +424,8 @@ if(i == raw_inode->name_len[0] || raw_inode->name[i] == ';') inode->i_mode |= S_IXUGO; /* execute permission */ } - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = inode->i_sb->u.isofs_sb.s_uid; + inode->i_gid = inode->i_sb->u.isofs_sb.s_gid; inode->i_size = isonum_733 (raw_inode->size); /* There are defective discs out there - we do this to protect @@ -416,15 +451,6 @@ inode->i_size = 0; } -#ifdef DEBUG - /* I have no idea what extended attributes are used for, so - we will flag it for now */ - if(raw_inode->ext_attr_length[0] != 0){ - printk("Extended attributes present for ISO file (%ld).\n", - inode->i_ino); - } -#endif - /* I have no idea what file_unit_size is used for, so we will flag it for now */ if(raw_inode->file_unit_size[0] != 0){ @@ -446,7 +472,9 @@ inode->i_mtime = inode->i_atime = inode->i_ctime = iso_date(raw_inode->date, high_sierra); - inode->u.isofs_i.i_first_extent = isonum_733 (raw_inode->extent) << + inode->u.isofs_i.i_first_extent = + (isonum_733 (raw_inode->extent) + + isonum_711 (raw_inode->ext_attr_length)) << (ISOFS_BLOCK_BITS - ISOFS_BUFFER_BITS(inode)); inode->u.isofs_i.i_backlink = 0xffffffff; /* Will be used for previous directory */ diff -u --recursive --new-file v1.1.19/linux/fs/nfs/mmap.c linux/fs/nfs/mmap.c --- v1.1.19/linux/fs/nfs/mmap.c Sat May 7 14:54:08 1994 +++ linux/fs/nfs/mmap.c Thu Jun 16 13:03:19 1994 @@ -73,6 +73,7 @@ mpnt->vm_start = addr; mpnt->vm_end = addr + len; mpnt->vm_page_prot = prot; + mpnt->vm_flags = 0; mpnt->vm_share = NULL; mpnt->vm_inode = inode; inode->i_count++; diff -u --recursive --new-file v1.1.19/linux/ibcs/binfmt_elf.c linux/ibcs/binfmt_elf.c --- 
v1.1.19/linux/ibcs/binfmt_elf.c Tue May 24 08:47:07 1994 +++ linux/ibcs/binfmt_elf.c Fri Jun 17 08:11:57 1994 @@ -1,5 +1,12 @@ /* * linux/fs/binfmt_elf.c + * + * These are the functions used to load ELF format executables as used + * on SVr4 machines. Information on the format may be found in the book + * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support + * Tools". + * + * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com). */ #include #include @@ -14,6 +21,7 @@ #include #include #include +#include #include @@ -139,7 +147,7 @@ if((interp_elf_ex->e_type != ET_EXEC && interp_elf_ex->e_type != ET_DYN) || (interp_elf_ex->e_machine != EM_386 && interp_elf_ex->e_machine != EM_486) || - (!interpreter_inode->i_op || !interpreter_inode->i_op->bmap || + (!interpreter_inode->i_op || !interpreter_inode->i_op->default_file_ops->mmap)){ return 0xffffffff; }; @@ -251,7 +259,7 @@ #define INTERPRETER_AOUT 1 #define INTERPRETER_ELF 2 -int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) +static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) { struct elfhdr elf_ex; struct elfhdr interp_elf_ex; @@ -496,8 +504,17 @@ kfree(elf_phdata); - if(!elf_interpreter) sys_close(elf_exec_fileno); - current->elf_executable = 1; + if(interpreter_type != INTERPRETER_AOUT) sys_close(elf_exec_fileno); + + /* The following 3 lines need a little bit of work if we are loading + an iBCS2 binary. We should initially load it this way, and if + we get a lcall7, then we should look to see if the iBCS2 execution + profile is present. If it is, then switch to that, otherwise + bomb. */ + current->personality = PER_LINUX; + current->lcall7 = no_lcall7; + current->signal_map = current->signal_invmap = ident_map; + current->executable = bprm->inode; bprm->inode->i_count++; #ifdef LOW_ELF_STACK @@ -545,7 +562,7 @@ /* This is really simpleminded and specialized - we are loading an a.out library that is given an ELF header. 
*/ -int load_elf_library(int fd){ +static int load_elf_library(int fd){ struct file * file; struct elfhdr elf_ex; struct elf_phdr *elf_phdata = NULL; @@ -576,7 +593,7 @@ /* First of all, some simple consistency checks */ if(elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || (elf_ex.e_machine != EM_386 && elf_ex.e_machine != EM_486) || - (!inode->i_op || !inode->i_op->bmap || + (!inode->i_op || !inode->i_op->default_file_ops->mmap)){ return -ENOEXEC; }; @@ -634,3 +651,5 @@ kfree(elf_phdata); return 0; } + +struct linux_binfmt elf_format = { NULL, load_elf_binary, load_elf_library }; diff -u --recursive --new-file v1.1.19/linux/include/linux/binfmts.h linux/include/linux/binfmts.h --- v1.1.19/linux/include/linux/binfmts.h Tue May 24 08:47:07 1994 +++ linux/include/linux/binfmts.h Thu Jun 16 10:30:25 1994 @@ -30,8 +30,10 @@ */ struct linux_binfmt { struct linux_binfmt * next; + int *use_count; int (*load_binary)(struct linux_binprm *, struct pt_regs * regs); int (*load_shlib)(int fd); + int (*core_dump)(long signr, struct pt_regs * regs); }; extern int register_binfmt(struct linux_binfmt *); diff -u --recursive --new-file v1.1.19/linux/include/linux/cdu31a.h linux/include/linux/cdu31a.h --- v1.1.19/linux/include/linux/cdu31a.h Wed Dec 1 14:44:15 1993 +++ linux/include/linux/cdu31a.h Fri Jun 10 17:52:16 1994 @@ -135,6 +135,7 @@ #define SONY_HWC_GET_LOAD_MECH(c) (c.hw_config[0] & 0x03) #define SONY_HWC_EJECT(c) (c.hw_config[0] & 0x04) #define SONY_HWC_LED_SUPPORT(c) (c.hw_config[0] & 0x08) +#define SONY_HWC_DOUBLE_SPEED(c) (c.hw_config[0] & 0x10) #define SONY_HWC_GET_BUF_MEM_SIZE(c) ((c.hw_config[0] & 0xc0) >> 6) #define SONY_HWC_AUDIO_PLAYBACK(c) (c.hw_config[1] & 0x01) #define SONY_HWC_ELECTRIC_VOLUME(c) (c.hw_config[1] & 0x02) diff -u --recursive --new-file v1.1.19/linux/include/linux/if.h linux/include/linux/if.h --- v1.1.19/linux/include/linux/if.h Tue May 24 00:34:55 1994 +++ linux/include/linux/if.h Fri Jun 17 07:53:56 1994 @@ -60,7 +60,7 @@ /* * Device mapping 
structure. I'd just gone off and designed a * beautiful scheme using only loadable modules with arguments - * for driver options and along come the PCMICA people 8) + * for driver options and along come the PCMCIA people 8) * * Ah well. The get() side of this is good for WDSETUP, and it'll * be handy for debugging things. The set side is fine for now and diff -u --recursive --new-file v1.1.19/linux/include/linux/if_plip.h linux/include/linux/if_plip.h --- v1.1.19/linux/include/linux/if_plip.h Thu Jan 1 02:00:00 1970 +++ linux/include/linux/if_plip.h Fri Jun 17 07:53:56 1994 @@ -0,0 +1,28 @@ +/* + * NET3 PLIP tuning facilities for the new Niibe PLIP. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _LINUX_IF_PLIP_H +#define _LINUX_IF_PLIP_H + +#include + +#define SIOCDEVPLIP SIOCDEVPRIVATE + +struct plipconf +{ + unsigned short pcmd; + unsigned long nibble; + unsigned long trigger; +}; + +#define PLIP_GET_TIMEOUT 0x1 +#define PLIP_SET_TIMEOUT 0x2 + +#endif diff -u --recursive --new-file v1.1.19/linux/include/linux/iso_fs_sb.h linux/include/linux/iso_fs_sb.h --- v1.1.19/linux/include/linux/iso_fs_sb.h Wed Dec 1 14:44:15 1993 +++ linux/include/linux/iso_fs_sb.h Fri Jun 17 08:11:57 1994 @@ -18,6 +18,10 @@ unsigned char s_cruft; /* Broken disks with high byte of length containing junk */ + unsigned char s_nosuid; + unsigned char s_nodev; + gid_t s_gid; + uid_t s_uid; }; #endif diff -u --recursive --new-file v1.1.19/linux/include/linux/ldt.h linux/include/linux/ldt.h --- v1.1.19/linux/include/linux/ldt.h Wed Dec 15 10:53:48 1993 +++ linux/include/linux/ldt.h Fri Jun 17 08:34:41 1994 @@ -19,6 +19,7 @@ unsigned int contents:2; unsigned int read_exec_only:1; unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; }; #define 
MODIFY_LDT_CONTENTS_DATA 0 Only in v1.1.19/linux/include/linux: mktime.h diff -u --recursive --new-file v1.1.19/linux/include/linux/mm.h linux/include/linux/mm.h --- v1.1.19/linux/include/linux/mm.h Tue Apr 19 10:53:28 1994 +++ linux/include/linux/mm.h Fri Jun 17 11:42:15 1994 @@ -9,19 +9,8 @@ #define VERIFY_READ 0 #define VERIFY_WRITE 1 -int __verify_write(unsigned long addr, unsigned long count); +extern int verify_area(int, const void *, unsigned long); -extern inline int verify_area(int type, const void * addr, unsigned long size) -{ - if (TASK_SIZE <= (unsigned long) addr) - return -EFAULT; - if (size > TASK_SIZE - (unsigned long) addr) - return -EFAULT; - if (wp_works_ok || type == VERIFY_READ || !size) - return 0; - return __verify_write((unsigned long) addr,size); -} - /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way @@ -42,12 +31,19 @@ unsigned long vm_start; unsigned long vm_end; unsigned short vm_page_prot; + unsigned short vm_flags; struct vm_area_struct * vm_next; /* linked list */ struct vm_area_struct * vm_share; /* linked list */ struct inode * vm_inode; unsigned long vm_offset; struct vm_operations_struct * vm_ops; }; + +/* + * vm_flags.. + */ +#define VM_GROWSDOWN 0x01 +#define VM_GROWSUP 0x02 /* * These are the virtual MM functions - opening of an area, closing it (needed to diff -u --recursive --new-file v1.1.19/linux/include/linux/nfs_fs.h linux/include/linux/nfs_fs.h --- v1.1.19/linux/include/linux/nfs_fs.h Wed Dec 1 14:44:15 1993 +++ linux/include/linux/nfs_fs.h Fri Jun 17 12:35:30 1994 @@ -22,14 +22,6 @@ #define NFS_READDIR_CACHE_SIZE 64 -/* - * WARNING! The I/O buffer size cannot be bigger than about 3900 for now. - * It needs to fit inside a 4096-byte page and leave room for the RPC and - * NFS headers. But it ought to at least be a multiple of 512 and probably - * should be a power of 2. I don't think Linux TCP/IP can handle more than - * about 1800 yet. 
- */ - #define NFS_MAX_FILE_IO_BUFFER_SIZE (7*512) #define NFS_DEF_FILE_IO_BUFFER_SIZE 1024 diff -u --recursive --new-file v1.1.19/linux/include/linux/personality.h linux/include/linux/personality.h --- v1.1.19/linux/include/linux/personality.h Tue May 24 08:47:07 1994 +++ linux/include/linux/personality.h Thu Jun 16 10:30:25 1994 @@ -1,8 +1,17 @@ +#ifndef _PERSONALITY_H +#define _PERSONALITY_H + +#include +#include + + /* Flags for bug emulation. These occupy the top three bytes. */ -#define STICKY_TIMEOUTS 0x8000000 -#define WHOLE_SECONDS 0x4000000 +#define STICKY_TIMEOUTS 0x4000000 +#define WHOLE_SECONDS 0x2000000 -/* Personality types. These go in the low byte. */ +/* Personality types. These go in the low byte. Avoid using the top bit, + * it will conflict with error returns. + */ #define PER_MASK (0x00ff) #define PER_LINUX (0x0000) #define PER_SVR4 (0x0001 | STICKY_TIMEOUTS) @@ -10,3 +19,33 @@ #define PER_SCOSVR3 (0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS) #define PER_WYSEV386 (0x0004 | STICKY_TIMEOUTS) #define PER_ISCR4 (0x0005 | STICKY_TIMEOUTS) +#define PER_BSD (0x0006) + + +/* Prototype for an lcall7 syscall handler. */ +typedef asmlinkage void (*lcall7_func)(struct pt_regs *); + + +/* Description of an execution domain - personality range supported, + * lcall7 syscall handler, start up / shut down functions etc. + * N.B. The name and lcall7 handler must be where they are since the + * offset of the handler is hard coded in kernel/sys_call.S. 
+ */ +struct exec_domain { + char *name; + lcall7_func handler; + unsigned char pers_low, pers_high; + unsigned long * signal_map; + unsigned long * signal_invmap; + int *use_count; + struct exec_domain *next; +}; + +extern struct exec_domain default_exec_domain; + +extern struct exec_domain *lookup_exec_domain(unsigned long personality); +extern int register_exec_domain(struct exec_domain *it); +extern int unregister_exec_domain(struct exec_domain *it); +extern asmlinkage int sys_personality(unsigned long personality); + +#endif /* _PERSONALITY_H */ diff -u --recursive --new-file v1.1.19/linux/include/linux/sched.h linux/include/linux/sched.h --- v1.1.19/linux/include/linux/sched.h Fri May 27 10:49:15 1994 +++ linux/include/linux/sched.h Thu Jun 16 12:59:24 1994 @@ -36,6 +36,8 @@ extern int EISA_bus; #define MCA_bus 0 +#include +#include #include #include @@ -228,7 +230,6 @@ short swap_page; /* current page */ #endif NEW_SWAP struct vm_area_struct * mmap; - struct vm_area_struct * stk_vma; }; #define INIT_MM { \ @@ -240,7 +241,7 @@ /* ?_flt */ 0, 0, 0, 0, \ 0, \ /* swap */ 0, 0, 0, 0, 0, \ - NULL, NULL } + NULL } struct task_struct { /* these are hardcoded - don't touch */ @@ -252,12 +253,11 @@ unsigned long flags; /* per process flags, defined below */ int errno; int debugreg[8]; /* Hardware debugging registers */ - asmlinkage void (*lcall7)(struct pt_regs *); + struct exec_domain *exec_domain; /* various fields */ + struct linux_binfmt *binfmt; struct task_struct *next_task, *prev_task; struct sigaction sigaction[32]; - unsigned long * signal_map; - unsigned long * signal_invmap; unsigned long saved_kernel_stack; unsigned long kernel_stack_page; int exit_code, exit_signal; @@ -326,9 +326,10 @@ #define INIT_TASK \ /* state etc */ { 0,15,15,0,0,0,0, \ /* debugregs */ { 0, }, \ -/* lcall 7 */ no_lcall7, \ +/* exec domain */&default_exec_domain, \ +/* binfmt */ NULL, \ /* schedlink */ &init_task,&init_task, \ -/* signals */ {{ 0, },}, ident_map, ident_map, \ +/* 
signals */ {{ 0, },}, \ /* stack */ 0,(unsigned long) &init_kernel_stack, \ /* ec,brk... */ 0,0,0,0,0, \ /* pid etc.. */ 0,0,0,0, \ @@ -360,9 +361,6 @@ extern unsigned long itimer_next; extern struct timeval xtime; extern int need_resched; - -extern unsigned long ident_map[33]; -extern asmlinkage void no_lcall7(struct pt_regs *); #define CURRENT_TIME (xtime.tv_sec) diff -u --recursive --new-file v1.1.19/linux/include/linux/skbuff.h linux/include/linux/skbuff.h --- v1.1.19/linux/include/linux/skbuff.h Tue May 31 12:48:19 1994 +++ linux/include/linux/skbuff.h Fri Jun 17 07:53:56 1994 @@ -108,11 +108,11 @@ extern void skb_unlink(struct sk_buff *buf); extern struct sk_buff * skb_peek_copy(struct sk_buff_head *list); extern struct sk_buff * alloc_skb(unsigned int size, int priority); -extern void kfree_skbmem(void *mem, unsigned size); +extern void kfree_skbmem(struct sk_buff *skb, unsigned size); extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); -extern void skb_kept_by_device(struct sk_buff *skb); -extern void skb_device_release(struct sk_buff *skb, - int mode); +extern void skb_device_lock(struct sk_buff *skb); +extern void skb_device_unlock(struct sk_buff *skb); +extern void dev_kfree_skb(struct sk_buff *skb, int mode); extern int skb_device_locked(struct sk_buff *skb); /* * Peek an sk_buff. Unlike most other operations you _MUST_ diff -u --recursive --new-file v1.1.19/linux/include/linux/socket.h linux/include/linux/socket.h --- v1.1.19/linux/include/linux/socket.h Tue Apr 19 22:20:34 1994 +++ linux/include/linux/socket.h Fri Jun 17 07:53:56 1994 @@ -33,12 +33,15 @@ #define AF_AX25 3 #define AF_IPX 4 +#define AF_MAX 8 /* For now.. */ + /* Protocol families, same as address families. */ #define PF_UNIX AF_UNIX #define PF_INET AF_INET #define PF_AX25 AF_AX25 #define PF_IPX AF_IPX +#define PF_MAX AF_MAX /* Flags we can use with send/ and recv. 
*/ #define MSG_OOB 1 #define MSG_PEEK 2 diff -u --recursive --new-file v1.1.19/linux/include/linux/sockios.h linux/include/linux/sockios.h --- v1.1.19/linux/include/linux/sockios.h Tue May 24 00:34:57 1994 +++ linux/include/linux/sockios.h Fri Jun 17 07:53:56 1994 @@ -26,7 +26,7 @@ #define FIOGETOWN 0x8903 #define SIOCGPGRP 0x8904 #define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8096 /* Get stamp */ +#define SIOCGSTAMP 0x8906 /* Get stamp */ /* Routing table calls. */ #define SIOCADDRT 0x890B /* add routing table entry */ diff -u --recursive --new-file v1.1.19/linux/include/linux/un.h linux/include/linux/un.h --- v1.1.19/linux/include/linux/un.h Wed Dec 1 14:44:15 1993 +++ linux/include/linux/un.h Fri Jun 17 07:53:56 1994 @@ -1,9 +1,11 @@ #ifndef _LINUX_UN_H #define _LINUX_UN_H +#define UNIX_PATH_MAX 108 + struct sockaddr_un { unsigned short sun_family; /* AF_UNIX */ - char sun_path[108]; /* pathname */ + char sun_path[UNIX_PATH_MAX]; /* pathname */ }; #endif /* _LINUX_UN_H */ diff -u --recursive --new-file v1.1.19/linux/include/linux/unistd.h linux/include/linux/unistd.h --- v1.1.19/linux/include/linux/unistd.h Sat May 7 14:54:12 1994 +++ linux/include/linux/unistd.h Thu Jun 16 10:30:25 1994 @@ -142,6 +142,7 @@ #define __NR_fchdir 133 #define __NR_bdflush 134 #define __NR_sysfs 135 +#define __NR_personality 136 extern int errno; diff -u --recursive --new-file v1.1.19/linux/kernel/Makefile linux/kernel/Makefile --- v1.1.19/linux/kernel/Makefile Thu Jun 9 18:56:12 1994 +++ linux/kernel/Makefile Thu Jun 16 10:30:26 1994 @@ -16,7 +16,7 @@ .c.o: $(CC) $(CFLAGS) -c $< -OBJS = sched.o sys_call.o traps.o irq.o dma.o fork.o \ +OBJS = sched.o sys_call.o traps.o irq.o dma.o fork.o exec_domain.o \ panic.o printk.o vsprintf.o sys.o module.o ksyms.o exit.o \ signal.o ptrace.o ioport.o itimer.o \ info.o ldt.o time.o tqueue.o vm86.o diff -u --recursive --new-file v1.1.19/linux/kernel/exec_domain.c linux/kernel/exec_domain.c --- v1.1.19/linux/kernel/exec_domain.c Thu Jan 1 02:00:00 
1970 +++ linux/kernel/exec_domain.c Thu Jun 16 11:07:24 1994 @@ -0,0 +1,102 @@ +#include +#include +#include + + +static asmlinkage void no_lcall7(struct pt_regs * regs); + + +static unsigned long ident_map[33] = { /* identity signal map, indexed by signal number */ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32 /* entry 32 keeps signal_invmap[signr] in bounds for signr == 32 */ +}; + +struct exec_domain default_exec_domain = { + "Linux", /* name */ + no_lcall7, /* lcall7 causes a seg fault. */ + 0, 0xff, /* All personalities. */ + ident_map, /* Identity map signals. */ + ident_map, /* - both ways. */ + NULL, /* No usage counter. */ + NULL /* Nothing after this in the list. */ +}; + +static struct exec_domain *exec_domains = &default_exec_domain; + + +static asmlinkage void no_lcall7(struct pt_regs * regs) +{ + send_sig(SIGSEGV, current, 1); +} + +struct exec_domain *lookup_exec_domain(unsigned long personality) +{ + unsigned long pers = personality & PER_MASK; + struct exec_domain *it; + + for (it=exec_domains; it; it=it->next) + if (pers >= it->pers_low + && pers <= it->pers_high) + return it; + + /* Should never get this far.
*/ + printk(KERN_ERR "No execution domain for personality 0x%02lx\n", pers); + return NULL; +} + +int register_exec_domain(struct exec_domain *it) +{ + struct exec_domain *tmp; + + if (!it) + return -EINVAL; + if (it->next) + return -EBUSY; + for (tmp=exec_domains; tmp; tmp=tmp->next) + if (tmp == it) + return -EBUSY; + it->next = exec_domains; + exec_domains = it; + return 0; +} + +int unregister_exec_domain(struct exec_domain *it) +{ + struct exec_domain ** tmp; + + tmp = &exec_domains; + while (*tmp) { + if (it == *tmp) { + *tmp = it->next; + it->next = NULL; + return 0; + } + tmp = &(*tmp)->next; + } + return -EINVAL; +} + +asmlinkage int sys_personality(unsigned long personality) +{ + struct exec_domain *it; + unsigned long old_personality; + + if (personality == 0xffffffff) + return current->personality; + + it = lookup_exec_domain(personality); + if (!it) + return -EINVAL; + + old_personality = current->personality; + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)--; + current->personality = personality; + current->exec_domain = it; + if (current->exec_domain->use_count) + (*current->exec_domain->use_count)++; + + return old_personality; +} diff -u --recursive --new-file v1.1.19/linux/kernel/exit.c linux/kernel/exit.c --- v1.1.19/linux/kernel/exit.c Fri May 27 10:49:15 1994 +++ linux/kernel/exit.c Thu Jun 16 10:30:26 1994 @@ -483,6 +483,10 @@ #ifdef DEBUG_PROC_TREE audit_ptree(); #endif + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)--; + if (current->binfmt && current->binfmt->use_count) + (*current->binfmt->use_count)--; schedule(); /* * In order to get rid of the "volatile function does return" message diff -u --recursive --new-file v1.1.19/linux/kernel/fork.c linux/kernel/fork.c --- v1.1.19/linux/kernel/fork.c Sat May 7 14:54:15 1994 +++ linux/kernel/fork.c Thu Jun 16 12:57:46 1994 @@ -94,7 +94,6 @@ struct vm_area_struct * mpnt, **p, *tmp; tsk->mm->mmap 
= NULL; - tsk->mm->stk_vma = NULL; p = &tsk->mm->mmap; for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); @@ -107,8 +106,6 @@ tmp->vm_inode->i_count++; *p = tmp; p = &tmp->vm_next; - if (current->mm->stk_vma == mpnt) - tsk->mm->stk_vma = tmp; } return 0; } @@ -184,6 +181,12 @@ goto bad_fork_free; task[nr] = p; *p = *current; + + if (p->exec_domain && p->exec_domain->use_count) + (*p->exec_domain->use_count)++; + if (p->binfmt && p->binfmt->use_count) + (*p->binfmt->use_count)++; + p->did_exec = 0; p->kernel_stack_page = 0; p->state = TASK_UNINTERRUPTIBLE; diff -u --recursive --new-file v1.1.19/linux/kernel/irq.c linux/kernel/irq.c --- v1.1.19/linux/kernel/irq.c Mon Mar 28 11:21:52 1994 +++ linux/kernel/irq.c Fri Jun 17 13:56:20 1994 @@ -306,7 +306,7 @@ static void math_error_irq(int cpl) { outb(0,0xF0); - if (ignore_irq13) + if (ignore_irq13 || !hard_math) return; math_error(); } diff -u --recursive --new-file v1.1.19/linux/kernel/ksyms.c linux/kernel/ksyms.c --- v1.1.19/linux/kernel/ksyms.c Tue May 24 08:47:08 1994 +++ linux/kernel/ksyms.c Fri Jun 17 11:43:28 1994 @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include #endif @@ -61,7 +62,7 @@ X(wp_works_ok), /* process memory management */ - X(__verify_write), + X(verify_area), X(do_mmap), X(do_munmap), X(insert_vm_struct), @@ -96,6 +97,11 @@ /* executable format registration */ X(register_binfmt), X(unregister_binfmt), + + /* execution environment registration */ + X(lookup_exec_domain), + X(register_exec_domain), + X(unregister_exec_domain), /* interrupt handling */ X(request_irq), diff -u --recursive --new-file v1.1.19/linux/kernel/ldt.c linux/kernel/ldt.c --- v1.1.19/linux/kernel/ldt.c Wed Dec 15 11:01:09 1993 +++ linux/kernel/ldt.c Fri Jun 17 08:36:13 1994 @@ -58,8 +58,10 @@ limit *= PAGE_SIZE; limit += base; +#ifdef NOTDEF_KLUDGE if (limit < base || limit >= 0xC0000000) return -EINVAL; 
+#endif if (!current->ldt) { for (i=1 ; isignal_invmap[signr], frame+1); + if (current->exec_domain && current->exec_domain->signal_invmap) + put_fs_long(current->exec_domain->signal_invmap[signr], frame+1); + else + put_fs_long(signr, frame+1); put_fs_long(regs->gs, frame+2); put_fs_long(regs->fs, frame+3); put_fs_long(regs->es, frame+4); @@ -348,8 +349,10 @@ case SIGQUIT: case SIGILL: case SIGTRAP: case SIGIOT: case SIGFPE: case SIGSEGV: - if (core_dump(signr,regs)) - signr |= 0x80; + if (current->binfmt && current->binfmt->core_dump) { + if (current->binfmt->core_dump(signr, regs)) + signr |= 0x80; + } /* fall through */ default: current->signal |= _S(signr & 0x7f); diff -u --recursive --new-file v1.1.19/linux/kernel/sys.c linux/kernel/sys.c --- v1.1.19/linux/kernel/sys.c Tue May 24 00:34:58 1994 +++ linux/kernel/sys.c Fri Jun 17 13:33:40 1994 @@ -39,6 +39,9 @@ { int i; + if (current->pid != 0) + return -EPERM; + /* Map out the low memory: it's no longer needed */ for (i = 0 ; i < 768 ; i++) swapper_pg_dir[i] = 0; diff -u --recursive --new-file v1.1.19/linux/kernel/sys_call.S linux/kernel/sys_call.S --- v1.1.19/linux/kernel/sys_call.S Fri May 27 10:49:15 1994 +++ linux/kernel/sys_call.S Thu Jun 16 10:30:26 1994 @@ -78,7 +78,7 @@ errno = 24 dbgreg6 = 52 dbgreg7 = 56 -lcall7 = 60 +exec_domain = 60 ENOSYS = 38 @@ -144,7 +144,8 @@ movl %esp,%eax movl _current,%edx pushl %eax - movl lcall7(%edx),%edx + movl exec_domain(%edx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain call *%edx popl %eax jmp ret_from_sys_call @@ -535,5 +536,6 @@ .long _sys_fchdir .long _sys_bdflush .long _sys_sysfs /* 135 */ + .long _sys_personality - .space (NR_syscalls-135)*4 + .space (NR_syscalls-136)*4 diff -u --recursive --new-file v1.1.19/linux/kernel/traps.c linux/kernel/traps.c --- v1.1.19/linux/kernel/traps.c Sat May 7 14:54:16 1994 +++ linux/kernel/traps.c Fri Jun 17 09:09:47 1994 @@ -40,7 +40,7 @@ } #define get_seg_byte(seg,addr) ({ \ 
-register char __res; \ +register unsigned char __res; \ __asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \ :"=a" (__res):"0" (seg),"m" (*(addr))); \ __res;}) @@ -123,21 +123,30 @@ DO_ERROR( 8, SIGSEGV, "double fault", double_fault, current) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun, last_task_used_math) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS, current) -DO_ERROR(11, SIGSEGV, "segment not present", segment_not_present, current) -DO_ERROR(12, SIGSEGV, "stack segment", stack_segment, current) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present, current) +DO_ERROR(12, SIGBUS, "stack segment", stack_segment, current) DO_ERROR(15, SIGSEGV, "reserved", reserved, current) DO_ERROR(17, SIGSEGV, "alignment check", alignment_check, current) asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) { + int signr = SIGSEGV; + if (regs->eflags & VM_MASK) { handle_vm86_fault((struct vm86_regs *) regs, error_code); return; } + die_if_kernel("general protection",regs,error_code); + switch (get_seg_byte(regs->cs, (char *)regs->eip)) { + case 0xCD: /* INT */ + case 0xF4: /* HLT */ + case 0xFA: /* CLI */ + case 0xFB: /* STI */ + signr = SIGILL; + } current->tss.error_code = error_code; current->tss.trap_no = 13; - send_sig(SIGSEGV, current, 1); - die_if_kernel("general protection",regs,error_code); + send_sig(signr, current, 1); } asmlinkage void do_nmi(struct pt_regs * regs, long error_code) diff -u --recursive --new-file v1.1.19/linux/mm/memory.c linux/mm/memory.c --- v1.1.19/linux/mm/memory.c Thu Jun 9 18:56:13 1994 +++ linux/mm/memory.c Fri Jun 17 13:22:06 1994 @@ -59,6 +59,7 @@ extern void sound_mem_init(void); extern void die_if_kernel(char *,struct pt_regs *,long); +extern void show_net_buffers(void); /* * The free_area_list arrays point to the queue heads of the free areas @@ -78,11 +79,11 @@ /* * oom() prints a message (so that the user knows why the process died), - * and gives the 
process an untrappable SIGSEGV. + * and gives the process an untrappable SIGKILL. */ void oom(struct task_struct * task) { - printk("\nout of memory\n"); + printk("\nOut of memory.\n"); task->sigaction[SIGKILL-1].sa_handler = NULL; task->blocked &= ~(1<<(SIGKILL-1)); send_sig(SIGKILL,task,1); @@ -660,7 +661,7 @@ *pg_table = 0; } -int __verify_write(unsigned long start, unsigned long size) +static int __verify_write(unsigned long start, unsigned long size) { size--; size += start & ~PAGE_MASK; @@ -673,6 +674,38 @@ return 0; } +int verify_area(int type, const void * addr, unsigned long size) +{ + struct vm_area_struct * vma; + + for (vma = current->mm->mmap ; ; vma = vma->vm_next) { + if (!vma) + goto bad_area; + if (vma->vm_end > (unsigned long) addr) + break; + } + if (vma->vm_start <= (unsigned long) addr) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (vma->vm_end - (unsigned long) addr > current->rlim[RLIMIT_STACK].rlim_cur) + goto bad_area; +good_area: + while (vma->vm_end - (unsigned long) addr < size) { + struct vm_area_struct * next = vma->vm_next; + if (!next) + goto bad_area; + if (vma->vm_end != next->vm_start) + goto bad_area; + vma = next; + } + if (wp_works_ok || type == VERIFY_READ || !size) + return 0; + return __verify_write((unsigned long) addr,size); +bad_area: + return -EFAULT; +} + static inline void get_empty_page(struct task_struct * tsk, unsigned long address) { unsigned long tmp; @@ -843,8 +876,8 @@ tmp = *(unsigned long *) page; if (tmp & PAGE_PRESENT) return; - ++tsk->mm->rss; if (tmp) { + ++tsk->mm->rss; ++tsk->mm->maj_flt; swap_in((unsigned long *) page); return; @@ -859,10 +892,12 @@ continue; } if (!mpnt->vm_ops || !mpnt->vm_ops->nopage) { + ++tsk->mm->rss; ++tsk->mm->min_flt; get_empty_page(tsk,address); return; } + ++tsk->mm->rss; mpnt->vm_ops->nopage(error_code, mpnt, address); return; } @@ -870,7 +905,7 @@ goto ok_no_page; if (address >= tsk->mm->end_data && address < tsk->mm->brk) goto ok_no_page; - 
if (mpnt && mpnt == tsk->mm->stk_vma && + if (mpnt && (mpnt->vm_flags & VM_GROWSDOWN) && address - tmp > mpnt->vm_start - address && tsk->rlim[RLIMIT_STACK].rlim_cur > mpnt->vm_end - address) { mpnt->vm_start = address; @@ -883,6 +918,7 @@ if (error_code & 4) /* user level access? */ return; ok_no_page: + ++tsk->mm->rss; ++tsk->mm->min_flt; get_empty_page(tsk,address); } @@ -1021,6 +1057,7 @@ printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); show_buffers(); + show_net_buffers(); } extern unsigned long free_area_init(unsigned long, unsigned long); diff -u --recursive --new-file v1.1.19/linux/mm/mmap.c linux/mm/mmap.c --- v1.1.19/linux/mm/mmap.c Sat May 7 14:54:17 1994 +++ linux/mm/mmap.c Thu Jun 16 13:02:19 1994 @@ -340,6 +340,7 @@ mpnt->vm_start = addr; mpnt->vm_end = addr + len; mpnt->vm_page_prot = prot; + mpnt->vm_flags = 0; mpnt->vm_share = NULL; mpnt->vm_inode = inode; inode->i_count++; @@ -459,6 +460,7 @@ mpnt->vm_start = addr; mpnt->vm_end = addr + len; mpnt->vm_page_prot = mask; + mpnt->vm_flags = 0; mpnt->vm_share = NULL; mpnt->vm_inode = NULL; mpnt->vm_offset = 0; diff -u --recursive --new-file v1.1.19/linux/net/inet/README linux/net/inet/README --- v1.1.19/linux/net/inet/README Thu Jun 2 13:50:55 1994 +++ linux/net/inet/README Fri Jun 17 07:53:57 1994 @@ -1,5 +1,88 @@ -This is snapshot 014 +This is snapshot 015 +Changes for the 015 snapshot + +o All read/write buffers are validated at the top level _only_ +o All address structures are moved to and from user mode at the top + level. Thus you can now issue proto->bind(....) calls and related + functions such as connect from another kernel task. 
All that's left + to fix now is a kernel alloc_socket()/free_socket() and accompanying + proto->make_kernel(socket) +o Small fixes to address behaviour caused by the above +o Max NFS size of 16K bytes +o Added the apricot driver as a test (#'ed out in config.in) +o Fixed a missing function definition in net_init.c +o Added G4KLX ax25_router code +o Added Niibe's PLIP driver and altered it to support timer + configuration and IRQ/port setting. Added if_plip.h. Comments and + feedback appreciated on this (both to Niibe and me). +o Added AF_MAX/PF_MAX defines +o Added a note that the DE600 driver also works for a noname 'PE1200'. +o Network buffer statistics on shift-scroll_lock +o Fixed a serious race in the device driver code. This was causing odd + crashes with the Lance drivers, lockups with the ne2000 cards and + a few other 'bad' goings on. All drivers are affected. See + README.DEV if porting a driver to this revision. + + If you see entries in your 'free while locked' count, those would + typically have crashed a pre 1.1.20 kernel. + +o TCP keeps the timers above 0.2sec round-trip time because of the use of + delayed ACKs by BSD style kernels. +o Fixed a small BSD error in the return from datagram socket + recv/recvfrom() calls when data is truncated. BSD returns the true + length of the message, Linux returned the amount copied which broke + programs that did a MSG_PEEK with a small buffer and grew it if need + be (some of the AV/RTP stuff notably). +o Added TIOCINQ/OUTQ to AX.25 and IPX. +o Added driver ioctl() calls to IPX. +o Corrected the skb->len==0 in the tcp_data reset on shutdown to check + skb->copied_seq. +o IP options reflect onto SO_PRIORITY. +o When a driver is downed its ARP entries are flushed. Should solve + the occasional crash when taking out a modular driver. +o Added Donald's multicast reception while promiscuous fix for the + 8390 drivers. +o Potential ARP TCP-retransmit clear race fixed.
Incredibly + unlikely to occur but no doubt it will 8(. + +To Do + +o Fast path the tcp for packets in order with no flags set - we ought + to hit cable speed on slower machines if we fix that and the below. + (under test - define TCP_FASTPATH in tcp.c if you want to be brave + and report any findings.) +o Include the HP onboard lance fixes. +o Fix Unix domain sockets. +o Fix the _SLOW_ TCP window calculation junk in tcp_data/tcp_ack. +o Make the dev_add_proto() list hashed by protocol type. +o Remove the call to dev->header_type - load it into the skbuff + instead to avoid the extra calls and cache misses. +o Include new sk_buff skb_push() code and move toward using it. +o Fix the PI driver so pi0a can be down when pi0b is up without getting + crashes. Also fix the stuff to allow piconfig to set the parameters. +o Make AX.25 set the packet type - certainly before it hits IP. +o sk_buff building at the top level - pure kernel interfaces to the + protocol layers +o Clean up NFS, merge NFS/TCP code. +o SIGIO +o IP forwarding use of options properly (needs new sk_buff code) +o Reroute TCP retransmits if needed (needs new sk_buffs) + + +Fixes added for 1.1.19 + +o Unix domain bind error code. +o skb->localroute starts cleared. +o Compiles with networking disabled + +Fixes added for 1.1.18 + +o Dummy driver includes correctly. +o PPP fixes from A.L. +o ifslave fixes +o Small error causing nfsd to coredump fixed by Linus. + Fixes added for 1.1.17 o Charles Hedrick's fixes broken fragmentation totally. Mended. @@ -10,10 +93,12 @@ o PLIP fix by Tanabe. Fixes added for 1.1.16 + o Charles Hedricks fixes to TCP. o Small fixes all over the place. Fixes added for 1.1.15 + o Modular PLIP and 3c501 drivers. Now you -can- have multiple 3c501's (sort of).
o Integrated new AX.25 - this will be ready for the big time in a few diff -u --recursive --new-file v1.1.19/linux/net/inet/af_inet.c linux/net/inet/af_inet.c --- v1.1.19/linux/net/inet/af_inet.c Thu Jun 9 18:56:13 1994 +++ linux/net/inet/af_inet.c Fri Jun 17 07:53:57 1994 @@ -379,7 +379,9 @@ return sk->prot->setsockopt(sk,level,optname,optval,optlen); } - +/* + * Get a socket option on an AF_INET socket. + */ static int inet_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) @@ -393,6 +395,9 @@ return sk->prot->getsockopt(sk,level,optname,optval,optlen); } +/* + * Automatically bind an unbound socket. + */ static int inet_autobind(struct sock *sk) { @@ -408,6 +413,10 @@ return 0; } +/* + * Move a socket into listening state. + */ + static int inet_listen(struct socket *sock, int backlog) { struct sock *sk = (struct sock *) sock->data; @@ -728,10 +737,9 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in addr; + struct sockaddr_in *addr=(struct sockaddr_in *)uaddr; struct sock *sk=(struct sock *)sock->data, *sk2; unsigned short snum; - int err; int chk_addr_ret; /* check this error. */ @@ -740,12 +748,10 @@ if (sk->num != 0) return(-EINVAL); - err=verify_area(VERIFY_READ, uaddr, addr_len); - if(err) - return err; - memcpy_fromfs(&addr, uaddr, min(sizeof(addr), addr_len)); + if(addr_lensin_port); /* * We can't just leave the socket bound wherever it is, it might @@ -759,12 +765,12 @@ if (snum < PROT_SOCK && !suser()) return(-EACCES); - chk_addr_ret = ip_chk_addr(addr.sin_addr.s_addr); - if (addr.sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR) + chk_addr_ret = ip_chk_addr(addr->sin_addr.s_addr); + if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR) return(-EADDRNOTAVAIL); /* Source address MUST be ours! 
*/ - if (chk_addr_ret || addr.sin_addr.s_addr == 0) - sk->saddr = addr.sin_addr.s_addr; + if (chk_addr_ret || addr->sin_addr.s_addr == 0) + sk->saddr = addr->sin_addr.s_addr; /* Make sure we are allowed to bind here. */ cli(); @@ -841,7 +847,7 @@ } if (sock->state == SS_CONNECTING && sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK)) - return -EALREADY; /* Connecting is currently in progress */ + return -EINPROGRESS; /* Connecting is currently in progress */ if (sock->state != SS_CONNECTING) { @@ -985,46 +991,27 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { - struct sockaddr_in sin; + struct sockaddr_in *sin=(struct sockaddr_in *)uaddr; struct sock *sk; - int len; - int err; - - - err = verify_area(VERIFY_WRITE,uaddr_len,sizeof(long)); - if(err) - return err; - - len=get_fs_long(uaddr_len); - err = verify_area(VERIFY_WRITE, uaddr, len); - if(err) - return err; - - /* Check this error. */ - if (len < sizeof(sin)) - return(-EINVAL); - - sin.sin_family = AF_INET; + sin->sin_family = AF_INET; sk = (struct sock *) sock->data; if (peer) { if (!tcp_connected(sk->state)) return(-ENOTCONN); - sin.sin_port = sk->dummy_th.dest; - sin.sin_addr.s_addr = sk->daddr; + sin->sin_port = sk->dummy_th.dest; + sin->sin_addr.s_addr = sk->daddr; } else { - sin.sin_port = sk->dummy_th.source; + sin->sin_port = sk->dummy_th.source; if (sk->saddr == 0) - sin.sin_addr.s_addr = ip_my_addr(); + sin->sin_addr.s_addr = ip_my_addr(); else - sin.sin_addr.s_addr = sk->saddr; + sin->sin_addr.s_addr = sk->saddr; } - len = sizeof(sin); - memcpy_tofs(uaddr, &sin, sizeof(sin)); - put_fs_long(len, uaddr_len); + *uaddr_len = sizeof(*sin); return(0); } @@ -1033,40 +1020,46 @@ * The assorted BSD I/O operations */ - -static int inet_recv(struct socket *sock, void *ubuf, int size, int noblock, - unsigned flags) +static int inet_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, + unsigned flags, struct sockaddr *sin, int *addr_len ) { 
struct sock *sk = (struct sock *) sock->data; - int err; + if (sk->prot->recvfrom == NULL) + return(-EOPNOTSUPP); if(sk->err) return inet_error(sk); - if(size<0) - return -EINVAL; - if(size==0) - return 0; - err=verify_area(VERIFY_WRITE,ubuf,size); - if(err) - return err; - /* We may need to bind the socket. */ - if(inet_autobind(sk)) - return(-EAGAIN); - return(sk->prot->read(sk, (unsigned char *) ubuf, size, noblock, flags)); + if(inet_autobind(sk)!=0) + return(-EAGAIN); + return(sk->prot->recvfrom(sk, (unsigned char *) ubuf, size, noblock, flags, + (struct sockaddr_in*)sin, addr_len)); } +static int inet_recv(struct socket *sock, void *ubuf, int size, int noblock, + unsigned flags) +{ + /* BSD explicitly states these are the same - so we do it this way to be sure */ + return inet_recvfrom(sock,ubuf,size,noblock,flags,NULL,NULL); +} + static int inet_read(struct socket *sock, char *ubuf, int size, int noblock) { - return inet_recv(sock,ubuf,size,noblock,0); + struct sock *sk = (struct sock *) sock->data; + + if(sk->err) + return inet_error(sk); + /* We may need to bind the socket. */ + if(inet_autobind(sk)) + return(-EAGAIN); + return(sk->prot->read(sk, (unsigned char *) ubuf, size, noblock, 0)); } static int inet_send(struct socket *sock, void *ubuf, int size, int noblock, unsigned flags) { struct sock *sk = (struct sock *) sock->data; - int err; if (sk->shutdown & SEND_SHUTDOWN) { send_sig(SIGPIPE, current, 1); @@ -1074,13 +1067,6 @@ } if(sk->err) return inet_error(sk); - if(size<0) - return -EINVAL; - if(size==0) - return 0; - err=verify_area(VERIFY_READ,ubuf,size); - if(err) - return err; /* We may need to bind the socket. 
*/ if(inet_autobind(sk)!=0) return(-EAGAIN); @@ -1095,7 +1081,6 @@ static int inet_sendto(struct socket *sock, void *ubuf, int size, int noblock, unsigned flags, struct sockaddr *sin, int addr_len) { - int err; struct sock *sk = (struct sock *) sock->data; if (sk->shutdown & SEND_SHUTDOWN) { @@ -1106,16 +1091,7 @@ return(-EOPNOTSUPP); if(sk->err) return inet_error(sk); - if(size<0) - return -EINVAL; - if(size==0) - return 0; - err=verify_area(VERIFY_READ,ubuf,size); - if(err) - return err; - /* We may need to bind the socket. */ - if(inet_autobind(sk)!=0) return -EAGAIN; return(sk->prot->sendto(sk, (unsigned char *) ubuf, size, noblock, flags, @@ -1123,32 +1099,6 @@ } -static int inet_recvfrom(struct socket *sock, void *ubuf, int size, int noblock, - unsigned flags, struct sockaddr *sin, int *addr_len ) -{ - struct sock *sk = (struct sock *) sock->data; - int err; - - if (sk->prot->recvfrom == NULL) - return(-EOPNOTSUPP); - if(sk->err) - return inet_error(sk); - if(size<0) - return -EINVAL; - if(size==0) - return 0; - err=verify_area(VERIFY_WRITE,ubuf,size); - if(err) - return err; - - /* We may need to bind the socket. */ - if(inet_autobind(sk)!=0) - return(-EAGAIN); - return(sk->prot->recvfrom(sk, (unsigned char *) ubuf, size, noblock, flags, - (struct sockaddr_in*)sin, addr_len)); -} - - static int inet_shutdown(struct socket *sock, int how) { struct sock *sk=(struct sock*)sock->data; @@ -1352,7 +1302,8 @@ struct inet_protocol *p; int i; - printk("Swansea University Computer Society NET3.014\n"); + + printk("NET3 TCP/IP protcols stack v016\n"); /* * Tell SOCKET that we are alive... diff -u --recursive --new-file v1.1.19/linux/net/inet/arp.c linux/net/inet/arp.c --- v1.1.19/linux/net/inet/arp.c Thu Jun 9 18:56:14 1994 +++ linux/net/inet/arp.c Fri Jun 17 07:53:57 1994 @@ -27,6 +27,7 @@ * * Ross Martin : Rewrote arp_rcv() and arp_get_info() * Stephen Henson : Add AX25 support to arp_get_info() + * Alan Cox : Drop data when a device is downed. 
*/ #include @@ -228,20 +229,62 @@ static void arp_release_entry(struct arp_table *entry) { struct sk_buff *skb; + unsigned long flags; if (entry->flags & ATF_PUBL) proxies--; + + save_flags(flags); + cli(); /* Release the list of `skb' pointers. */ while ((skb = skb_dequeue(&entry->skb)) != NULL) { - if (skb->free) - kfree_skb(skb, FREE_WRITE); + skb_device_lock(skb); + restore_flags(flags); + dev_kfree_skb(skb, FREE_WRITE); } + restore_flags(flags); del_timer(&entry->timer); kfree_s(entry, sizeof(struct arp_table)); return; } +/* + * Purge a device from the ARP queue + */ + +void arp_device_down(struct device *dev) +{ + int i; + unsigned long flags; + + /* + * This is a bit OTT - maybe we need some arp semaphores instead. + */ + save_flags(flags); + cli(); + for (i = 0; i < ARP_TABLE_SIZE; i++) + { + struct arp_table *entry; + struct arp_table **pentry = &arp_tables[i]; + + while ((entry = *pentry) != NULL) + { + if(entry->dev==dev) + { + *pentry = entry->next; /* remove from list */ + if (entry->flags & ATF_PUBL) + proxies--; + del_timer(&entry->timer); /* Paranoia */ + kfree_s(entry, sizeof(struct arp_table)); + } + else + pentry = &entry->next; /* go to next entry */ + } + } + restore_flags(flags); +} + /* * Create and send an arp packet. If (dest_hw == NULL), we create a broadcast @@ -393,6 +436,7 @@ { struct sk_buff *skb; + unsigned long flags; /* * Empty the entire queue, building its data up ready to send @@ -405,9 +449,14 @@ return; } + save_flags(flags); + + cli(); while((skb = skb_dequeue(&entry->skb)) != NULL) { IS_SKB(skb); + skb_device_lock(skb); + restore_flags(flags); if(!skb->dev->rebuild_header(skb->data,skb->dev,skb->raddr,skb)) { skb->arp = 1; @@ -419,12 +468,13 @@ else { /* This routine is only ever called when 'entry' is - complete. Thus this can't fail (but does) */ + complete. Thus this can't fail. */ printk("arp_send_q: The impossible occurred. 
Please notify Alan.\n"); printk("arp_send_q: active entity %s\n",in_ntoa(entry->ip)); printk("arp_send_q: failed to find %s\n",in_ntoa(skb->raddr)); } } + restore_flags(flags); } diff -u --recursive --new-file v1.1.19/linux/net/inet/arp.h linux/net/inet/arp.h --- v1.1.19/linux/net/inet/arp.h Mon May 23 12:14:26 1994 +++ linux/net/inet/arp.h Fri Jun 17 07:53:57 1994 @@ -4,6 +4,7 @@ extern void arp_init(void); extern void arp_destroy(unsigned long paddr, int force); +extern void arp_device_down(struct device *dev); extern int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt); extern int arp_find(unsigned char *haddr, unsigned long paddr, diff -u --recursive --new-file v1.1.19/linux/net/inet/dev.c linux/net/inet/dev.c --- v1.1.19/linux/net/inet/dev.c Thu Jun 9 18:56:14 1994 +++ linux/net/inet/dev.c Fri Jun 17 07:53:58 1994 @@ -16,6 +16,11 @@ * Alan Cox * David Hinds * + * Changes: + * Alan Cox : device private ioctl copies fields back. + * Alan Cox : Transmit queue code does relevant stunts to + * keep the queue safe. + * * Cleaned up and recommented by Alan Cox 2nd April 1994. I hope to have * the rest as well commented in the end. */ @@ -292,7 +297,8 @@ */ #ifdef CONFIG_INET ip_rt_flush(dev); -#endif + arp_device_down(dev); +#endif #ifdef CONFIG_IPX ipxrtr_device_down(dev); #endif @@ -321,6 +327,10 @@ /* * Send (or queue for sending) a packet. + * + * IMPORTANT: When this is called to resend frames. The caller MUST + * already have locked the sk_buff. Apart from that we do the + * rest of the magic. */ void dev_queue_xmit(struct sk_buff *skb, struct device *dev, int pri) @@ -328,13 +338,16 @@ unsigned long flags; int where = 0; /* used to say if the packet should go */ /* at the front or the back of the */ - /* queue. 
*/ + /* queue - front is a retranmsit try */ if (dev == NULL) { printk("dev.c: dev_queue_xmit: dev = NULL\n"); return; } + + if(pri>=0 && !skb_device_locked(skb)) + skb_device_lock(skb); /* Shove a lock on the frame */ #ifdef CONFIG_SLAVE_BALANCING save_flags(flags); cli(); @@ -386,6 +399,7 @@ */ if (!skb->arp && dev->rebuild_header(skb->data, dev, skb->raddr, skb)) { + skb_device_unlock(skb); /* It's now safely on the arp queue */ return; } @@ -396,7 +410,9 @@ skb->in_dev_queue=1; #endif skb_queue_tail(dev->buffs + pri,skb); + skb_device_unlock(skb); /* Buffer is on the device queue and can be freed safely */ skb = skb_dequeue(dev->buffs + pri); + skb_device_lock(skb); /* New buffer needs locking down */ #ifdef CONFIG_SLAVE_BALANCING skb->in_dev_queue=0; #endif @@ -404,6 +420,9 @@ restore_flags(flags); if (dev->hard_start_xmit(skb, dev) == 0) { + /* + * Packet is now solely the responsibility of the driver + */ #ifdef CONFIG_SLAVE_BALANCING dev->pkt_queue--; #endif @@ -411,13 +430,15 @@ } /* - * Transmission failed, put skb back into a list. + * Transmission failed, put skb back into a list. Once on the list its safe and + * no longer device locked (it can be freed safely from the device queue) */ cli(); #ifdef CONFIG_SLAVE_BALANCING skb->in_dev_queue=1; dev->pkt_queue++; #endif + skb_device_unlock(skb); skb_queue_head(dev->buffs + pri,skb); restore_flags(flags); } @@ -783,7 +804,9 @@ { int i; struct sk_buff *skb; + unsigned long flags; + save_flags(flags); /* * Work the queues in priority order */ @@ -794,9 +817,16 @@ * Pull packets from the queue */ + + cli(); while((skb=skb_dequeue(&dev->buffs[i]))!=NULL) { /* + * Stop anyone freeing the buffer while we retransmit it + */ + skb_device_lock(skb); + restore_flags(flags); + /* * Feed them to the output stage and if it fails * indicate they re-queue at the front. 
*/ @@ -806,8 +836,10 @@ */ if (dev->tbusy) return; + cli(); } } + restore_flags(flags); } @@ -1222,7 +1254,9 @@ case SIOCDEVPRIVATE: if(dev->do_ioctl==NULL) return -EOPNOTSUPP; - return dev->do_ioctl(dev, &ifr); + ret=dev->do_ioctl(dev, &ifr); + memcpy_tofs(arg,&ifr,sizeof(struct ifreq)); + break; case SIOCGIFMAP: ifr.ifr_map.mem_start=dev->mem_start; diff -u --recursive --new-file v1.1.19/linux/net/inet/ip.c linux/net/inet/ip.c --- v1.1.19/linux/net/inet/ip.c Thu Jun 9 18:56:14 1994 +++ linux/net/inet/ip.c Fri Jun 17 07:53:58 1994 @@ -51,6 +51,7 @@ * UDP as there is a nasty checksum issue * if you do things the wrong way. * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file + * Alan Cox : IP options adjust sk->priority. * * To Fix: * IP option processing is mostly not needed. ip_forward needs to know about routing rules @@ -1578,25 +1579,9 @@ */ if (ipprot->copy) { -#if 0 - skb2 = alloc_skb(skb->mem_len-sizeof(struct sk_buff), GFP_ATOMIC); - if (skb2 == NULL) - continue; - memcpy(skb2, skb, skb2->mem_len); - skb2->ip_hdr = (struct iphdr *)( - (unsigned long)skb2 + - (unsigned long) skb->ip_hdr - - (unsigned long)skb); - skb2->h.raw = (unsigned char *)( - (unsigned long)skb2 + - (unsigned long) skb->h.raw - - (unsigned long)skb); - skb2->free=1; -#else skb2 = skb_clone(skb, GFP_ATOMIC); if(skb2==NULL) continue; -#endif } else { @@ -1958,6 +1943,10 @@ if(val<0||val>255) return -EINVAL; sk->ip_tos=val; + if(val==IPTOS_LOWDELAY) + sk->priority=SOPRI_INTERACTIVE; + if(val==IPTOS_THROUGHPUT) + sk->priority=SOPRI_BACKGROUND; return 0; case IP_TTL: if(val<1||val>255) diff -u --recursive --new-file v1.1.19/linux/net/inet/ipx.c linux/net/inet/ipx.c --- v1.1.19/linux/net/inet/ipx.c Tue May 31 12:48:20 1994 +++ linux/net/inet/ipx.c Fri Jun 17 07:53:58 1994 @@ -51,6 +51,7 @@ #include #include #include +#include /* For TIOCOUTQ/INQ */ #include #include "p8022.h" @@ -462,16 +463,7 @@ static int ipx_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) { - 
ipx_socket *sk; - - sk=(ipx_socket *)sock->data; - - if(sk==NULL) - { - printk("IPX:fcntl:passed sock->data=NULL\n"); - return(0); - } - + ipx_socket *sk=(ipx_socket *)sock->data; switch(cmd) { default: @@ -479,22 +471,16 @@ } } -static int ipx_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen) +static int ipx_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) { ipx_socket *sk; int err,opt; sk=(ipx_socket *)sock->data; - if(sk==NULL) - { - printk("IPX:setsockopt:passed sock->data=NULL\n"); - return 0; - } - if(optval==NULL) return(-EINVAL); + err=verify_area(VERIFY_READ,optval,sizeof(int)); if(err) return err; @@ -531,11 +517,6 @@ int err; sk=(ipx_socket *)sock->data; - if(sk==NULL) - { - printk("IPX:getsockopt:passed NULL sock->data.\n"); - return 0; - } switch(level) { @@ -676,53 +657,48 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr,int addr_len) { ipx_socket *sk; - int err; - struct sockaddr_ipx addr; struct ipx_route *rt; unsigned char *nodestart; + struct sockaddr_ipx *addr=(struct sockaddr_ipx *)uaddr; sk=(ipx_socket *)sock->data; - if(sk==NULL) - { - printk("IPX:bind:sock->data=NULL\n"); - return 0; - } if(sk->zapped==0) return(-EIO); - err=verify_area(VERIFY_READ,uaddr,addr_len); - if(err) - return err; - if(addr_len!=sizeof(addr)) + if(addr_len!=sizeof(struct sockaddr_ipx)) return -EINVAL; - memcpy_fromfs(&addr,uaddr,addr_len); - if (addr.sipx_port == 0) { - addr.sipx_port = first_free_socketnum(); - if (addr.sipx_port == 0) return -EINVAL; + if (addr->sipx_port == 0) + { + addr->sipx_port = first_free_socketnum(); + if (addr->sipx_port == 0) + return -EINVAL; } - if(ntohs(addr.sipx_port)<0x4000 && !suser()) + if(ntohs(addr->sipx_port)<0x4000 && !suser()) return(-EPERM); /* protect IPX system stuff like routing/sap */ /* Source addresses are easy. 
It must be our network:node pair for an interface routed to IPX with the ipx routing ioctl() */ - if(ipx_find_socket(addr.sipx_port)!=NULL) + if(ipx_find_socket(addr->sipx_port)!=NULL) { if(sk->debug) printk("IPX: bind failed because port %X in use.\n", - (int)addr.sipx_port); + (int)addr->sipx_port); return -EADDRINUSE; } - sk->ipx_source_addr.sock=addr.sipx_port; + sk->ipx_source_addr.sock=addr->sipx_port; - if (addr.sipx_network == 0L) { + if (addr->sipx_network == 0L) + { rt = ipxrtr_get_default_net(); - } else { - rt = ipxrtr_get_dev(addr.sipx_network); + } + else + { + rt = ipxrtr_get_dev(addr->sipx_network); } if(rt == NULL) @@ -751,32 +727,22 @@ int addr_len, int flags) { ipx_socket *sk=(ipx_socket *)sock->data; - struct sockaddr_ipx addr; - int err; + struct sockaddr_ipx *addr; - if(sk==NULL) - { - printk("IPX:connect:sock->data=NULL!\n"); - return 0; - } - sk->state = TCP_CLOSE; sock->state = SS_UNCONNECTED; if(addr_len!=sizeof(addr)) return(-EINVAL); - err=verify_area(VERIFY_READ,uaddr,addr_len); - if(err) - return err; - memcpy_fromfs(&addr,uaddr,sizeof(addr)); + addr=(struct sockaddr_ipx *)uaddr; if(sk->ipx_source_addr.net==0) /* Must bind first - no autobinding in this */ return -EINVAL; - sk->ipx_dest_addr.net=addr.sipx_network; - sk->ipx_dest_addr.sock=addr.sipx_port; - memcpy(sk->ipx_dest_addr.node,addr.sipx_node,sizeof(sk->ipx_source_addr.node)); + sk->ipx_dest_addr.net=addr->sipx_network; + sk->ipx_dest_addr.sock=addr->sipx_port; + memcpy(sk->ipx_dest_addr.node,addr->sipx_node,sizeof(sk->ipx_source_addr.node)); if(ipxrtr_get_dev(sk->ipx_dest_addr.net)==NULL) return -ENETUNREACH; sock->state = SS_CONNECTED; @@ -802,23 +768,10 @@ ipx_address *addr; struct sockaddr_ipx sipx; ipx_socket *sk; - int len; - int err; sk=(ipx_socket *)sock->data; - err = verify_area(VERIFY_WRITE,uaddr_len,sizeof(long)); - if(err) - return err; - - len = get_fs_long(uaddr_len); - - err = verify_area(VERIFY_WRITE, uaddr, len); - if(err) - return err; - - if(lensock; 
sipx.sipx_network = addr->net; memcpy(sipx.sipx_node,addr->node,sizeof(sipx.sipx_node)); - memcpy_tofs(uaddr,&sipx,sizeof(sipx)); - put_fs_long(len,uaddr_len); + memcpy(uaddr,&sipx,sizeof(sipx)); return(0); } @@ -966,7 +918,7 @@ /* Ok its for us ! */ if (ln->net == 0L) { - printk("IPX: Registering local net %lx\n", ipx->ipx_dest.net); +/* printk("IPX: Registering local net %lx\n", ipx->ipx_dest.net);*/ ln->net = ipx->ipx_dest.net; } @@ -1002,8 +954,7 @@ { ipx_socket *sk=(ipx_socket *)sock->data; struct sockaddr_ipx *usipx=(struct sockaddr_ipx *)usip; - int err; - struct sockaddr_ipx sipx; + struct sockaddr_ipx local_sipx; struct sk_buff *skb; struct device *dev; struct ipx_packet *ipx; @@ -1016,38 +967,32 @@ if(flags) return -EINVAL; - if(len<0) - return -EINVAL; - if(len == 0) - return 0; if(usipx) { - if(addr_len sipx_family != AF_IPX) return -EINVAL; - if(htons(sipx.sipx_port)<0x4000 && !suser()) + if(htons(usipx->sipx_port)<0x4000 && !suser()) return -EPERM; } else { if(sk->state!=TCP_ESTABLISHED) return -ENOTCONN; - sipx.sipx_family=AF_IPX; - sipx.sipx_port=sk->ipx_dest_addr.sock; - sipx.sipx_network=sk->ipx_dest_addr.net; - memcpy(sipx.sipx_node,sk->ipx_dest_addr.node,sizeof(sipx.sipx_node)); + usipx=&local_sipx; + usipx->sipx_family=AF_IPX; + usipx->sipx_port=sk->ipx_dest_addr.sock; + usipx->sipx_network=sk->ipx_dest_addr.net; + memcpy(usipx->sipx_node,sk->ipx_dest_addr.node,sizeof(usipx->sipx_node)); } if(sk->debug) printk("IPX: sendto: Addresses built.\n"); - if(memcmp(&sipx.sipx_node,&ipx_broadcast_node,6)==0) { + if(memcmp(&usipx->sipx_node,&ipx_broadcast_node,6)==0) + { if (!sk->broadcast) return -ENETUNREACH; broadcast = 1; @@ -1057,19 +1002,16 @@ if(sk->debug) printk("IPX: sendto: building packet.\n"); - err=verify_area(VERIFY_READ,ubuf,len); - if(err) - return err; size=sizeof(ipx_packet)+len; /* For mac headers */ /* Find out where this has to go */ - if (sipx.sipx_network == 0L) { + if (usipx->sipx_network == 0L) { rt = ipxrtr_get_default_net(); if 
(rt != NULL) - sipx.sipx_network = rt->net; + usipx->sipx_network = rt->net; } else - rt=ipxrtr_get_dev(sipx.sipx_network); + rt=ipxrtr_get_dev(usipx->sipx_network); if(rt==NULL) { @@ -1107,7 +1049,7 @@ /* Build Data Link header */ dl->datalink_header(dl, skb, - (rt->flags&IPX_RT_ROUTED)?rt->router_node:sipx.sipx_node); + (rt->flags&IPX_RT_ROUTED)?rt->router_node:usipx->sipx_node); /* See if we are sending to ourself */ memset(IPXaddr, '\0', 6); @@ -1116,7 +1058,7 @@ self_addressing = !memcmp(IPXaddr, (rt->flags&IPX_RT_ROUTED)?rt->router_node - :sipx.sipx_node, + :usipx->sipx_node, 6); /* Now the IPX */ @@ -1126,12 +1068,12 @@ ipx->ipx_checksum=0xFFFF; ipx->ipx_pktsize=htons(len+sizeof(ipx_packet)); ipx->ipx_tctrl=0; - ipx->ipx_type=sipx.sipx_type; + ipx->ipx_type=usipx->sipx_type; memcpy(&ipx->ipx_source,&sk->ipx_source_addr,sizeof(ipx->ipx_source)); - ipx->ipx_dest.net=sipx.sipx_network; - memcpy(ipx->ipx_dest.node,sipx.sipx_node,sizeof(ipx->ipx_dest.node)); - ipx->ipx_dest.sock=sipx.sipx_port; + ipx->ipx_dest.net=usipx->sipx_network; + memcpy(ipx->ipx_dest.node,usipx->sipx_node,sizeof(ipx->ipx_dest.node)); + ipx->ipx_dest.sock=usipx->sipx_port; if(sk->debug) printk("IPX: Appending user data.\n"); /* User data follows immediately after the IPX data */ @@ -1193,26 +1135,9 @@ return er; } - if(size==0) - return 0; - if(size<0) - return -EINVAL; if(addr_len) - { - er=verify_area(VERIFY_WRITE,addr_len,sizeof(*addr_len)); - if(er) - return er; - put_fs_long(sizeof(*sipx),addr_len); - } - if(sipx) - { - er=verify_area(VERIFY_WRITE,sipx,sizeof(*sipx)); - if(er) - return er; - } - er=verify_area(VERIFY_WRITE,ubuf,size); - if(er) - return er; + *addr_len=sizeof(*sipx); + skb=skb_recv_datagram(sk,flags,noblock,&er); if(skb==NULL) return er; @@ -1223,14 +1148,11 @@ if(sipx) { - struct sockaddr_ipx addr; - - addr.sipx_family=AF_IPX; - addr.sipx_port=ipx->ipx_source.sock; - memcpy(addr.sipx_node,ipx->ipx_source.node,sizeof(addr.sipx_node)); - 
addr.sipx_network=ipx->ipx_source.net; - addr.sipx_type = ipx->ipx_type; - memcpy_tofs(sipx,&addr,sizeof(*sipx)); + sipx->sipx_family=AF_IPX; + sipx->sipx_port=ipx->ipx_source.sock; + memcpy(sipx->sipx_node,ipx->ipx_source.node,sizeof(sipx->sipx_node)); + sipx->sipx_network=ipx->ipx_source.net; + sipx->sipx_type = ipx->ipx_type; } skb_free_datagram(skb); return(copied); @@ -1272,14 +1194,72 @@ static int ipx_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) { + int err; + long amount=0; + ipx_socket *sk=(ipx_socket *)sock->data; switch(cmd) { + case TIOCOUTQ: + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); + if(err) + return err; + amount=sk->sndbuf-sk->wmem_alloc; + if(amount<0) + amount=0; + put_fs_long(amount,(unsigned long *)arg); + return 0; + case TIOCINQ: + { + struct sk_buff *skb; + /* These two are safe on a single CPU system as only user tasks fiddle here */ + if((skb=skb_peek(&sk->receive_queue))!=NULL) + amount=skb->len; + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); + put_fs_long(amount,(unsigned long *)arg); + return 0; + } case SIOCADDRT: case SIOCDELRT: if(!suser()) return -EPERM; return(ipxrtr_ioctl(cmd,(void *)arg)); + case SIOCGSTAMP: + if (sk) + { + if(sk->stamp.tv_sec==0) + return -ENOENT; + err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(struct timeval)); + if(err) + return err; + memcpy_tofs((void *)arg,&sk->stamp,sizeof(struct timeval)); + return 0; + } + return -EINVAL; + case SIOCGIFCONF: + case SIOCGIFFLAGS: + case SIOCSIFFLAGS: + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + case SIOCGIFMEM: + case SIOCSIFMEM: + case SIOCGIFMTU: + case SIOCSIFMTU: + case SIOCSIFLINK: + case SIOCGIFHWADDR: + case SIOCSIFHWADDR: + case OLD_SIOCGIFHWADDR: + return(dev_ioctl(cmd,(void *) arg)); + + default: return -EINVAL; } @@ 
-1353,7 +1333,7 @@ if ((p8022_datalink = register_8022_client(val, ipx_rcv)) == NULL) printk("IPX: Unable to register with 802.2\n"); - printk("Swansea University Computer Society IPX 0.25 BETA for NET3 014\n"); + printk("Swansea University Computer Society IPX 0.26 BETA for NET3.016\n"); } #endif diff -u --recursive --new-file v1.1.19/linux/net/inet/packet.c linux/net/inet/packet.c --- v1.1.19/linux/net/inet/packet.c Tue May 24 00:35:02 1994 +++ linux/net/inet/packet.c Fri Jun 17 07:53:59 1994 @@ -21,6 +21,7 @@ * Alan Cox : Uses the improved datagram code. * Alan Cox : Added NULL's for socket options. * Alan Cox : Re-commented the code. + * Alan Cox : Use new kernel side addressing * * * This program is free software; you can redistribute it and/or @@ -124,8 +125,7 @@ { struct sk_buff *skb; struct device *dev; - struct sockaddr saddr; - int err; + struct sockaddr *saddr=(struct sockaddr *)usin; /* * Check the flags. @@ -133,8 +133,6 @@ if (flags) return(-EINVAL); - if (len < 0) - return(-EINVAL); /* * Get and verify the address. @@ -142,31 +140,18 @@ if (usin) { - if (addr_len < sizeof(saddr)) + if (addr_len < sizeof(*saddr)) return(-EINVAL); - err=verify_area(VERIFY_READ, usin, sizeof(saddr)); - if(err) - return err; - memcpy_fromfs(&saddr, usin, sizeof(saddr)); } else return(-EINVAL); /* SOCK_PACKET must be sent giving an address */ - - /* - * Check the buffer is readable. - */ - - err=verify_area(VERIFY_READ,from,len); - if(err) - return(err); - /* * Find the device first to size check it */ - saddr.sa_data[13] = 0; - dev = dev_get(saddr.sa_data); + saddr->sa_data[13] = 0; + dev = dev_get(saddr->sa_data); if (dev == NULL) { return(-ENXIO); @@ -180,15 +165,11 @@ if(len>dev->mtu) return -EMSGSIZE; - /* - * Now allocate the buffer, knowing 4K pagelimits wont break this line. - */ - skb = sk->prot->wmalloc(sk, len, 0, GFP_KERNEL); /* * If the write buffer is full, then tough. At this level the user gets to - * deal with the problem. 
+ * deal with the problem - do your own algorithmic backoffs. */ if (skb == NULL) @@ -289,12 +270,9 @@ struct sk_buff *skb; struct sockaddr *saddr; int err; + int truesize; saddr = (struct sockaddr *)sin; - if (len == 0) - return(0); - if (len < 0) - return(-EINVAL); if (sk->shutdown & RCV_SHUTDOWN) return(0); @@ -305,29 +283,9 @@ */ if (addr_len) - { - err=verify_area(VERIFY_WRITE, addr_len, sizeof(*addr_len)); - if(err) - return err; - put_fs_long(sizeof(*saddr), addr_len); - } - - if(saddr) - { - err=verify_area(VERIFY_WRITE, saddr, sizeof(*saddr)); - if(err) - return err; - } + *addr_len=sizeof(*saddr); /* - * Check the user given area can be written to. - */ - - err=verify_area(VERIFY_WRITE,to,len); - if(err) - return err; - - /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it * in the protocol layers. @@ -349,7 +307,8 @@ * user program they can ask the device for its MTU anyway. */ - copied = min(len, skb->len); + truesize = skb->len; + copied = min(len, truesize); memcpy_tofs(to, skb->data, copied); /* We can't use skb_copy_datagram here */ @@ -359,11 +318,8 @@ if (saddr) { - struct sockaddr addr; - - addr.sa_family = skb->dev->type; - memcpy(addr.sa_data,skb->dev->name, 14); - memcpy_tofs(saddr, &addr, sizeof(*saddr)); + saddr->sa_family = skb->dev->type; + memcpy(saddr->sa_data,skb->dev->name, 14); } /* @@ -378,7 +334,7 @@ */ release_sock(sk); - return(copied); + return(truesize); } diff -u --recursive --new-file v1.1.19/linux/net/inet/proc.c linux/net/inet/proc.c --- v1.1.19/linux/net/inet/proc.c Tue May 31 12:48:20 1994 +++ linux/net/inet/proc.c Fri Jun 17 07:53:59 1994 @@ -151,6 +151,9 @@ extern struct tcp_mib tcp_statistics; extern struct udp_mib udp_statistics; int len; +/* + extern unsigned long tcp_rx_miss, tcp_rx_hit1,tcp_rx_hit2; +*/ len = sprintf (buffer, "Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers 
OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates\n" @@ -197,6 +200,12 @@ "Udp: InDatagrams NoPorts InErrors OutDatagrams\nUdp: %lu %lu %lu %lu\n", udp_statistics.UdpInDatagrams, udp_statistics.UdpNoPorts, udp_statistics.UdpInErrors, udp_statistics.UdpOutDatagrams); + +/* + len += sprintf( buffer + len, + "TCP fast path RX: H2: %lu H1: %lu L: %lu\n", + tcp_rx_hit2,tcp_rx_hit1,tcp_rx_miss); +*/ if (offset >= len) { diff -u --recursive --new-file v1.1.19/linux/net/inet/raw.c linux/net/inet/raw.c --- v1.1.19/linux/net/inet/raw.c Tue May 24 00:35:03 1994 +++ linux/net/inet/raw.c Fri Jun 17 07:53:59 1994 @@ -24,6 +24,7 @@ * Alan Cox : Removed wake_up calls * Alan Cox : Use ttl/tos * Alan Cox : Cleaned up old debugging + * Alan Cox : Use new kernel side addresses * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -51,86 +52,92 @@ #include "udp.h" -static unsigned long -min(unsigned long a, unsigned long b) +static inline unsigned long min(unsigned long a, unsigned long b) { - if (a < b) return(a); - return(b); + if (a < b) + return(a); + return(b); } /* raw_err gets called by the icmp module. */ -void -raw_err (int err, unsigned char *header, unsigned long daddr, +void raw_err (int err, unsigned char *header, unsigned long daddr, unsigned long saddr, struct inet_protocol *protocol) { - struct sock *sk; + struct sock *sk; - if (protocol == NULL) return; - sk = (struct sock *) protocol->data; - if (sk == NULL) return; - - /* This is meaningless in raw sockets. */ - if (err & 0xff00 == (ICMP_SOURCE_QUENCH << 8)) { - if (sk->cong_window > 1) sk->cong_window = sk->cong_window/2; - return; - } + if (protocol == NULL) + return; + sk = (struct sock *) protocol->data; + if (sk == NULL) + return; + + /* This is meaningless in raw sockets.
*/ + if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) /* '==' binds tighter than '&', so the mask must be parenthesised */ + { + if (sk->cong_window > 1) sk->cong_window = sk->cong_window/2; + return; + } - sk->err = icmp_err_convert[err & 0xff].errno; - sk->error_report(sk); + sk->err = icmp_err_convert[err & 0xff].errno; + sk->error_report(sk); - return; + return; } /* - * This should be the easiest of all, all we do is\ - * copy it into a buffer. + * This should be the easiest of all, all we do is + * copy it into a buffer. */ -int -raw_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, + +int raw_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, unsigned long daddr, unsigned short len, unsigned long saddr, int redo, struct inet_protocol *protocol) { - struct sock *sk; + struct sock *sk; - if (skb == NULL) - return(0); + if (skb == NULL) + return(0); - if (protocol == NULL) - { - kfree_skb(skb, FREE_READ); - return(0); - } + if (protocol == NULL) + { + kfree_skb(skb, FREE_READ); + return(0); + } - sk = (struct sock *) protocol->data; - if (sk == NULL) - { - kfree_skb(skb, FREE_READ); - return(0); - } + sk = (struct sock *) protocol->data; + if (sk == NULL) + { + kfree_skb(skb, FREE_READ); + return(0); + } + + /* Now we need to copy this into memory. */ + + skb->sk = sk; + skb->len = len + skb->ip_hdr->ihl*sizeof(long); + skb->h.raw = (unsigned char *) skb->ip_hdr; + skb->dev = dev; + skb->saddr = daddr; + skb->daddr = saddr; + + /* Charge it to the socket. */ + + if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) + { + ip_statistics.IpInDiscards++; + skb->sk=NULL; + kfree_skb(skb, FREE_READ); + return(0); + } - /* Now we need to copy this into memory. */ - skb->sk = sk; - skb->len = len + skb->ip_hdr->ihl*sizeof(long); - skb->h.raw = (unsigned char *) skb->ip_hdr; - skb->dev = dev; - skb->saddr = daddr; - skb->daddr = saddr; - - /* Charge it too the socket.
*/ - if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) { - ip_statistics.IpInDiscards++; - skb->sk=NULL; - kfree_skb(skb, FREE_READ); + sk->rmem_alloc += skb->mem_len; + ip_statistics.IpInDelivers++; + skb_queue_tail(&sk->receive_queue,skb); + sk->data_ready(sk,skb->len); + release_sock(sk); return(0); - } - sk->rmem_alloc += skb->mem_len; - ip_statistics.IpInDelivers++; - skb_queue_tail(&sk->receive_queue,skb); - sk->data_ready(sk,skb->len); - release_sock(sk); - return(0); } /* @@ -160,10 +167,7 @@ { if (addr_len < sizeof(sin)) return(-EINVAL); - err=verify_area (VERIFY_READ, usin, sizeof (sin)); - if(err) - return err; - memcpy_fromfs(&sin, usin, sizeof(sin)); + memcpy(&sin, usin, sizeof(sin)); if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL); } @@ -279,123 +283,105 @@ } -static int -raw_init(struct sock *sk) +static int raw_init(struct sock *sk) { - struct inet_protocol *p; - - p = (struct inet_protocol *) kmalloc(sizeof (*p), GFP_KERNEL); - if (p == NULL) return(-ENOMEM); + struct inet_protocol *p; - p->handler = raw_rcv; - p->protocol = sk->protocol; - p->data = (void *)sk; - p->err_handler = raw_err; - p->name="USER"; - p->frag_handler = NULL; /* For now */ - inet_add_protocol(p); + p = (struct inet_protocol *) kmalloc(sizeof (*p), GFP_KERNEL); + if (p == NULL) + return(-ENOMEM); + + p->handler = raw_rcv; + p->protocol = sk->protocol; + p->data = (void *)sk; + p->err_handler = raw_err; + p->name="USER"; + p->frag_handler = NULL; /* For now */ + inet_add_protocol(p); - /* We need to remember this somewhere. */ - sk->pair = (struct sock *)p; + /* We need to remember this somewhere. */ + sk->pair = (struct sock *)p; - return(0); + return(0); } /* - * This should be easy, if there is something there - * we return it, otherwise we block. + * This should be easy, if there is something there + * we return it, otherwise we block. 
*/ -int -raw_recvfrom(struct sock *sk, unsigned char *to, int len, - int noblock, unsigned flags, struct sockaddr_in *sin, + +int raw_recvfrom(struct sock *sk, unsigned char *to, int len, + int noblock, unsigned flags, struct sockaddr_in *sin, int *addr_len) { - int copied=0; - struct sk_buff *skb; - int err; - - if (len == 0) return(0); - if (len < 0) return(-EINVAL); - - if (sk->shutdown & RCV_SHUTDOWN) return(0); - if (addr_len) { - err=verify_area(VERIFY_WRITE, addr_len, sizeof(*addr_len)); - if(err) - return err; - put_fs_long(sizeof(*sin), addr_len); - } - if(sin) - { - err=verify_area(VERIFY_WRITE, sin, sizeof(*sin)); - if(err) - return err; - } - - err=verify_area(VERIFY_WRITE,to,len); - if(err) - return err; - - skb=skb_recv_datagram(sk,flags,noblock,&err); - if(skb==NULL) - return err; + int copied=0; + struct sk_buff *skb; + int err; + int truesize; + + if (sk->shutdown & RCV_SHUTDOWN) + return(0); + + if (addr_len) + *addr_len=sizeof(*sin); + + skb=skb_recv_datagram(sk,flags,noblock,&err); + if(skb==NULL) + return err; - copied = min(len, skb->len); + truesize=skb->len; + copied = min(len, truesize); - skb_copy_datagram(skb, 0, to, copied); - sk->stamp=skb->stamp; + skb_copy_datagram(skb, 0, to, copied); + sk->stamp=skb->stamp; - /* Copy the address. */ - if (sin) { - struct sockaddr_in addr; - - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = skb->daddr; - memcpy_tofs(sin, &addr, sizeof(*sin)); - } - - skb_free_datagram(skb); - release_sock(sk); - return (copied); + /* Copy the address. */ + if (sin) + { + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = skb->daddr; + } + skb_free_datagram(skb); + release_sock(sk); + return (truesize); /* len not copied. BSD returns the true size of the message so you know a bit fell off! 
*/ } -int -raw_read (struct sock *sk, unsigned char *buff, int len, int noblock, - unsigned flags) +int raw_read (struct sock *sk, unsigned char *buff, int len, int noblock,unsigned flags) { - return(raw_recvfrom(sk, buff, len, noblock, flags, NULL, NULL)); + return(raw_recvfrom(sk, buff, len, noblock, flags, NULL, NULL)); } struct proto raw_prot = { - sock_wmalloc, - sock_rmalloc, - sock_wfree, - sock_rfree, - sock_rspace, - sock_wspace, - raw_close, - raw_read, - raw_write, - raw_sendto, - raw_recvfrom, - ip_build_header, - udp_connect, - NULL, - ip_queue_xmit, - ip_retransmit, - NULL, - NULL, - raw_rcv, - datagram_select, - NULL, - raw_init, - NULL, - ip_setsockopt, - ip_getsockopt, - 128, - 0, - {NULL,}, - "RAW" + sock_wmalloc, + sock_rmalloc, + sock_wfree, + sock_rfree, + sock_rspace, + sock_wspace, + raw_close, + raw_read, + raw_write, + raw_sendto, + raw_recvfrom, + ip_build_header, + udp_connect, + NULL, + ip_queue_xmit, + ip_retransmit, + NULL, + NULL, + raw_rcv, + datagram_select, + NULL, + raw_init, + NULL, + ip_setsockopt, + ip_getsockopt, + 128, + 0, + {NULL,}, + "RAW" }; diff -u --recursive --new-file v1.1.19/linux/net/inet/skbuff.c linux/net/inet/skbuff.c --- v1.1.19/linux/net/inet/skbuff.c Fri Jun 17 15:20:08 1994 +++ linux/net/inet/skbuff.c Fri Jun 17 07:53:59 1994 @@ -10,6 +10,12 @@ * 2 of the License, or (at your option) any later version. */ +/* + * Note: There are a load of cli()/sti() pairs protecting the net_memory type + * variables. Without them for some reason the ++/-- operators do not come out + * atomic. Also with gcc 2.4.5 these counts can come out wrong anyway - use 2.5.8!! 
+ */ + #include #include #include @@ -37,7 +43,20 @@ volatile unsigned long net_memory = 0; volatile unsigned long net_skbcount = 0; - +volatile unsigned long net_locked = 0; +volatile unsigned long net_allocs = 0; +volatile unsigned long net_fails = 0; +volatile unsigned long net_free_locked = 0; + +void show_net_buffers(void) +{ + printk("Networking buffers in use : %lu\n",net_skbcount); + printk("Memory committed to network buffers: %lu\n",net_memory); + printk("Network buffers locked by drivers : %lu\n",net_locked); + printk("Total network buffer allocations : %lu\n",net_allocs); + printk("Total failed network buffer allocs : %lu\n",net_fails); + printk("Total free while locked events : %lu\n",net_free_locked); +} #if CONFIG_SKB_CHECK @@ -222,7 +241,8 @@ result->prev = NULL; restore_flags(flags); - + + IS_SKB(result); return result; } @@ -314,71 +334,6 @@ } /* - * Get a clone of an sk_buff. This is the safe way to peek at - * a socket queue without accidents. Its a bit long but most - * of it acutally ends up as tiny bits of inline assembler - * anyway. Only the memcpy of upto 4K with ints off is not - * as nice as I'd like. - */ - -struct sk_buff *skb_peek_copy(struct sk_buff_head *list_) -{ - struct sk_buff *list = (struct sk_buff *)list_; - struct sk_buff *orig,*newsk; - unsigned long flags; - unsigned int len; - /* Now for some games to avoid races */ - - IS_SKB_HEAD(list); - do - { - save_flags(flags); - cli(); - orig = list->next; - if (orig == list) { - restore_flags(flags); - return NULL; - } - IS_SKB(orig); - len = orig->truesize; - restore_flags(flags); - - newsk = alloc_skb(len,GFP_KERNEL); /* May sleep */ - - if (newsk == NULL) /* Oh dear... 
not to worry */ - return NULL; - - save_flags(flags); - cli(); - if (list->next != orig) /* List changed go around another time */ - { - restore_flags(flags); - newsk->sk = NULL; - newsk->free = 1; - newsk->mem_addr = newsk; - newsk->mem_len = len; - kfree_skb(newsk, FREE_WRITE); - continue; - } - - IS_SKB(orig); - IS_SKB(newsk); - memcpy(newsk,orig,len); - newsk->next = NULL; - newsk->prev = NULL; - newsk->mem_addr = newsk; - newsk->h.raw += ((char *)newsk - (char *)orig); - newsk->link3 = NULL; - newsk->sk = NULL; - newsk->free = 1; - } - while(0); - - restore_flags(flags); - return newsk; -} - -/* * Free an sk_buff. This still knows about things it should * not need to like protocols and sockets. */ @@ -394,7 +349,8 @@ IS_SKB(skb); if (skb->lock) { - skb->free = 1; /* Free when unlocked */ + skb->free = 3; /* Free when unlocked */ + net_free_locked++; return; } if (skb->free == 2) @@ -408,9 +364,9 @@ if(skb->sk->prot!=NULL) { if (rw) - skb->sk->prot->rfree(skb->sk, skb->mem_addr, skb->mem_len); + skb->sk->prot->rfree(skb->sk, skb, skb->mem_len); else - skb->sk->prot->wfree(skb->sk, skb->mem_addr, skb->mem_len); + skb->sk->prot->wfree(skb->sk, skb, skb->mem_len); } else @@ -426,7 +382,7 @@ if(skb->in_dev_queue && skb->dev!=NULL) skb->dev->pkt_queue--; #endif - kfree_skbmem(skb->mem_addr,skb->mem_len); + kfree_skbmem(skb,skb->mem_len); } } else @@ -435,7 +391,7 @@ if(skb->in_dev_queue && skb->dev!=NULL) skb->dev->pkt_queue--; #endif - kfree_skbmem(skb->mem_addr, skb->mem_len); + kfree_skbmem(skb, skb->mem_len); } } @@ -447,6 +403,7 @@ struct sk_buff *alloc_skb(unsigned int size,int priority) { struct sk_buff *skb; + unsigned long flags; if (intr_count && priority!=GFP_ATOMIC) { static int count = 0; @@ -460,8 +417,17 @@ size+=sizeof(struct sk_buff); skb=(struct sk_buff *)kmalloc(size,priority); if (skb == NULL) + { + net_fails++; return NULL; + } +#ifdef PARANOID_BUGHUNT_MODE + if(skb->magic_debug_cookie == SK_GOOD_SKB) + printk("Kernel kmalloc handed us an 
existing skb (%p)\n",skb); +#endif + net_allocs++; + skb->free = 2; /* Invalid so we pick up forgetful users */ skb->lock = 0; skb->pkt_type = PACKET_HOST; /* Default type */ @@ -475,10 +441,14 @@ skb->prev = skb->next = NULL; skb->link3 = NULL; skb->sk = NULL; + skb->localroute=0; skb->stamp.tv_sec=0; /* No idea about time */ skb->localroute = 0; + save_flags(flags); + cli(); net_memory += size; net_skbcount++; + restore_flags(flags); #if CONFIG_SKB_CHECK skb->magic_debug_cookie = SK_GOOD_SKB; #endif @@ -490,34 +460,33 @@ * Free an skbuff by memory */ -void kfree_skbmem(void *mem,unsigned size) +void kfree_skbmem(struct sk_buff *skb,unsigned size) { -#ifdef CONFIG_SLAVE_BALANCING - struct sk_buff *x = mem; unsigned long flags; +#ifdef CONFIG_SLAVE_BALANCING save_flags(flags); cli(); - if(x->in_dev_queue && x->dev!=NULL) - x->dev->pkt_queue--; + if(skb->in_dev_queue && skb->dev!=NULL) + skb->dev->pkt_queue--; restore_flags(flags); #endif -#if CONFIG_SKB_CHECK -#ifndef CONFIG_SLAVE_BALANCING - struct sk_buff *x = mem; -#endif - IS_SKB(x); - if(x->magic_debug_cookie == SK_GOOD_SKB) + IS_SKB(skb); + if(size!=skb->truesize) + printk("kfree_skbmem: size mismatch.\n"); + + if(skb->magic_debug_cookie == SK_GOOD_SKB) { - x->magic_debug_cookie = SK_FREED_SKB; - kfree_s(mem,size); + save_flags(flags); + cli(); + IS_SKB(skb); + skb->magic_debug_cookie = SK_FREED_SKB; + kfree_s((void *)skb,size); net_skbcount--; net_memory -= size; + restore_flags(flags); } else printk("kfree_skbmem: bad magic cookie\n"); -#else - kfree_s(mem, size); -#endif } /* @@ -564,20 +533,40 @@ * Skbuff device locking */ -void skb_kept_by_device(struct sk_buff *skb) +void skb_device_lock(struct sk_buff *skb) { + if(skb->lock) + printk("double lock on device queue!\n"); + else + net_locked++; skb->lock++; } -void skb_device_release(struct sk_buff *skb, int mode) +void skb_device_unlock(struct sk_buff *skb) +{ + if(skb->lock==0) + printk("double unlock on device queue!\n"); + skb->lock--; + 
if(skb->lock==0) + net_locked--; +} + +void dev_kfree_skb(struct sk_buff *skb, int mode) { unsigned long flags; save_flags(flags); cli(); - if (!--skb->lock && skb->free == 1) + if(skb->lock==1) + net_locked--; + + if (!--skb->lock && (skb->free == 1 || skb->free == 3)) + { + restore_flags(flags); kfree_skb(skb,mode); - restore_flags(flags); + } + else + restore_flags(flags); } int skb_device_locked(struct sk_buff *skb) diff -u --recursive --new-file v1.1.19/linux/net/inet/sock.c linux/net/inet/sock.c --- v1.1.19/linux/net/inet/sock.c Tue May 24 00:35:04 1994 +++ linux/net/inet/sock.c Fri Jun 17 07:54:00 1994 @@ -60,6 +60,7 @@ * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code * Alan Cox : Split IP from generic code + * Alan Cox : New kfree_skbmem() * * To Fix: * @@ -307,124 +308,134 @@ } -struct sk_buff * -sock_wmalloc(struct sock *sk, unsigned long size, int force, - int priority) +struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) { - if (sk) { - if (sk->wmem_alloc + size < sk->sndbuf || force) { - struct sk_buff * c = alloc_skb(size, priority); - if (c) { - cli(); - sk->wmem_alloc+= c->mem_len; - sti(); + if (sk) + { + if (sk->wmem_alloc + size < sk->sndbuf || force) + { + struct sk_buff * c = alloc_skb(size, priority); + if (c) + { + cli(); + sk->wmem_alloc+= c->mem_len; + sti(); + } + return c; } - return c; + return(NULL); } - return(NULL); - } - return(alloc_skb(size, priority)); + return(alloc_skb(size, priority)); } -struct sk_buff * -sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) +struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) { - if (sk) { - if (sk->rmem_alloc + size < sk->rcvbuf || force) { - struct sk_buff *c = alloc_skb(size, priority); - if (c) { - cli(); - sk->rmem_alloc += c->mem_len; - sti(); + if (sk) + { + if (sk->rmem_alloc + size < sk->rcvbuf || force) + { + 
struct sk_buff *c = alloc_skb(size, priority); + if (c) + { + cli(); + sk->rmem_alloc += c->mem_len; + sti(); + } + return(c); } - return(c); + return(NULL); } - return(NULL); - } - return(alloc_skb(size, priority)); + return(alloc_skb(size, priority)); } -unsigned long -sock_rspace(struct sock *sk) +unsigned long sock_rspace(struct sock *sk) { - int amt; + int amt; - if (sk != NULL) { - if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW) return(0); - amt = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW); - if (amt < 0) return(0); - return(amt); - } - return(0); + if (sk != NULL) + { + if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW) + return(0); + amt = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW); + if (amt < 0) + return(0); + return(amt); + } + return(0); } -unsigned long -sock_wspace(struct sock *sk) +unsigned long sock_wspace(struct sock *sk) { - if (sk != NULL) { - if (sk->shutdown & SEND_SHUTDOWN) return(0); - if (sk->wmem_alloc >= sk->sndbuf) return(0); - return(sk->sndbuf-sk->wmem_alloc ); - } - return(0); + if (sk != NULL) + { + if (sk->shutdown & SEND_SHUTDOWN) + return(0); + if (sk->wmem_alloc >= sk->sndbuf) + return(0); + return(sk->sndbuf-sk->wmem_alloc ); + } + return(0); } -void -sock_wfree(struct sock *sk, void *mem, unsigned long size) +void sock_wfree(struct sock *sk, struct sk_buff *skb, unsigned long size) { - IS_SKB(mem); - kfree_skbmem(mem, size); - if (sk) { - sk->wmem_alloc -= size; - - /* In case it might be waiting for more memory. */ - if (!sk->dead) sk->write_space(sk); - return; - } + IS_SKB(skb); + kfree_skbmem(skb, size); + if (sk) + { + sk->wmem_alloc -= size; + /* In case it might be waiting for more memory. 
*/ + if (!sk->dead) + sk->write_space(sk); + return; + } } -void -sock_rfree(struct sock *sk, void *mem, unsigned long size) +void sock_rfree(struct sock *sk, struct sk_buff *skb, unsigned long size) { - IS_SKB(mem); - kfree_skbmem(mem, size); - if (sk) { - sk->rmem_alloc -= size; - } + IS_SKB(skb); + kfree_skbmem(skb, size); + if (sk) + { + sk->rmem_alloc -= size; + } } void release_sock(struct sock *sk) { - struct sk_buff *skb; + struct sk_buff *skb; - if (!sk->prot) - return; - - if (sk->blog) return; + if (!sk->prot) + return; + if (sk->blog) + return; #ifdef CONFIG_INET - /* See if we have any packets built up. */ - sk->inuse = 1; - while((skb = skb_dequeue(&sk->back_log)) != NULL) { - sk->blog = 1; - if (sk->prot->rcv) sk->prot->rcv(skb, skb->dev, sk->opt, - skb->saddr, skb->len, skb->daddr, 1, - /* Only used for/by raw sockets. */ - (struct inet_protocol *)sk->pair); - } + /* See if we have any packets built up. */ + sk->inuse = 1; + while((skb = skb_dequeue(&sk->back_log)) != NULL) + { + sk->blog = 1; + if (sk->prot->rcv) + sk->prot->rcv(skb, skb->dev, sk->opt, + skb->saddr, skb->len, skb->daddr, 1, + /* Only used for/by raw sockets. 
*/ + (struct inet_protocol *)sk->pair); + } #endif - sk->blog = 0; - sk->inuse = 0; + sk->blog = 0; + sk->inuse = 0; #ifdef CONFIG_INET - if (sk->dead && sk->state == TCP_CLOSE) { - /* Should be about 2 rtt's */ - reset_timer(sk, TIME_DONE, min(sk->rtt * 2, TCP_DONE_TIME)); - } + if (sk->dead && sk->state == TCP_CLOSE) + { + /* Should be about 2 rtt's */ + reset_timer(sk, TIME_DONE, min(sk->rtt * 2, TCP_DONE_TIME)); + } #endif } diff -u --recursive --new-file v1.1.19/linux/net/inet/sock.h linux/net/inet/sock.h --- v1.1.19/linux/net/inet/sock.h Tue May 31 12:48:21 1994 +++ linux/net/inet/sock.h Fri Jun 17 07:54:01 1994 @@ -180,9 +180,9 @@ struct sk_buff * (*rmalloc)(struct sock *sk, unsigned long size, int force, int priority); - void (*wfree)(struct sock *sk, void *mem, + void (*wfree)(struct sock *sk, struct sk_buff *skb, unsigned long size); - void (*rfree)(struct sock *sk, void *mem, + void (*rfree)(struct sock *sk, struct sk_buff *skb, unsigned long size); unsigned long (*rspace)(struct sock *sk); unsigned long (*wspace)(struct sock *sk); @@ -262,9 +262,9 @@ extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority); -extern void sock_wfree(struct sock *sk, void *mem, +extern void sock_wfree(struct sock *sk, struct sk_buff *skb, unsigned long size); -extern void sock_rfree(struct sock *sk, void *mem, +extern void sock_rfree(struct sock *sk, struct sk_buff *skb, unsigned long size); extern unsigned long sock_rspace(struct sock *sk); extern unsigned long sock_wspace(struct sock *sk); diff -u --recursive --new-file v1.1.19/linux/net/inet/tcp.c linux/net/inet/tcp.c --- v1.1.19/linux/net/inet/tcp.c Thu Jun 2 13:50:57 1994 +++ linux/net/inet/tcp.c Fri Jun 17 07:54:06 1994 @@ -72,6 +72,8 @@ * Gerhard Koerting: PC/TCP workarounds * Adam Caldwell : Assorted timer/timing errors * Matthew Dillon : Fixed another RST bug + * Alan Cox : Move to kernel side addressing changes. 
+ * Alan Cox : Beginning work on TCP fastpathing (not yet usable) * * * To Fix: @@ -80,6 +82,14 @@ * feel that _really_ its the BSD network programs that are bust (notably * inetd, which hangs occasionally because of this). * + * Fast path the code. Two things here - fix the window calculation + * so it doesn't iterate over the queue, also spot packets with no funny + * options arriving in order and process directly. + * Any assembler hacker who can speed up the checksum routines will + * be welcome as well as someone who feels like writing a single 'checksum udp + * and copy up to user mode for the first n bytes at the same time' routine. + * which should be quicker than the current sum then copy for the UDP layer. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -144,16 +154,22 @@ #include #include +#undef TCP_FASTPATH + #define SEQ_TICK 3 unsigned long seq_offset; struct tcp_mib tcp_statistics; +#ifdef TCP_FASTPATH +unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0; +#endif + -static __inline__ int -min(unsigned int a, unsigned int b) +static __inline__ int min(unsigned int a, unsigned int b) { - if (a < b) return(a); - return(b); + if (a < b) + return(a); + return(b); } @@ -185,21 +201,22 @@ * is technically allowed, but RFC1122 advises against it and * in practice it causes trouble. */ - if (new_window < min(sk->mss, MAX_WINDOW/2) || - new_window < sk->window) - return(sk->window); + if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window) + return(sk->window); return(new_window); } -/* Enter the time wait state. */ +/* + * Enter the time wait state. 
+ */ static void tcp_time_wait(struct sock *sk) { - sk->state = TCP_TIME_WAIT; - sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) - sk->state_change(sk); - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + sk->state = TCP_TIME_WAIT; + sk->shutdown = SHUTDOWN_MASK; + if (!sk->dead) + sk->state_change(sk); + reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); } /* @@ -209,22 +226,22 @@ * nothing clever here. */ -static void -tcp_retransmit(struct sock *sk, int all) +static void tcp_retransmit(struct sock *sk, int all) { - if (all) { - ip_retransmit(sk, all); - return; - } + if (all) + { + ip_retransmit(sk, all); + return; + } - sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */ - /* sk->ssthresh in theory can be zero. I guess that's OK */ - sk->cong_count = 0; + sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */ + /* sk->ssthresh in theory can be zero. I guess that's OK */ + sk->cong_count = 0; - sk->cong_window = 1; + sk->cong_window = 1; - /* Do the actual retransmit. */ - ip_retransmit(sk, all); + /* Do the actual retransmit. */ + ip_retransmit(sk, all); } @@ -236,53 +253,60 @@ * header points to the first 8 bytes of the tcp header. We need * to find the appropriate port. 
*/ -void -tcp_err(int err, unsigned char *header, unsigned long daddr, + +void tcp_err(int err, unsigned char *header, unsigned long daddr, unsigned long saddr, struct inet_protocol *protocol) { - struct tcphdr *th; - struct sock *sk; - struct iphdr *iph=(struct iphdr *)header; + struct tcphdr *th; + struct sock *sk; + struct iphdr *iph=(struct iphdr *)header; - header+=4*iph->ihl; + header+=4*iph->ihl; - th =(struct tcphdr *)header; - sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr); + th =(struct tcphdr *)header; + sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr); - if (sk == NULL) return; + if (sk == NULL) + return; - if(err<0) - { - sk->err = -err; - sk->error_report(sk); - return; - } + if(err<0) + { + sk->err = -err; + sk->error_report(sk); + return; + } + + if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) + { + /* + * FIXME: + * For now we will just trigger a linear backoff. + * The slow start code should cause a real backoff here. + */ + if (sk->cong_window > 4) + sk->cong_window--; + return; + } + +/* sk->err = icmp_err_convert[err & 0xff].errno; -- moved as TCP should hide non fatals internally (and does) */ - if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) { /* - * FIXME: - * For now we will just trigger a linear backoff. - * The slow start code should cause a real backoff here. + * If we've already connected we will keep trying + * until we time out, or the user gives up. */ - if (sk->cong_window > 4) sk->cong_window--; - return; - } - - sk->err = icmp_err_convert[err & 0xff].errno; - /* - * If we've already connected we will keep trying - * until we time out, or the user gives up. 
- */ - if (icmp_err_convert[err & 0xff].fatal) { - if (sk->state == TCP_SYN_SENT) { - tcp_statistics.TcpAttemptFails++; - sk->state = TCP_CLOSE; - sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ + if (icmp_err_convert[err & 0xff].fatal) + { + if (sk->state == TCP_SYN_SENT) + { + tcp_statistics.TcpAttemptFails++; + sk->state = TCP_CLOSE; + sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ + } + sk->err = icmp_err_convert[err & 0xff].errno; } - } - return; + return; } @@ -291,53 +315,57 @@ * in the received data queue (ie a frame missing that needs sending to us) */ -static int -tcp_readable(struct sock *sk) +static int tcp_readable(struct sock *sk) { - unsigned long counted; - unsigned long amount; - struct sk_buff *skb; - int sum; - unsigned long flags; - - if(sk && sk->debug) - printk("tcp_readable: %p - ",sk); - - save_flags(flags); - cli(); - if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL) - { - restore_flags(flags); - if(sk && sk->debug) - printk("empty\n"); - return(0); - } + unsigned long counted; + unsigned long amount; + struct sk_buff *skb; + int sum; + unsigned long flags; + + if(sk && sk->debug) + printk("tcp_readable: %p - ",sk); + + save_flags(flags); + cli(); + if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL) + { + restore_flags(flags); + if(sk && sk->debug) + printk("empty\n"); + return(0); + } - counted = sk->copied_seq+1; /* Where we are at the moment */ - amount = 0; + counted = sk->copied_seq+1; /* Where we are at the moment */ + amount = 0; - /* Do until a push or until we are out of data. 
*/ - do { - if (before(counted, skb->h.th->seq)) /* Found a hole so stops here */ - break; - sum = skb->len -(counted - skb->h.th->seq); /* Length - header but start from where we are up to (avoid overlaps) */ - if (skb->h.th->syn) - sum++; - if (sum >= 0) { /* Add it up, move on */ - amount += sum; - if (skb->h.th->syn) amount--; - counted += sum; - } - if (amount && skb->h.th->psh) break; - skb = skb->next; - } while(skb != (struct sk_buff *)&sk->receive_queue); - if (amount && !sk->urginline && sk->urg_data && - (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq)) - amount--; /* don't count urg data */ - restore_flags(flags); - if(sk->debug) - printk("got %lu bytes.\n",amount); - return(amount); + /* Do until a push or until we are out of data. */ + do + { + if (before(counted, skb->h.th->seq)) /* Found a hole so stops here */ + break; + sum = skb->len -(counted - skb->h.th->seq); /* Length - header but start from where we are up to (avoid overlaps) */ + if (skb->h.th->syn) + sum++; + if (sum >= 0) + { /* Add it up, move on */ + amount += sum; + if (skb->h.th->syn) + amount--; + counted += sum; + } + if (amount && skb->h.th->psh) break; + skb = skb->next; + } + while(skb != (struct sk_buff *)&sk->receive_queue); + + if (amount && !sk->urginline && sk->urg_data && + (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq)) + amount--; /* don't count urg data */ + restore_flags(flags); + if(sk->debug) + printk("got %lu bytes.\n",amount); + return(amount); } @@ -346,94 +374,105 @@ * listening socket has a receive queue of sockets to accept. 
*/ -static int -tcp_select(struct sock *sk, int sel_type, select_table *wait) +static int tcp_select(struct sock *sk, int sel_type, select_table *wait) { - sk->inuse = 1; - switch(sel_type) { - case SEL_IN: - if(sk->debug) - printk("select in"); - select_wait(sk->sleep, wait); - if(sk->debug) - printk("-select out"); - if (skb_peek(&sk->receive_queue) != NULL) { - if (sk->state == TCP_LISTEN || tcp_readable(sk)) { + sk->inuse = 1; + + switch(sel_type) + { + case SEL_IN: + if(sk->debug) + printk("select in"); + select_wait(sk->sleep, wait); + if(sk->debug) + printk("-select out"); + if (skb_peek(&sk->receive_queue) != NULL) + { + if (sk->state == TCP_LISTEN || tcp_readable(sk)) + { + release_sock(sk); + if(sk->debug) + printk("-select ok data\n"); + return(1); + } + } + if (sk->err != 0) /* Receiver error */ + { release_sock(sk); if(sk->debug) - printk("-select ok data\n"); + printk("-select ok error"); return(1); } - } - if (sk->err != 0) /* Receiver error */ - { - release_sock(sk); - if(sk->debug) - printk("-select ok error"); - return(1); - } - if (sk->shutdown & RCV_SHUTDOWN) { - release_sock(sk); - if(sk->debug) - printk("-select ok down\n"); - return(1); - } else { + if (sk->shutdown & RCV_SHUTDOWN) + { + release_sock(sk); + if(sk->debug) + printk("-select ok down\n"); + return(1); + } + else + { + release_sock(sk); + if(sk->debug) + printk("-select fail\n"); + return(0); + } + case SEL_OUT: + select_wait(sk->sleep, wait); + if (sk->shutdown & SEND_SHUTDOWN) + { + /* FIXME: should this return an error? */ + release_sock(sk); + return(0); + } + + /* + * FIXME: + * Hack so it will probably be able to write + * something if it says it's ok to write. + */ + + if (sk->prot->wspace(sk) >= sk->mss) + { + release_sock(sk); + /* This should cause connect to work ok. 
*/ + if (sk->state == TCP_SYN_RECV || + sk->state == TCP_SYN_SENT) return(0); + return(1); + } release_sock(sk); - if(sk->debug) - printk("-select fail\n"); return(0); - } - case SEL_OUT: - select_wait(sk->sleep, wait); - if (sk->shutdown & SEND_SHUTDOWN) { - /* FIXME: should this return an error? */ + case SEL_EX: + select_wait(sk->sleep,wait); + if (sk->err || sk->urg_data) + { + release_sock(sk); + return(1); + } release_sock(sk); return(0); - } - - /* - * FIXME: - * Hack so it will probably be able to write - * something if it says it's ok to write. - */ - if (sk->prot->wspace(sk) >= sk->mss) { - release_sock(sk); - /* This should cause connect to work ok. */ - if (sk->state == TCP_SYN_RECV || - sk->state == TCP_SYN_SENT) return(0); - return(1); - } - release_sock(sk); - return(0); - case SEL_EX: - select_wait(sk->sleep,wait); - if (sk->err || sk->urg_data) { - release_sock(sk); - return(1); - } - release_sock(sk); - return(0); - } + } - release_sock(sk); - return(0); + release_sock(sk); + return(0); } -int -tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { - int err; - switch(cmd) { + int err; + switch(cmd) + { - case TIOCINQ: + case TIOCINQ: #ifdef FIXME /* FIXME: */ - case FIONREAD: + case FIONREAD: #endif { unsigned long amount; - if (sk->state == TCP_LISTEN) return(-EINVAL); + if (sk->state == TCP_LISTEN) + return(-EINVAL); sk->inuse = 1; amount = tcp_readable(sk); @@ -445,7 +484,7 @@ put_fs_long(amount,(unsigned long *)arg); return(0); } - case SIOCATMARK: + case SIOCATMARK: { int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1; @@ -456,7 +495,7 @@ put_fs_long(answ,(int *) arg); return(0); } - case TIOCOUTQ: + case TIOCOUTQ: { unsigned long amount; @@ -469,71 +508,79 @@ put_fs_long(amount,(unsigned long *)arg); return(0); } - default: - return(-EINVAL); - } + default: + return(-EINVAL); + } } -/* This routine computes a TCP checksum. 
*/ -unsigned short -tcp_check(struct tcphdr *th, int len, +/* + * This routine computes a TCP checksum. + */ + +unsigned short tcp_check(struct tcphdr *th, int len, unsigned long saddr, unsigned long daddr) { - unsigned long sum; + unsigned long sum; - if (saddr == 0) saddr = ip_my_addr(); - __asm__("\t addl %%ecx,%%ebx\n" - "\t adcl %%edx,%%ebx\n" - "\t adcl $0, %%ebx\n" - : "=b"(sum) - : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256) - : "cx","bx","dx" ); - - if (len > 3) { - __asm__("\tclc\n" - "1:\n" - "\t lodsl\n" - "\t adcl %%eax, %%ebx\n" - "\t loop 1b\n" + if (saddr == 0) saddr = ip_my_addr(); + + __asm__("\t addl %%ecx,%%ebx\n" + "\t adcl %%edx,%%ebx\n" "\t adcl $0, %%ebx\n" - : "=b"(sum) , "=S"(th) - : "0"(sum), "c"(len/4) ,"1"(th) - : "ax", "cx", "bx", "si" ); - } - - /* Convert from 32 bits to 16 bits. */ - __asm__("\t movl %%ebx, %%ecx\n" - "\t shrl $16,%%ecx\n" - "\t addw %%cx, %%bx\n" - "\t adcw $0, %%bx\n" - : "=b"(sum) - : "0"(sum) - : "bx", "cx"); + : "=b"(sum) + : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256) + : "cx","bx","dx" ); - /* Check for an extra word. */ - if ((len & 2) != 0) { - __asm__("\t lodsw\n" - "\t addw %%ax,%%bx\n" - "\t adcw $0, %%bx\n" - : "=b"(sum), "=S"(th) - : "0"(sum) ,"1"(th) - : "si", "ax", "bx"); - } + if (len > 3) + { + __asm__("\tclc\n" + "1:\n" + "\t lodsl\n" + "\t adcl %%eax, %%ebx\n" + "\t loop 1b\n" + "\t adcl $0, %%ebx\n" + : "=b"(sum) , "=S"(th) + : "0"(sum), "c"(len/4) ,"1"(th) + : "ax", "cx", "bx", "si" ); + } - /* Now check for the extra byte. */ - if ((len & 1) != 0) { - __asm__("\t lodsb\n" - "\t movb $0,%%ah\n" - "\t addw %%ax,%%bx\n" + /* Convert from 32 bits to 16 bits. */ + __asm__("\t movl %%ebx, %%ecx\n" + "\t shrl $16,%%ecx\n" + "\t addw %%cx, %%bx\n" "\t adcw $0, %%bx\n" : "=b"(sum) - : "0"(sum) ,"S"(th) - : "si", "ax", "bx"); - } + : "0"(sum) + : "bx", "cx"); + + /* Check for an extra word. 
*/ + + if ((len & 2) != 0) + { + __asm__("\t lodsw\n" + "\t addw %%ax,%%bx\n" + "\t adcw $0, %%bx\n" + : "=b"(sum), "=S"(th) + : "0"(sum) ,"1"(th) + : "si", "ax", "bx"); + } - /* We only want the bottom 16 bits, but we never cleared the top 16. */ - return((~sum) & 0xffff); + /* Now check for the extra byte. */ + if ((len & 1) != 0) + { + __asm__("\t lodsb\n" + "\t movb $0,%%ah\n" + "\t addw %%ax,%%bx\n" + "\t adcw $0, %%bx\n" + : "=b"(sum) + : "0"(sum) ,"S"(th) + : "si", "ax", "bx"); + } + + /* We only want the bottom 16 bits, but we never cleared the top 16. */ + + return((~sum) & 0xffff); } @@ -554,7 +601,8 @@ size = skb->len - ((unsigned char *) th - skb->data); /* sanity check it.. */ - if (size < sizeof(struct tcphdr) || size > skb->len) { + if (size < sizeof(struct tcphdr) || size > skb->len) + { printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n", skb, skb->data, th, skb->len); kfree_skb(skb, FREE_WRITE); @@ -562,9 +610,11 @@ } /* If we have queued a header size packet.. */ - if (size == sizeof(struct tcphdr)) { + if (size == sizeof(struct tcphdr)) + { /* If its got a syn or fin its notionally included in the size..*/ - if(!th->syn && !th->fin) { + if(!th->syn && !th->fin) + { printk("tcp_send_skb: attempt to queue a bogon.\n"); kfree_skb(skb,FREE_WRITE); return; @@ -576,11 +626,13 @@ skb->h.seq = ntohl(th->seq) + size - 4*th->doff; if (after(skb->h.seq, sk->window_seq) || (sk->retransmits && sk->timeout == TIME_WRITE) || - sk->packets_out >= sk->cong_window) { + sk->packets_out >= sk->cong_window) + { /* checksum will be supplied by tcp_write_xmit. So * we shouldn't need to set it at all. 
I'm being paraoid */ th->check = 0; - if (skb->next != NULL) { + if (skb->next != NULL) + { printk("tcp_send_partial: next != NULL\n"); skb_unlink(skb); } @@ -588,8 +640,10 @@ if (before(sk->window_seq, sk->write_queue.next->h.seq) && sk->send_head == NULL && sk->ack_backlog == 0) - reset_timer(sk, TIME_PROBE0, sk->rto); - } else { + reset_timer(sk, TIME_PROBE0, sk->rto); + } + else + { th->ack_seq = ntohl(sk->acked_seq); th->window = ntohs(tcp_select_window(sk)); @@ -608,7 +662,8 @@ save_flags(flags); cli(); skb = sk->partial; - if (skb) { + if (skb) + { sk->partial = NULL; del_timer(&sk->partial_timer); } @@ -647,9 +702,11 @@ } -/* This routine sends an ack and also updates the window. */ -static void -tcp_send_ack(unsigned long sequence, unsigned long ack, +/* + * This routine sends an ack and also updates the window. + */ + +static void tcp_send_ack(unsigned long sequence, unsigned long ack, struct sock *sk, struct tcphdr *th, unsigned long daddr) { @@ -700,6 +757,7 @@ /* * Swap the send and the receive. */ + t1->dest = th->source; t1->source = th->dest; t1->seq = ntohl(sequence); @@ -737,32 +795,35 @@ } -/* This routine builds a generic TCP header. */ -static int -tcp_build_header(struct tcphdr *th, struct sock *sk, int push) +/* + * This routine builds a generic TCP header. + */ + +static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push) { - /* FIXME: want to get rid of this. */ - memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); - th->seq = htonl(sk->write_seq); - th->psh =(push == 0) ? 1 : 0; - th->doff = sizeof(*th)/4; - th->ack = 1; - th->fin = 0; - sk->ack_backlog = 0; - sk->bytes_rcv = 0; - sk->ack_timed = 0; - th->ack_seq = htonl(sk->acked_seq); - sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/; - th->window = htons(sk->window); + /* FIXME: want to get rid of this. */ + memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); + th->seq = htonl(sk->write_seq); + th->psh =(push == 0) ? 
1 : 0; + th->doff = sizeof(*th)/4; + th->ack = 1; + th->fin = 0; + sk->ack_backlog = 0; + sk->bytes_rcv = 0; + sk->ack_timed = 0; + th->ack_seq = htonl(sk->acked_seq); + sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/; + th->window = htons(sk->window); - return(sizeof(*th)); + return(sizeof(*th)); } /* - * This routine copies from a user buffer into a socket, - * and starts the transmit system. + * This routine copies from a user buffer into a socket, + * and starts the transmit system. */ + static int tcp_write(struct sock *sk, unsigned char *from, int len, int nonblock, unsigned flags) { @@ -1092,25 +1153,21 @@ int len, int nonblock, unsigned flags, struct sockaddr_in *addr, int addr_len) { - struct sockaddr_in sin; - if (flags & ~(MSG_OOB|MSG_DONTROUTE)) return -EINVAL; - if (addr_len < sizeof(sin)) - return(-EINVAL); - memcpy_fromfs(&sin, addr, sizeof(sin)); - if (sin.sin_family && sin.sin_family != AF_INET) + if (addr_len < sizeof(*addr)) return(-EINVAL); - if (sin.sin_port != sk->dummy_th.dest) + if (addr->sin_family && addr->sin_family != AF_INET) return(-EINVAL); - if (sin.sin_addr.s_addr != sk->daddr) - return(-EINVAL); + if (addr->sin_port != sk->dummy_th.dest) + return(-EISCONN); + if (addr->sin_addr.s_addr != sk->daddr) + return(-EISCONN); return(tcp_write(sk, from, len, nonblock, flags)); } -static void -tcp_read_wakeup(struct sock *sk) +static void tcp_read_wakeup(struct sock *sk) { int tmp; struct device *dev = NULL; @@ -1190,8 +1247,9 @@ static void cleanup_rbuf(struct sock *sk) { unsigned long flags; - int left; + unsigned long left; struct sk_buff *skb; + unsigned long rspace; if(sk->debug) printk("cleaning rbuf for sk=%p\n", sk); @@ -1225,9 +1283,9 @@ */ if(sk->debug) - printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk), + printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk), left); - if (sk->prot->rspace(sk) != left) + if ((rspace=sk->prot->rspace(sk)) != left) { /* * This area has caused the most trouble. 
The current strategy @@ -1249,7 +1307,7 @@ * only on the send side, so I'm putting mtu here. */ - if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) + if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) { /* Send an ack right now. */ tcp_read_wakeup(sk); @@ -1269,17 +1327,21 @@ } -/* Handle reading urgent data. */ -static int -tcp_read_urg(struct sock * sk, int nonblock, +/* + * Handle reading urgent data. + */ + +static int tcp_read_urg(struct sock * sk, int nonblock, unsigned char *to, int len, unsigned flags) { struct wait_queue wait = { current, NULL }; - while (len > 0) { + while (len > 0) + { if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ) return -EINVAL; - if (sk->urg_data & URG_VALID) { + if (sk->urg_data & URG_VALID) + { char c = sk->urg_data; if (!(flags & MSG_PEEK)) sk->urg_data = URG_READ; @@ -1287,13 +1349,15 @@ return 1; } - if (sk->err) { + if (sk->err) + { int tmp = -sk->err; sk->err = 0; return tmp; } - if (sk->state == TCP_CLOSE || sk->done) { + if (sk->state == TCP_CLOSE || sk->done) + { if (!sk->done) { sk->done = 1; return 0; @@ -1301,7 +1365,8 @@ return -ENOTCONN; } - if (sk->shutdown & RCV_SHUTDOWN) { + if (sk->shutdown & RCV_SHUTDOWN) + { sk->done = 1; return 0; } @@ -1324,7 +1389,10 @@ } -/* This routine copies from a sock struct into the user buffer. */ +/* + * This routine copies from a sock struct into the user buffer. 
+ */ + static int tcp_read(struct sock *sk, unsigned char *to, int len, int nonblock, unsigned flags) { @@ -1349,7 +1417,8 @@ add_wait_queue(sk->sleep, &wait); sk->inuse = 1; - while (len > 0) { + while (len > 0) + { struct sk_buff * skb; unsigned long offset; @@ -1362,7 +1431,8 @@ current->state = TASK_INTERRUPTIBLE; skb = skb_peek(&sk->receive_queue); - do { + do + { if (!skb) break; if (before(1+*seq, skb->h.th->seq)) @@ -1375,19 +1445,23 @@ if (!(flags & MSG_PEEK)) skb->used = 1; skb = skb->next; - } while (skb != (struct sk_buff *)&sk->receive_queue); + } + while (skb != (struct sk_buff *)&sk->receive_queue); if (copied) break; - if (sk->err) { + if (sk->err) + { copied = -sk->err; sk->err = 0; break; } - if (sk->state == TCP_CLOSE) { - if (!sk->done) { + if (sk->state == TCP_CLOSE) + { + if (!sk->done) + { sk->done = 1; break; } @@ -1395,12 +1469,14 @@ break; } - if (sk->shutdown & RCV_SHUTDOWN) { + if (sk->shutdown & RCV_SHUTDOWN) + { sk->done = 1; break; } - if (nonblock) { + if (nonblock) + { copied = -EAGAIN; break; } @@ -1410,7 +1486,8 @@ schedule(); sk->inuse = 1; - if (current->signal & ~current->blocked) { + if (current->signal & ~current->blocked) + { copied = -ERESTARTSYS; break; } @@ -1422,16 +1499,21 @@ if (len < used) used = len; /* do we have urgent data here? */ - if (sk->urg_data) { + if (sk->urg_data) + { unsigned long urg_offset = sk->urg_seq - (1 + *seq); - if (urg_offset < used) { - if (!urg_offset) { - if (!sk->urginline) { + if (urg_offset < used) + { + if (!urg_offset) + { + if (!sk->urginline) + { ++*seq; offset++; used--; } - } else + } + else used = urg_offset; } } @@ -1458,7 +1540,7 @@ /* - * Shutdown the sending side of a connection. + * Shutdown the sending side of a connection. 
*/ void tcp_shutdown(struct sock *sk, int how) @@ -1490,7 +1572,8 @@ sk->state == TCP_CLOSING || sk->state == TCP_LAST_ACK || sk->state == TCP_TIME_WAIT - ) { + ) + { return; } sk->inuse = 1; @@ -1606,35 +1689,28 @@ int to_len, int nonblock, unsigned flags, struct sockaddr_in *addr, int *addr_len) { - struct sockaddr_in sin; - int len; - int err; - int result; - - /* Have to check these first unlike the old code. If - we check them after we lose data on an error - which is wrong */ - err = verify_area(VERIFY_WRITE,addr_len,sizeof(long)); - if(err) - return err; - len = get_fs_long(addr_len); - if(len > sizeof(sin)) - len = sizeof(sin); - err=verify_area(VERIFY_WRITE, addr, len); - if(err) - return err; - - result=tcp_read(sk, to, to_len, nonblock, flags); - - if (result < 0) return(result); + int result; - sin.sin_family = AF_INET; - sin.sin_port = sk->dummy_th.dest; - sin.sin_addr.s_addr = sk->daddr; + /* + * Have to check these first unlike the old code. If + * we check them after we lose data on an error + * which is wrong + */ + + if(addr_len) + *addr_len = sizeof(*addr); + result=tcp_read(sk, to, to_len, nonblock, flags); - memcpy_tofs(addr, &sin, len); - put_fs_long(len, addr_len); - return(result); + if (result < 0) + return(result); + + if(addr) + { + addr->sin_family = AF_INET; + addr->sin_port = sk->dummy_th.dest; + addr->sin_addr.s_addr = sk->daddr; + } + return(result); } @@ -1727,53 +1803,53 @@ * we have set up sk->mtu to our own MTU. 
*/ -static void -tcp_options(struct sock *sk, struct tcphdr *th) +static void tcp_options(struct sock *sk, struct tcphdr *th) { - unsigned char *ptr; - int length=(th->doff*4)-sizeof(struct tcphdr); - int mss_seen = 0; + unsigned char *ptr; + int length=(th->doff*4)-sizeof(struct tcphdr); + int mss_seen = 0; - ptr = (unsigned char *)(th + 1); + ptr = (unsigned char *)(th + 1); - while(length>0) - { - int opcode=*ptr++; - int opsize=*ptr++; - switch(opcode) - { - case TCPOPT_EOL: - return; - case TCPOPT_NOP: - length-=2; - continue; - - default: - if(opsize<=2) /* Avoid silly options looping forever */ - return; - switch(opcode) - { - case TCPOPT_MSS: - if(opsize==4 && th->syn) - { - sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr)); - mss_seen = 1; - } - break; - /* Add other options here as people feel the urge to implement stuff like large windows */ - } - ptr+=opsize-2; - length-=opsize; - } - } - if (th->syn) { - if (! mss_seen) - sk->mtu=min(sk->mtu, 536); /* default MSS if none sent */ - } + while(length>0) + { + int opcode=*ptr++; + int opsize=*ptr++; + switch(opcode) + { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: + length-=2; + continue; + + default: + if(opsize<=2) /* Avoid silly options looping forever */ + return; + switch(opcode) + { + case TCPOPT_MSS: + if(opsize==4 && th->syn) + { + sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr)); + mss_seen = 1; + } + break; + /* Add other options here as people feel the urge to implement stuff like large windows */ + } + ptr+=opsize-2; + length-=opsize; + } + } + if (th->syn) + { + if (! mss_seen) + sk->mtu=min(sk->mtu, 536); /* default MSS if none sent */ + } #ifdef CONFIG_INET_PCTCP - sk->mss = min(sk->max_window >> 1, sk->mtu); + sk->mss = min(sk->max_window >> 1, sk->mtu); #else - sk->mss = min(sk->max_window, sk->mtu); + sk->mss = min(sk->max_window, sk->mtu); #endif } @@ -1788,228 +1864,266 @@ } /* - * This routine handles a connection request. - * It should make sure we haven't already responded. 
- * Because of the way BSD works, we have to send a syn/ack now. - * This also means it will be harder to close a socket which is - * listening. + * This routine handles a connection request. + * It should make sure we haven't already responded. + * Because of the way BSD works, we have to send a syn/ack now. + * This also means it will be harder to close a socket which is + * listening. */ -static void -tcp_conn_request(struct sock *sk, struct sk_buff *skb, + +static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, unsigned long daddr, unsigned long saddr, struct options *opt, struct device *dev) { - struct sk_buff *buff; - struct tcphdr *t1; - unsigned char *ptr; - struct sock *newsk; - struct tcphdr *th; - struct device *ndev=NULL; - int tmp; - struct rtable *rt; - - th = skb->h.th; - - /* If the socket is dead, don't accept the connection. */ - if (!sk->dead) { - sk->data_ready(sk,0); - } else { - tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl); - tcp_statistics.TcpAttemptFails++; - kfree_skb(skb, FREE_READ); - return; - } + struct sk_buff *buff; + struct tcphdr *t1; + unsigned char *ptr; + struct sock *newsk; + struct tcphdr *th; + struct device *ndev=NULL; + int tmp; + struct rtable *rt; + + th = skb->h.th; - /* - * Make sure we can accept more. This will prevent a - * flurry of syns from eating up all our memory. - */ - if (sk->ack_backlog >= sk->max_ack_backlog) { - tcp_statistics.TcpAttemptFails++; - kfree_skb(skb, FREE_READ); - return; - } + /* If the socket is dead, don't accept the connection. */ + if (!sk->dead) + { + sk->data_ready(sk,0); + } + else + { + tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl); + tcp_statistics.TcpAttemptFails++; + kfree_skb(skb, FREE_READ); + return; + } - /* - * We need to build a new sock struct. 
- * It is sort of bad to have a socket without an inode attached - * to it, but the wake_up's will just wake up the listening socket, - * and if the listening socket is destroyed before this is taken - * off of the queue, this will take care of it. - */ - newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC); - if (newsk == NULL) { - /* just ignore the syn. It will get retransmitted. */ - tcp_statistics.TcpAttemptFails++; - kfree_skb(skb, FREE_READ); - return; - } + /* + * Make sure we can accept more. This will prevent a + * flurry of syns from eating up all our memory. + */ + + if (sk->ack_backlog >= sk->max_ack_backlog) + { + tcp_statistics.TcpAttemptFails++; + kfree_skb(skb, FREE_READ); + return; + } - memcpy(newsk, sk, sizeof(*newsk)); - skb_queue_head_init(&newsk->write_queue); - skb_queue_head_init(&newsk->receive_queue); - newsk->send_head = NULL; - newsk->send_tail = NULL; - skb_queue_head_init(&newsk->back_log); - newsk->rtt = 0; /*TCP_CONNECT_TIME<<3*/ - newsk->rto = TCP_TIMEOUT_INIT; - newsk->mdev = 0; - newsk->max_window = 0; - newsk->cong_window = 1; - newsk->cong_count = 0; - newsk->ssthresh = 0; - newsk->backoff = 0; - newsk->blog = 0; - newsk->intr = 0; - newsk->proc = 0; - newsk->done = 0; - newsk->partial = NULL; - newsk->pair = NULL; - newsk->wmem_alloc = 0; - newsk->rmem_alloc = 0; - newsk->localroute = sk->localroute; - - newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; - - newsk->err = 0; - newsk->shutdown = 0; - newsk->ack_backlog = 0; - newsk->acked_seq = skb->h.th->seq+1; - newsk->fin_seq = skb->h.th->seq; - newsk->copied_seq = skb->h.th->seq; - newsk->state = TCP_SYN_RECV; - newsk->timeout = 0; - newsk->write_seq = jiffies * SEQ_TICK - seq_offset; - newsk->window_seq = newsk->write_seq; - newsk->rcv_ack_seq = newsk->write_seq; - newsk->urg_data = 0; - newsk->retransmits = 0; - newsk->destroy = 0; - newsk->timer.data = (unsigned long)newsk; - newsk->timer.function = &net_timer; - newsk->dummy_th.source = skb->h.th->dest; - 
newsk->dummy_th.dest = skb->h.th->source; - - /* Swap these two, they are from our point of view. */ - newsk->daddr = saddr; - newsk->saddr = daddr; - - put_sock(newsk->num,newsk); - newsk->dummy_th.res1 = 0; - newsk->dummy_th.doff = 6; - newsk->dummy_th.fin = 0; - newsk->dummy_th.syn = 0; - newsk->dummy_th.rst = 0; - newsk->dummy_th.psh = 0; - newsk->dummy_th.ack = 0; - newsk->dummy_th.urg = 0; - newsk->dummy_th.res2 = 0; - newsk->acked_seq = skb->h.th->seq + 1; - newsk->copied_seq = skb->h.th->seq; - - /* Grab the ttl and tos values and use them */ - newsk->ip_ttl=sk->ip_ttl; - newsk->ip_tos=skb->ip_hdr->tos; - -/* use 512 or whatever user asked for */ -/* note use of sk->user_mss, since user has no direct access to newsk */ - rt=ip_rt_route(saddr, NULL,NULL); - if (sk->user_mss) - newsk->mtu = sk->user_mss; - else if(rt!=NULL && (rt->rt_flags&RTF_MTU)) - newsk->mtu = rt->rt_mtu - HEADER_SIZE; - else { + /* + * We need to build a new sock struct. + * It is sort of bad to have a socket without an inode attached + * to it, but the wake_up's will just wake up the listening socket, + * and if the listening socket is destroyed before this is taken + * off of the queue, this will take care of it. + */ + + newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC); + if (newsk == NULL) + { + /* just ignore the syn. It will get retransmitted. 
*/ + tcp_statistics.TcpAttemptFails++; + kfree_skb(skb, FREE_READ); + return; + } + + memcpy(newsk, sk, sizeof(*newsk)); + skb_queue_head_init(&newsk->write_queue); + skb_queue_head_init(&newsk->receive_queue); + newsk->send_head = NULL; + newsk->send_tail = NULL; + skb_queue_head_init(&newsk->back_log); + newsk->rtt = 0; /*TCP_CONNECT_TIME<<3*/ + newsk->rto = TCP_TIMEOUT_INIT; + newsk->mdev = 0; + newsk->max_window = 0; + newsk->cong_window = 1; + newsk->cong_count = 0; + newsk->ssthresh = 0; + newsk->backoff = 0; + newsk->blog = 0; + newsk->intr = 0; + newsk->proc = 0; + newsk->done = 0; + newsk->partial = NULL; + newsk->pair = NULL; + newsk->wmem_alloc = 0; + newsk->rmem_alloc = 0; + newsk->localroute = sk->localroute; + + newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; + + newsk->err = 0; + newsk->shutdown = 0; + newsk->ack_backlog = 0; + newsk->acked_seq = skb->h.th->seq+1; + newsk->fin_seq = skb->h.th->seq; + newsk->copied_seq = skb->h.th->seq; + newsk->state = TCP_SYN_RECV; + newsk->timeout = 0; + newsk->write_seq = jiffies * SEQ_TICK - seq_offset; + newsk->window_seq = newsk->write_seq; + newsk->rcv_ack_seq = newsk->write_seq; + newsk->urg_data = 0; + newsk->retransmits = 0; + newsk->destroy = 0; + newsk->timer.data = (unsigned long)newsk; + newsk->timer.function = &net_timer; + newsk->dummy_th.source = skb->h.th->dest; + newsk->dummy_th.dest = skb->h.th->source; + + /* + * Swap these two, they are from our point of view. 
+ */ + + newsk->daddr = saddr; + newsk->saddr = daddr; + + put_sock(newsk->num,newsk); + newsk->dummy_th.res1 = 0; + newsk->dummy_th.doff = 6; + newsk->dummy_th.fin = 0; + newsk->dummy_th.syn = 0; + newsk->dummy_th.rst = 0; + newsk->dummy_th.psh = 0; + newsk->dummy_th.ack = 0; + newsk->dummy_th.urg = 0; + newsk->dummy_th.res2 = 0; + newsk->acked_seq = skb->h.th->seq + 1; + newsk->copied_seq = skb->h.th->seq; + + /* + * Grab the ttl and tos values and use them + */ + + newsk->ip_ttl=sk->ip_ttl; + newsk->ip_tos=skb->ip_hdr->tos; + + /* + * Use 512 or whatever user asked for + */ + + /* + * Note use of sk->user_mss, since user has no direct access to newsk + */ + + rt=ip_rt_route(saddr, NULL,NULL); + if (sk->user_mss) + newsk->mtu = sk->user_mss; + else if(rt!=NULL && (rt->rt_flags&RTF_MTU)) + newsk->mtu = rt->rt_mtu - HEADER_SIZE; + else + { #ifdef CONFIG_INET_SNARL /* Sub Nets ARe Local */ - if ((saddr ^ daddr) & default_mask(saddr)) + if ((saddr ^ daddr) & default_mask(saddr)) #else - if ((saddr ^ daddr) & dev->pa_mask) + if ((saddr ^ daddr) & dev->pa_mask) #endif - newsk->mtu = 576 - HEADER_SIZE; - else - newsk->mtu = MAX_WINDOW; - } -/* but not bigger than device MTU */ - newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE); - -/* this will min with what arrived in the packet */ - tcp_options(newsk,skb->h.th); - - buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC); - if (buff == NULL) { - sk->err = -ENOMEM; - newsk->dead = 1; - release_sock(newsk); - kfree_skb(skb, FREE_READ); - tcp_statistics.TcpAttemptFails++; - return; - } + newsk->mtu = 576 - HEADER_SIZE; + else + newsk->mtu = MAX_WINDOW; + } + + /* + * But not bigger than device MTU + */ + + newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE); + + /* + * This will min with what arrived in the packet + */ + + tcp_options(newsk,skb->h.th); + + buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC); + if (buff == NULL) + { + sk->err = -ENOMEM; + newsk->dead = 1; + release_sock(newsk); + 
kfree_skb(skb, FREE_READ); + tcp_statistics.TcpAttemptFails++; + return; + } - buff->len = sizeof(struct tcphdr)+4; - buff->sk = newsk; - buff->localroute = newsk->localroute; - - t1 =(struct tcphdr *) buff->data; + buff->len = sizeof(struct tcphdr)+4; + buff->sk = newsk; + buff->localroute = newsk->localroute; + + t1 =(struct tcphdr *) buff->data; + + /* + * Put in the IP header and routing stuff. + */ - /* Put in the IP header and routing stuff. */ - tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev, + tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev, IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl); - /* Something went wrong. */ - if (tmp < 0) { - sk->err = tmp; - buff->free=1; - kfree_skb(buff,FREE_WRITE); - newsk->dead = 1; - release_sock(newsk); - skb->sk = sk; - kfree_skb(skb, FREE_READ); - tcp_statistics.TcpAttemptFails++; - return; - } + /* + * Something went wrong. + */ + + if (tmp < 0) + { + sk->err = tmp; + buff->free=1; + kfree_skb(buff,FREE_WRITE); + newsk->dead = 1; + release_sock(newsk); + skb->sk = sk; + kfree_skb(skb, FREE_READ); + tcp_statistics.TcpAttemptFails++; + return; + } - buff->len += tmp; - t1 =(struct tcphdr *)((char *)t1 +tmp); + buff->len += tmp; + t1 =(struct tcphdr *)((char *)t1 +tmp); - memcpy(t1, skb->h.th, sizeof(*t1)); - buff->h.seq = newsk->write_seq; + memcpy(t1, skb->h.th, sizeof(*t1)); + buff->h.seq = newsk->write_seq; + /* + * Swap the send and the receive. 
+ */ + t1->dest = skb->h.th->source; + t1->source = newsk->dummy_th.source; + t1->seq = ntohl(newsk->write_seq++); + t1->ack = 1; + newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/ + newsk->sent_seq = newsk->write_seq; + t1->window = ntohs(newsk->window); + t1->res1 = 0; + t1->res2 = 0; + t1->rst = 0; + t1->urg = 0; + t1->psh = 0; + t1->syn = 1; + t1->ack_seq = ntohl(skb->h.th->seq+1); + t1->doff = sizeof(*t1)/4+1; + ptr =(unsigned char *)(t1+1); + ptr[0] = 2; + ptr[1] = 4; + ptr[2] = ((newsk->mtu) >> 8) & 0xff; + ptr[3] =(newsk->mtu) & 0xff; + + tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk); + newsk->prot->queue_xmit(newsk, dev, buff, 0); - /* Swap the send and the receive. */ - t1->dest = skb->h.th->source; - t1->source = newsk->dummy_th.source; - t1->seq = ntohl(newsk->write_seq++); - t1->ack = 1; - newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/ - newsk->sent_seq = newsk->write_seq; - t1->window = ntohs(newsk->window); - t1->res1 = 0; - t1->res2 = 0; - t1->rst = 0; - t1->urg = 0; - t1->psh = 0; - t1->syn = 1; - t1->ack_seq = ntohl(skb->h.th->seq+1); - t1->doff = sizeof(*t1)/4+1; - - ptr =(unsigned char *)(t1+1); - ptr[0] = 2; - ptr[1] = 4; - ptr[2] = ((newsk->mtu) >> 8) & 0xff; - ptr[3] =(newsk->mtu) & 0xff; - - tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk); - newsk->prot->queue_xmit(newsk, dev, buff, 0); - - reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT); - skb->sk = newsk; - - /* Charge the sock_buff to newsk. */ - sk->rmem_alloc -= skb->mem_len; - newsk->rmem_alloc += skb->mem_len; - - skb_queue_tail(&sk->receive_queue,skb); - sk->ack_backlog++; - release_sock(newsk); - tcp_statistics.TcpOutSegs++; + reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT); + skb->sk = newsk; + + /* + * Charge the sock_buff to newsk. 
+ */ + + sk->rmem_alloc -= skb->mem_len; + newsk->rmem_alloc += skb->mem_len; + + skb_queue_tail(&sk->receive_queue,skb); + sk->ack_backlog++; + release_sock(newsk); + tcp_statistics.TcpOutSegs++; } @@ -2224,27 +2338,35 @@ static void tcp_write_xmit(struct sock *sk) { - struct sk_buff *skb; + struct sk_buff *skb; - /* The bytes will have to remain here. In time closedown will - empty the write queue and all will be happy */ - if(sk->zapped) - return; + /* + * The bytes will have to remain here. In time closedown will + * empty the write queue and all will be happy + */ + + if(sk->zapped) + return; - while((skb = skb_peek(&sk->write_queue)) != NULL && - before(skb->h.seq, sk->window_seq + 1) && - (sk->retransmits == 0 || - sk->timeout != TIME_WRITE || - before(skb->h.seq, sk->rcv_ack_seq + 1)) - && sk->packets_out < sk->cong_window) { + while((skb = skb_peek(&sk->write_queue)) != NULL && + before(skb->h.seq, sk->window_seq + 1) && + (sk->retransmits == 0 || + sk->timeout != TIME_WRITE || + before(skb->h.seq, sk->rcv_ack_seq + 1)) + && sk->packets_out < sk->cong_window) + { IS_SKB(skb); skb_unlink(skb); /* See if we really need to send the packet. */ - if (before(skb->h.seq, sk->rcv_ack_seq +1)) { + if (before(skb->h.seq, sk->rcv_ack_seq +1)) + { sk->retransmits = 0; kfree_skb(skb, FREE_WRITE); - if (!sk->dead) sk->write_space(sk); - } else { + if (!sk->dead) + sk->write_space(sk); + } + else + { struct tcphdr *th; struct iphdr *iph; int size; @@ -2273,145 +2395,173 @@ /* - * This routine sorts the send list, and resets the - * sk->send_head and sk->send_tail pointers. + * This routine sorts the send list, and resets the + * sk->send_head and sk->send_tail pointers. 
*/ -void -sort_send(struct sock *sk) + +static void sort_send(struct sock *sk) { - struct sk_buff *list = NULL; - struct sk_buff *skb,*skb2,*skb3; + struct sk_buff *list = NULL; + struct sk_buff *skb,*skb2,*skb3; - for (skb = sk->send_head; skb != NULL; skb = skb2) { - skb2 = skb->link3; - if (list == NULL || before (skb2->h.seq, list->h.seq)) { - skb->link3 = list; - sk->send_tail = skb; - list = skb; - } else { - for (skb3 = list; ; skb3 = skb3->link3) { - if (skb3->link3 == NULL || - before(skb->h.seq, skb3->link3->h.seq)) { - skb->link3 = skb3->link3; - skb3->link3 = skb; - if (skb->link3 == NULL) sk->send_tail = skb; - break; + for (skb = sk->send_head; skb != NULL; skb = skb2) + { + skb2 = skb->link3; + if (list == NULL || before (skb2->h.seq, list->h.seq)) + { + skb->link3 = list; + sk->send_tail = skb; + list = skb; + } + else + { + for (skb3 = list; ; skb3 = skb3->link3) + { + if (skb3->link3 == NULL || + before(skb->h.seq, skb3->link3->h.seq)) + { + skb->link3 = skb3->link3; + skb3->link3 = skb; + if (skb->link3 == NULL) + sk->send_tail = skb; + break; + } } } } - } - sk->send_head = list; + sk->send_head = list; } /* - * This routine deals with incoming acks, but not outgoing ones. + * This routine deals with incoming acks, but not outgoing ones. 
*/ -static int -tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len) +static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len) { - unsigned long ack; - int flag = 0; - /* - * 1 - there was data in packet as well as ack or new data is sent or - * in shutdown state - * 2 - data from retransmit queue was acked and removed - * 4 - window shrunk or data from retransmit queue was acked and removed - */ + unsigned long ack; + int flag = 0; + + /* + * 1 - there was data in packet as well as ack or new data is sent or + * in shutdown state + * 2 - data from retransmit queue was acked and removed + * 4 - window shrunk or data from retransmit queue was acked and removed + */ - if(sk->zapped) - return(1); /* Dead, cant ack any more so why bother */ + if(sk->zapped) + return(1); /* Dead, cant ack any more so why bother */ - ack = ntohl(th->ack_seq); - if (ntohs(th->window) > sk->max_window) { - sk->max_window = ntohs(th->window); + ack = ntohl(th->ack_seq); + if (ntohs(th->window) > sk->max_window) + { + sk->max_window = ntohs(th->window); #ifdef CONFIG_INET_PCTCP - sk->mss = min(sk->max_window>>1, sk->mtu); + sk->mss = min(sk->max_window>>1, sk->mtu); #else - sk->mss = min(sk->max_window, sk->mtu); + sk->mss = min(sk->max_window, sk->mtu); #endif - } + } - if (sk->retransmits && sk->timeout == TIME_KEEPOPEN) - sk->retransmits = 0; + if (sk->retransmits && sk->timeout == TIME_KEEPOPEN) + sk->retransmits = 0; -/* not quite clear why the +1 and -1 here, and why not +1 in next line */ - if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) { - if (after(ack, sk->sent_seq) || - (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) { - return(0); - } - if (sk->keepopen) { - reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); +/* + * Not quite clear why the +1 and -1 here, and why not +1 in next line + */ + + if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) + { + if (after(ack, sk->sent_seq) || + 
(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) + { + return(0); + } + if (sk->keepopen) + { + reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); + } + return(1); } - return(1); - } - if (len != th->doff*4) flag |= 1; - - /* See if our window has been shrunk. */ - if (after(sk->window_seq, ack+ntohs(th->window))) { - /* - * We may need to move packets from the send queue - * to the write queue, if the window has been shrunk on us. - * The RFC says you are not allowed to shrink your window - * like this, but if the other end does, you must be able - * to deal with it. - */ - struct sk_buff *skb; - struct sk_buff *skb2; - struct sk_buff *wskb = NULL; - - skb2 = sk->send_head; - sk->send_head = NULL; - sk->send_tail = NULL; + if (len != th->doff*4) + flag |= 1; - flag |= 4; + /* See if our window has been shrunk. */ - sk->window_seq = ack + ntohs(th->window); - cli(); - while (skb2 != NULL) { - skb = skb2; - skb2 = skb->link3; - skb->link3 = NULL; - if (after(skb->h.seq, sk->window_seq)) { - if (sk->packets_out > 0) sk->packets_out--; - /* We may need to remove this from the dev send list. */ - if (skb->next != NULL) { - skb_unlink(skb); - } - /* Now add it to the write_queue. */ - if (wskb == NULL) - skb_queue_head(&sk->write_queue,skb); - else - skb_append(wskb,skb); - wskb = skb; - } else { - if (sk->send_head == NULL) { - sk->send_head = skb; - sk->send_tail = skb; - } else { - sk->send_tail->link3 = skb; - sk->send_tail = skb; - } + if (after(sk->window_seq, ack+ntohs(th->window))) + { + /* + * We may need to move packets from the send queue + * to the write queue, if the window has been shrunk on us. + * The RFC says you are not allowed to shrink your window + * like this, but if the other end does, you must be able + * to deal with it. 
+ */ + struct sk_buff *skb; + struct sk_buff *skb2; + struct sk_buff *wskb = NULL; + + skb2 = sk->send_head; + sk->send_head = NULL; + sk->send_tail = NULL; + + flag |= 4; + + sk->window_seq = ack + ntohs(th->window); + cli(); + while (skb2 != NULL) + { + skb = skb2; + skb2 = skb->link3; skb->link3 = NULL; + if (after(skb->h.seq, sk->window_seq)) + { + if (sk->packets_out > 0) + sk->packets_out--; + /* We may need to remove this from the dev send list. */ + if (skb->next != NULL) + { + skb_unlink(skb); + } + /* Now add it to the write_queue. */ + if (wskb == NULL) + skb_queue_head(&sk->write_queue,skb); + else + skb_append(wskb,skb); + wskb = skb; + } + else + { + if (sk->send_head == NULL) + { + sk->send_head = skb; + sk->send_tail = skb; + } + else + { + sk->send_tail->link3 = skb; + sk->send_tail = skb; + } + skb->link3 = NULL; + } } + sti(); } - sti(); - } + + if (sk->send_tail == NULL || sk->send_head == NULL) + { + sk->send_head = NULL; + sk->send_tail = NULL; + sk->packets_out= 0; + } + + sk->window_seq = ack + ntohs(th->window); - if (sk->send_tail == NULL || sk->send_head == NULL) { - sk->send_head = NULL; - sk->send_tail = NULL; - sk->packets_out= 0; - } - - sk->window_seq = ack + ntohs(th->window); - - /* We don't want too many packets out there. */ - if (sk->timeout == TIME_WRITE && - sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) { + /* We don't want too many packets out there. */ + if (sk->timeout == TIME_WRITE && + sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) + { /* * This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. Because we keep cong_window in integral @@ -2421,492 +2571,630 @@ * interpreting "new data is acked" as including data that has * been retransmitted but is just now being acked. */ - if (sk->cong_window < sk->ssthresh) - /* in "safe" area, increase */ - sk->cong_window++; - else { - /* in dangerous area, increase slowly. 
In theory this is - sk->cong_window += 1 / sk->cong_window - */ - if (sk->cong_count >= sk->cong_window) { - sk->cong_window++; - sk->cong_count = 0; - } else - sk->cong_count++; - } - } - - sk->rcv_ack_seq = ack; - - /* - * if this ack opens up a zero window, clear backoff. It was - * being used to time the probes, and is probably far higher than - * it needs to be for normal retransmission - */ - if (sk->timeout == TIME_PROBE0) { - if (skb_peek(&sk->write_queue) != NULL && /* should always be non-null */ - ! before (sk->window_seq, sk->write_queue.next->h.seq)) { - sk->retransmits = 0; - sk->backoff = 0; - /* recompute rto from rtt. this eliminates any backoff */ - sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; - if (sk->rto > 120*HZ) - sk->rto = 120*HZ; - if (sk->rto < 2) /* Was 1*HZ */ - sk->rto = 2; - } - } - - /* See if we can take anything off of the retransmit queue. */ - while(sk->send_head != NULL) { - /* Check for a bug. */ - if (sk->send_head->link3 && - after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) { - printk("INET: tcp.c: *** bug send_list out of order.\n"); - sort_send(sk); + if (sk->cong_window < sk->ssthresh) + /* + * In "safe" area, increase + */ + sk->cong_window++; + else + { + /* + * In dangerous area, increase slowly. In theory this is + * sk->cong_window += 1 / sk->cong_window + */ + if (sk->cong_count >= sk->cong_window) + { + sk->cong_window++; + sk->cong_count = 0; + } + else + sk->cong_count++; + } } - if (before(sk->send_head->h.seq, ack+1)) { - struct sk_buff *oskb; - - if (sk->retransmits) { + sk->rcv_ack_seq = ack; - /* we were retransmitting. don't count this in RTT est */ - flag |= 2; + /* + * if this ack opens up a zero window, clear backoff. It was + * being used to time the probes, and is probably far higher than + * it needs to be for normal retransmission. + */ + if (sk->timeout == TIME_PROBE0) + { + if (skb_peek(&sk->write_queue) != NULL && /* should always be non-null */ + ! 
before (sk->window_seq, sk->write_queue.next->h.seq)) + { + sk->retransmits = 0; + sk->backoff = 0; /* - * even though we've gotten an ack, we're still - * retransmitting as long as we're sending from - * the retransmit queue. Keeping retransmits non-zero - * prevents us from getting new data interspersed with - * retransmissions. + * Recompute rto from rtt. this eliminates any backoff. */ - if (sk->send_head->link3) - sk->retransmits = 1; - else - sk->retransmits = 0; + sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; + if (sk->rto > 120*HZ) + sk->rto = 120*HZ; + if (sk->rto < 20) /* Was 1*HZ, then 1 - turns out we must allow about + .2 of a second because of BSD delayed acks - on a 100Mb/sec link + .2 of a second is going to need huge windows (SIGH) */ + sk->rto = 20; + } + } + /* + * See if we can take anything off of the retransmit queue. + */ + + while(sk->send_head != NULL) + { + /* Check for a bug. */ + if (sk->send_head->link3 && + after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) + { + printk("INET: tcp.c: *** bug send_list out of order.\n"); + sort_send(sk); } - /* - * Note that we only reset backoff and rto in the - * rtt recomputation code. And that doesn't happen - * if there were retransmissions in effect. So the - * first new packet after the retransmissions is - * sent with the backoff still in effect. Not until - * we get an ack from a non-retransmitted packet do - * we reset the backoff and rto. This allows us to deal - * with a situation where the network delay has increased - * suddenly. I.e. Karn's algorithm. (SIGCOMM '87, p5.) - */ + if (before(sk->send_head->h.seq, ack+1)) + { + struct sk_buff *oskb; + if (sk->retransmits) + { + /* + * We were retransmitting. don't count this in RTT est + */ + flag |= 2; - /* We have one less packet out there. */ - if (sk->packets_out > 0) sk->packets_out --; - /* Wake up the process, it can probably write more. 
*/ - if (!sk->dead) sk->write_space(sk); - - oskb = sk->send_head; - - if (!(flag&2)) { - long m; - - /* The following amusing code comes from Jacobson's - * article in SIGCOMM '88. Note that rtt and mdev - * are scaled versions of rtt and mean deviation. - * This is designed to be as fast as possible - * m stands for "measurement". - */ + /* + * even though we've gotten an ack, we're still + * retransmitting as long as we're sending from + * the retransmit queue. Keeping retransmits non-zero + * prevents us from getting new data interspersed with + * retransmissions. + */ + + if (sk->send_head->link3) + sk->retransmits = 1; + else + sk->retransmits = 0; + } + /* + * Note that we only reset backoff and rto in the + * rtt recomputation code. And that doesn't happen + * if there were retransmissions in effect. So the + * first new packet after the retransmissions is + * sent with the backoff still in effect. Not until + * we get an ack from a non-retransmitted packet do + * we reset the backoff and rto. This allows us to deal + * with a situation where the network delay has increased + * suddenly. I.e. Karn's algorithm. (SIGCOMM '87, p5.) + */ - m = jiffies - oskb->when; /* RTT */ - if(m<=0) - m=1; /* IS THIS RIGHT FOR <0 ??? */ - m -= (sk->rtt >> 3); /* m is now error in rtt est */ - sk->rtt += m; /* rtt = 7/8 rtt + 1/8 new */ - if (m < 0) - m = -m; /* m is now abs(error) */ - m -= (sk->mdev >> 2); /* similar update on mdev */ - sk->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ - - /* now update timeout. Note that this removes any backoff */ - sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; - if (sk->rto > 120*HZ) - sk->rto = 120*HZ; - if (sk->rto < 2) /* Was 1*HZ */ - sk->rto = 2; - sk->backoff = 0; + /* + * We have one less packet out there. + */ + + if (sk->packets_out > 0) + sk->packets_out --; + /* + * Wake up the process, it can probably write more. 
+ */ + if (!sk->dead) + sk->write_space(sk); + oskb = sk->send_head; - } - flag |= (2|4); + if (!(flag&2)) + { + long m; + + /* + * The following amusing code comes from Jacobson's + * article in SIGCOMM '88. Note that rtt and mdev + * are scaled versions of rtt and mean deviation. + * This is designed to be as fast as possible + * m stands for "measurement". + */ + + m = jiffies - oskb->when; /* RTT */ + if(m<=0) + m=1; /* IS THIS RIGHT FOR <0 ??? */ + m -= (sk->rtt >> 3); /* m is now error in rtt est */ + sk->rtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (m < 0) + m = -m; /* m is now abs(error) */ + m -= (sk->mdev >> 2); /* similar update on mdev */ + sk->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ + + /* + * Now update timeout. Note that this removes any backoff. + */ + + sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; + if (sk->rto > 120*HZ) + sk->rto = 120*HZ; + if (sk->rto < 20) /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */ + sk->rto = 20; + sk->backoff = 0; + } + flag |= (2|4); + cli(); + oskb = sk->send_head; + IS_SKB(oskb); + sk->send_head = oskb->link3; + if (sk->send_head == NULL) + { + sk->send_tail = NULL; + } - cli(); + /* + * We may need to remove this from the dev send list. + */ - oskb = sk->send_head; - IS_SKB(oskb); - sk->send_head = oskb->link3; - if (sk->send_head == NULL) { - sk->send_tail = NULL; + if (oskb->next) + skb_unlink(oskb); + sti(); + kfree_skb(oskb, FREE_WRITE); /* write. */ + if (!sk->dead) + sk->write_space(sk); + } + else + { + break; } + } - /* We may need to remove this from the dev send list. */ - if (oskb->next) - skb_unlink(oskb); - sti(); - kfree_skb(oskb, FREE_WRITE); /* write. */ - if (!sk->dead) sk->write_space(sk); - } else { - break; + /* + * Maybe we can take some stuff off of the write queue, + * and put it onto the xmit queue. 
+ */ + if (skb_peek(&sk->write_queue) != NULL) + { + if (after (sk->window_seq+1, sk->write_queue.next->h.seq) && + (sk->retransmits == 0 || + sk->timeout != TIME_WRITE || + before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1)) + && sk->packets_out < sk->cong_window) + { + flag |= 1; + tcp_write_xmit(sk); + } + else if (before(sk->window_seq, sk->write_queue.next->h.seq) && + sk->send_head == NULL && + sk->ack_backlog == 0 && + sk->state != TCP_TIME_WAIT) + { + reset_timer(sk, TIME_PROBE0, sk->rto); + } } - } + else + { + if (sk->send_head == NULL && sk->ack_backlog == 0 && + sk->state != TCP_TIME_WAIT && !sk->keepopen) + { + if (!sk->dead) + sk->write_space(sk); + if (sk->keepopen) + reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); + else + delete_timer(sk); + } + else + { + if (sk->state != (unsigned char) sk->keepopen) + { + reset_timer(sk, TIME_WRITE, sk->rto); + } + if (sk->state == TCP_TIME_WAIT) + { + reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + } + } + } - /* - * Maybe we can take some stuff off of the write queue, - * and put it onto the xmit queue. 
- */ - if (skb_peek(&sk->write_queue) != NULL) { - if (after (sk->window_seq+1, sk->write_queue.next->h.seq) && - (sk->retransmits == 0 || - sk->timeout != TIME_WRITE || - before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1)) - && sk->packets_out < sk->cong_window) { + if (sk->packets_out == 0 && sk->partial != NULL && + skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) + { flag |= 1; - tcp_write_xmit(sk); - } else if (before(sk->window_seq, sk->write_queue.next->h.seq) && - sk->send_head == NULL && - sk->ack_backlog == 0 && - sk->state != TCP_TIME_WAIT) { - reset_timer(sk, TIME_PROBE0, sk->rto); - } - } else { - if (sk->send_head == NULL && sk->ack_backlog == 0 && - sk->state != TCP_TIME_WAIT && !sk->keepopen) { - if (!sk->dead) sk->write_space(sk); + tcp_send_partial(sk); + } - if (sk->keepopen) - reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); - else - delete_timer(sk); - } else { - if (sk->state != (unsigned char) sk->keepopen) { - reset_timer(sk, TIME_WRITE, sk->rto); - } - if (sk->state == TCP_TIME_WAIT) { - reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + /* + * In the LAST_ACK case, the other end FIN'd us. We then FIN'd them, and + * we are now waiting for an acknowledge to our FIN. The other end is + * already in TIME_WAIT. + * + * Move to TCP_CLOSE on success. + */ + + if (sk->state == TCP_LAST_ACK) + { + if (!sk->dead) + sk->state_change(sk); + if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) + { + flag |= 1; + sk->state = TCP_CLOSE; + sk->shutdown = SHUTDOWN_MASK; } } - } - - if (sk->packets_out == 0 && sk->partial != NULL && - skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) { - flag |= 1; - tcp_send_partial(sk); - } - /* - * In the LAST_ACK case, the other end FIN'd us. We then FIN'd them, and - * we are now waiting for an acknowledge to our FIN. The other end is - * already in TIME_WAIT. - * - * Move to TCP_CLOSE on success. 
- */ + /* + * Incoming ACK to a FIN we sent in the case of our initiating the close. + * + * Move to FIN_WAIT2 to await a FIN from the other end. + */ - if (sk->state == TCP_LAST_ACK) { - if (!sk->dead) - sk->state_change(sk); - if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) { - flag |= 1; - sk->state = TCP_CLOSE; - sk->shutdown = SHUTDOWN_MASK; + if (sk->state == TCP_FIN_WAIT1) + { + if (!sk->dead) + sk->state_change(sk); + if (sk->rcv_ack_seq == sk->write_seq) + { + flag |= 1; + if (sk->acked_seq != sk->fin_seq) + { + tcp_time_wait(sk); + } + else + { + sk->shutdown = SHUTDOWN_MASK; + sk->state = TCP_FIN_WAIT2; + } + } } - } - /* - * Incomming ACK to a FIN we sent in the case of our initiating the close. - * - * Move to FIN_WAIT2 to await a FIN from the other end. - */ + /* + * Incoming ACK to a FIN we sent in the case of a simultaneous close. + * + * Move to TIME_WAIT + */ - if (sk->state == TCP_FIN_WAIT1) { - if (!sk->dead) - sk->state_change(sk); - if (sk->rcv_ack_seq == sk->write_seq) { - flag |= 1; - if (sk->acked_seq != sk->fin_seq) { + if (sk->state == TCP_CLOSING) + { + if (!sk->dead) + sk->state_change(sk); + if (sk->rcv_ack_seq == sk->write_seq) + { + flag |= 1; tcp_time_wait(sk); - } else { - sk->shutdown = SHUTDOWN_MASK; - sk->state = TCP_FIN_WAIT2; } } - } - /* - * Incomming ACK to a FIN we sent in the case of a simultanious close. - * - * Move to TIME_WAIT - */ - - if (sk->state == TCP_CLOSING) { - if (!sk->dead) - sk->state_change(sk); - if (sk->rcv_ack_seq == sk->write_seq) { - flag |= 1; - tcp_time_wait(sk); + /* + * I make no guarantees about the first clause in the following + * test, i.e. "(!flag) || (flag&4)". I'm not entirely sure under + * what conditions "!flag" would be true. However I think the rest + * of the conditions would prevent that from causing any + * unnecessary retransmission. + * Clearly if the first packet has expired it should be + * retransmitted.
The other alternative, "flag&2 && retransmits", is + * harder to explain: You have to look carefully at how and when the + * timer is set and with what timeout. The most recent transmission always + * sets the timer. So in general if the most recent thing has timed + * out, everything before it has as well. So we want to go ahead and + * retransmit some more. If we didn't explicitly test for this + * condition with "flag&2 && retransmits", chances are "when + rto < jiffies" + * would not be true. If you look at the pattern of timing, you can + * show that rto is increased fast enough that the next packet would + * almost never be retransmitted immediately. Then you'd end up + * waiting for a timeout to send each packet on the retransmission + * queue. With my implementation of the Karn sampling algorithm, + * the timeout would double each time. The net result is that it would + * take a hideous amount of time to recover from a single dropped packet. + * It's possible that there should also be a test for TIME_WRITE, but + * I think as long as "send_head != NULL" and "retransmit" is on, we've + * got to be in real retransmission mode. + * Note that ip_do_retransmit is called with all==1. Setting cong_window + * back to 1 at the timeout will cause us to send 1, then 2, etc. packets. + * As long as no further losses occur, this seems reasonable. + */ + + if (((!flag) || (flag&4)) && sk->send_head != NULL && + (((flag&2) && sk->retransmits) || + (sk->send_head->when + sk->rto < jiffies))) + { + ip_do_retransmit(sk, 1); + reset_timer(sk, TIME_WRITE, sk->rto); } - } -/* - * I make no guarantees about the first clause in the following - * test, i.e. "(!flag) || (flag&4)". I'm not entirely sure under - * what conditions "!flag" would be true. However I think the rest - * of the conditions would prevent that from causing any - * unnecessary retransmission. - * Clearly if the first packet has expired it should be - * retransmitted.
The other alternative, "flag&2 && retransmits", is - * harder to explain: You have to look carefully at how and when the - * timer is set and with what timeout. The most recent transmission always - * sets the timer. So in general if the most recent thing has timed - * out, everything before it has as well. So we want to go ahead and - * retransmit some more. If we didn't explicitly test for this - * condition with "flag&2 && retransmits", chances are "when + rto < jiffies" - * would not be true. If you look at the pattern of timing, you can - * show that rto is increased fast enough that the next packet would - * almost never be retransmitted immediately. Then you'd end up - * waiting for a timeout to send each packet on the retranmission - * queue. With my implementation of the Karn sampling algorithm, - * the timeout would double each time. The net result is that it would - * take a hideous amount of time to recover from a single dropped packet. - * It's possible that there should also be a test for TIME_WRITE, but - * I think as long as "send_head != NULL" and "retransmit" is on, we've - * got to be in real retransmission mode. - * Note that ip_do_retransmit is called with all==1. Setting cong_window - * back to 1 at the timeout will cause us to send 1, then 2, etc. packets. - * As long as no further losses occur, this seems reasonable. - */ - - if (((!flag) || (flag&4)) && sk->send_head != NULL && - (((flag&2) && sk->retransmits) || - (sk->send_head->when + sk->rto < jiffies))) { - ip_do_retransmit(sk, 1); - reset_timer(sk, TIME_WRITE, sk->rto); - } - - return(1); + return(1); } /* - * This routine handles the data. If there is room in the buffer, - * it will be have already been moved into it. If there is no - * room, then we will just have to discard the packet. + * This routine handles the data. If there is room in the buffer, + * it will be have already been moved into it. If there is no + * room, then we will just have to discard the packet. 
*/ -static int -tcp_data(struct sk_buff *skb, struct sock *sk, + +static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned long saddr, unsigned short len) { - struct sk_buff *skb1, *skb2; - struct tcphdr *th; - int dup_dumped=0; - - th = skb->h.th; - skb->len = len -(th->doff*4); - - sk->bytes_rcv += skb->len; - if (skb->len == 0 && !th->fin && !th->urg && !th->psh) { - /* Don't want to keep passing ack's back and forth. */ - if (!th->ack) tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr); - kfree_skb(skb, FREE_READ); - return(0); - } - - if (sk->shutdown & RCV_SHUTDOWN && skb->len!=0 /* Added AGC */) { - sk->acked_seq = th->seq + skb->len + th->syn + th->fin; - tcp_reset(sk->saddr, sk->daddr, skb->h.th, - sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl); - tcp_statistics.TcpEstabResets++; - sk->state = TCP_CLOSE; - sk->err = EPIPE; - sk->shutdown = SHUTDOWN_MASK; - kfree_skb(skb, FREE_READ); - if (!sk->dead) sk->state_change(sk); - return(0); - } - - /* - * Now we have to walk the chain, and figure out where this one - * goes into it. This is set up so that the last packet we received - * will be the first one we look at, that way if everything comes - * in order, there will be no performance loss, and if they come - * out of order we will be able to fit things in nicely. - */ + struct sk_buff *skb1, *skb2; + struct tcphdr *th; + int dup_dumped=0; + unsigned long new_seq; + + th = skb->h.th; + skb->len = len -(th->doff*4); + + /* The bytes in the receive read/assembly queue has increased. Needed for the + low memory discard algorithm */ + + sk->bytes_rcv += skb->len; + + if (skb->len == 0 && !th->fin && !th->urg && !th->psh) + { + /* + * Don't want to keep passing ack's back and forth. + * (someone sent us dataless, boring frame) + */ + if (!th->ack) + tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr); + kfree_skb(skb, FREE_READ); + return(0); + } + + /* + * We no longer have anyone receiving data on this connection. 
+ */ - /* This should start at the last one, and then go around forwards. */ - if (skb_peek(&sk->receive_queue) == NULL) { - skb_queue_head(&sk->receive_queue,skb); - skb1= NULL; - } else { - for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) { - if(sk->debug) - { - printk("skb1=%p :", skb1); - printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq); - printk("skb->h.th->seq = %ld\n",skb->h.th->seq); - printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq, - sk->acked_seq); - } - if (th->seq==skb1->h.th->seq && skb->len>= skb1->len) - { - skb_append(skb1,skb); - skb_unlink(skb1); - kfree_skb(skb1,FREE_READ); - dup_dumped=1; - skb1=NULL; - break; - } - if (after(th->seq+1, skb1->h.th->seq)) + if(sk->shutdown & RCV_SHUTDOWN) + { + new_seq= th->seq + skb->len + th->syn; /* Right edge of _data_ part of frame */ + + if(after(new_seq,sk->copied_seq+1)) /* If the right edge of this frame is after the last copied byte + then it contains data we will never touch. We send an RST to + ensure the far end knows it never got to the application */ { - skb_append(skb1,skb); - break; + sk->acked_seq = new_seq + th->fin; + tcp_reset(sk->saddr, sk->daddr, skb->h.th, + sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl); + tcp_statistics.TcpEstabResets++; + sk->state = TCP_CLOSE; + sk->err = EPIPE; + sk->shutdown = SHUTDOWN_MASK; + kfree_skb(skb, FREE_READ); + if (!sk->dead) + sk->state_change(sk); + return(0); } - if (skb1 == skb_peek(&sk->receive_queue)) + /* Discard the frame here - we've already proved its a duplicate */ + + kfree_skb(skb, FREE_READ); + return(0); + } + /* + * Now we have to walk the chain, and figure out where this one + * goes into it. This is set up so that the last packet we received + * will be the first one we look at, that way if everything comes + * in order, there will be no performance loss, and if they come + * out of order we will be able to fit things in nicely. + */ + + /* + * This should start at the last one, and then go around forwards. 
+ */ + + if (skb_peek(&sk->receive_queue) == NULL) /* Empty queue is easy case */ + { + skb_queue_head(&sk->receive_queue,skb); + skb1= NULL; + } + else + { + for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) { - skb_queue_head(&sk->receive_queue, skb); - break; + if(sk->debug) + { + printk("skb1=%p :", skb1); + printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq); + printk("skb->h.th->seq = %ld\n",skb->h.th->seq); + printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq, + sk->acked_seq); + } + + /* + * Optimisation: Duplicate frame or extension of previous frame from + * same sequence point (lost ack case). + * The frame contains duplicate data or replaces a previous frame + * discard the previous frame (safe as sk->inuse is set) and put + * the new one in its place. + */ + + if (th->seq==skb1->h.th->seq && skb->len>= skb1->len) + { + skb_append(skb1,skb); + skb_unlink(skb1); + kfree_skb(skb1,FREE_READ); + dup_dumped=1; + skb1=NULL; + break; + } + + /* + * Found where it fits + */ + + if (after(th->seq+1, skb1->h.th->seq)) + { + skb_append(skb1,skb); + break; + } + + /* + * See if we've hit the start. If so insert. + */ + if (skb1 == skb_peek(&sk->receive_queue)) + { + skb_queue_head(&sk->receive_queue, skb); + break; + } } + } + + /* + * Figure out what the ack value for this frame is + */ + + th->ack_seq = th->seq + skb->len; + if (th->syn) + th->ack_seq++; + if (th->fin) + th->ack_seq++; + + if (before(sk->acked_seq, sk->copied_seq)) + { + printk("*** tcp.c:tcp_data bug acked < copied\n"); + sk->acked_seq = sk->copied_seq; } - } - th->ack_seq = th->seq + skb->len; - if (th->syn) th->ack_seq++; - if (th->fin) th->ack_seq++; - - if (before(sk->acked_seq, sk->copied_seq)) { - printk("*** tcp.c:tcp_data bug acked < copied\n"); - sk->acked_seq = sk->copied_seq; - } - - /* Now figure out if we can ack anything. 
*/ - if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) { - if (before(th->seq, sk->acked_seq+1)) { - int newwindow; - - if (after(th->ack_seq, sk->acked_seq)) { - newwindow = sk->window - - (th->ack_seq - sk->acked_seq); - if (newwindow < 0) - newwindow = 0; - sk->window = newwindow; - sk->acked_seq = th->ack_seq; - } - skb->acked = 1; - - /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */ - if (skb->h.th->fin) { - if (!sk->dead) sk->state_change(sk); - sk->shutdown |= RCV_SHUTDOWN; - } + /* + * Now figure out if we can ack anything. + */ + + if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) + { + if (before(th->seq, sk->acked_seq+1)) + { + int newwindow; + + if (after(th->ack_seq, sk->acked_seq)) + { + newwindow = sk->window-(th->ack_seq - sk->acked_seq); + if (newwindow < 0) + newwindow = 0; + sk->window = newwindow; + sk->acked_seq = th->ack_seq; + } + skb->acked = 1; + + /* + * When we ack the fin, we turn on the RCV_SHUTDOWN flag. 
+ */ + + if (skb->h.th->fin) + { + if (!sk->dead) + sk->state_change(sk); + sk->shutdown |= RCV_SHUTDOWN; + } - for(skb2 = skb->next; - skb2 != (struct sk_buff *)&sk->receive_queue; - skb2 = skb2->next) { - if (before(skb2->h.th->seq, sk->acked_seq+1)) { - if (after(skb2->h.th->ack_seq, sk->acked_seq)) - { - newwindow = sk->window - - (skb2->h.th->ack_seq - sk->acked_seq); - if (newwindow < 0) - newwindow = 0; - sk->window = newwindow; - sk->acked_seq = skb2->h.th->ack_seq; - } - skb2->acked = 1; + for(skb2 = skb->next; + skb2 != (struct sk_buff *)&sk->receive_queue; + skb2 = skb2->next) + { + if (before(skb2->h.th->seq, sk->acked_seq+1)) + { + if (after(skb2->h.th->ack_seq, sk->acked_seq)) + { + newwindow = sk->window - + (skb2->h.th->ack_seq - sk->acked_seq); + if (newwindow < 0) + newwindow = 0; + sk->window = newwindow; + sk->acked_seq = skb2->h.th->ack_seq; + } + skb2->acked = 1; + /* + * When we ack the fin, we turn on + * the RCV_SHUTDOWN flag. + */ + if (skb2->h.th->fin) + { + sk->shutdown |= RCV_SHUTDOWN; + if (!sk->dead) + sk->state_change(sk); + } - /* - * When we ack the fin, we turn on - * the RCV_SHUTDOWN flag. - */ - if (skb2->h.th->fin) { - sk->shutdown |= RCV_SHUTDOWN; - if (!sk->dead) sk->state_change(sk); + /* + * Force an immediate ack. + */ + + sk->ack_backlog = sk->max_ack_backlog; } - - /* Force an immediate ack. */ - sk->ack_backlog = sk->max_ack_backlog; - } else { - break; + else + { + break; + } } - } - /* - * This also takes care of updating the window. - * This if statement needs to be simplified. - */ - if (!sk->delay_acks || - sk->ack_backlog >= sk->max_ack_backlog || - sk->bytes_rcv > sk->max_unacked || th->fin) { -/* tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */ - } else { - sk->ack_backlog++; - if(sk->debug) - printk("Ack queued.\n"); - reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); + /* + * This also takes care of updating the window. + * This if statement needs to be simplified. 
+ */ + if (!sk->delay_acks || + sk->ack_backlog >= sk->max_ack_backlog || + sk->bytes_rcv > sk->max_unacked || th->fin) { + /* tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */ + } + else + { + sk->ack_backlog++; + if(sk->debug) + printk("Ack queued.\n"); + reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); + } } } - } - /* - * If we've missed a packet, send an ack. - * Also start a timer to send another. - */ - if (!skb->acked) { /* - * This is important. If we don't have much room left, - * we need to throw out a few packets so we have a good - * window. Note that mtu is used, not mss, because mss is really - * for the send side. He could be sending us stuff as large as mtu. - */ - while (sk->prot->rspace(sk) < sk->mtu) { - skb1 = skb_peek(&sk->receive_queue); - if (skb1 == NULL) { - printk("INET: tcp.c:tcp_data memory leak detected.\n"); - break; - } + * If we've missed a packet, send an ack. + * Also start a timer to send another. + */ + + if (!skb->acked) + { + + /* + * This is important. If we don't have much room left, + * we need to throw out a few packets so we have a good + * window. Note that mtu is used, not mss, because mss is really + * for the send side. He could be sending us stuff as large as mtu. + */ + + while (sk->prot->rspace(sk) < sk->mtu) + { + skb1 = skb_peek(&sk->receive_queue); + if (skb1 == NULL) + { + printk("INET: tcp.c:tcp_data memory leak detected.\n"); + break; + } - /* Don't throw out something that has been acked. */ - if (skb1->acked) { - break; - } + /* + * Don't throw out something that has been acked. 
+ */ + + if (skb1->acked) + { + break; + } - skb_unlink(skb1); - kfree_skb(skb1, FREE_READ); + skb_unlink(skb1); + kfree_skb(skb1, FREE_READ); + } + tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); + sk->ack_backlog++; + reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); } - tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); - sk->ack_backlog++; - reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); - } else { - /* We missed a packet. Send an ack to try to resync things. */ - tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); - } - - /* Now tell the user we may have some data. */ - if (!sk->dead) { - if(sk->debug) - printk("Data wakeup.\n"); - sk->data_ready(sk,0); - } - -#ifdef NOTDEF /* say what? this is handled by tcp_ack() */ - - if (sk->state == TCP_FIN_WAIT2 && - sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->write_seq) { -/* tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); */ - sk->shutdown = SHUTDOWN_MASK; - sk->state = TCP_LAST_ACK; - if (!sk->dead) sk->state_change(sk); - } -#endif + else + { + /* We missed a packet. Send an ack to try to resync things. */ + tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); + } - return(0); + /* + * Now tell the user we may have some data. + */ + + if (!sk->dead) + { + if(sk->debug) + printk("Data wakeup.\n"); + sk->data_ready(sk,0); + } + return(0); } @@ -3062,47 +3350,56 @@ static struct sock * tcp_accept(struct sock *sk, int flags) { - struct sock *newsk; - struct sk_buff *skb; + struct sock *newsk; + struct sk_buff *skb; /* * We need to make sure that this socket is listening, * and that it has something pending. */ - if (sk->state != TCP_LISTEN) { - sk->err = EINVAL; - return(NULL); - } - - /* avoid the race. 
*/ - cli(); - sk->inuse = 1; - while((skb = skb_dequeue(&sk->receive_queue)) == NULL) { - if (flags & O_NONBLOCK) { - sti(); - release_sock(sk); - sk->err = EAGAIN; - return(NULL); - } - release_sock(sk); - interruptible_sleep_on(sk->sleep); - if (current->signal & ~current->blocked) { - sti(); - sk->err = ERESTARTSYS; - return(NULL); + if (sk->state != TCP_LISTEN) + { + sk->err = EINVAL; + return(NULL); } + + /* Avoid the race. */ + cli(); sk->inuse = 1; - } - sti(); + + while((skb = skb_dequeue(&sk->receive_queue)) == NULL) + { + if (flags & O_NONBLOCK) + { + sti(); + release_sock(sk); + sk->err = EAGAIN; + return(NULL); + } + + release_sock(sk); + interruptible_sleep_on(sk->sleep); + if (current->signal & ~current->blocked) + { + sti(); + sk->err = ERESTARTSYS; + return(NULL); + } + sk->inuse = 1; + } + sti(); + + /* + * Now all we need to do is return skb->sk. + */ - /* Now all we need to do is return skb->sk. */ - newsk = skb->sk; + newsk = skb->sk; - kfree_skb(skb, FREE_READ); - sk->ack_backlog--; - release_sock(sk); - return(newsk); + kfree_skb(skb, FREE_READ); + sk->ack_backlog--; + release_sock(sk); + return(newsk); } @@ -3113,40 +3410,33 @@ static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) { struct sk_buff *buff; - struct sockaddr_in sin; struct device *dev=NULL; unsigned char *ptr; int tmp; struct tcphdr *t1; - int err; struct rtable *rt; if (sk->state != TCP_CLOSE) return(-EISCONN); + if (addr_len < 8) return(-EINVAL); - err=verify_area(VERIFY_READ, usin, addr_len); - if(err) - return err; - - memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len)); - - if (sin.sin_family && sin.sin_family != AF_INET) + if (usin->sin_family && usin->sin_family != AF_INET) return(-EAFNOSUPPORT); /* * connect() to INADDR_ANY means loopback (BSD'ism). 
*/ - if(sin.sin_addr.s_addr==INADDR_ANY) - sin.sin_addr.s_addr=ip_my_addr(); + if(usin->sin_addr.s_addr==INADDR_ANY) + usin->sin_addr.s_addr=ip_my_addr(); /* * Don't want a TCP connection going to a broadcast address */ - if (ip_chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) + if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) { return -ENETUNREACH; } @@ -3155,16 +3445,16 @@ * Connect back to the same socket: Blows up so disallow it */ - if(sk->saddr == sin.sin_addr.s_addr && sk->num==ntohs(sin.sin_port)) + if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port)) return -EBUSY; sk->inuse = 1; - sk->daddr = sin.sin_addr.s_addr; + sk->daddr = usin->sin_addr.s_addr; sk->write_seq = jiffies * SEQ_TICK - seq_offset; sk->window_seq = sk->write_seq; sk->rcv_ack_seq = sk->write_seq -1; sk->err = 0; - sk->dummy_th.dest = sin.sin_port; + sk->dummy_th.dest = usin->sin_port; release_sock(sk); buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL); @@ -3328,403 +3618,536 @@ } +#ifdef TCP_FASTPATH +/* + * Is the end of the queue clear of fragments as yet unmerged into the data stream? + * Returns 1 (yes) if either of the following holds, otherwise 0: + * a) The queue is empty + * b) The last frame on the queue has the acked flag set + */ + +static inline int tcp_clean_end(struct sock *sk) +{ + struct sk_buff *skb=skb_peek(&sk->receive_queue); + /* Always return a value: falling off the end left the "not clean" result undefined. */ + return (skb==NULL || sk->receive_queue.prev->acked) ? 1 : 0; +} + +#endif + int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, unsigned long daddr, unsigned short len, unsigned long saddr, int redo, struct inet_protocol * protocol) { - struct tcphdr *th; - struct sock *sk; - - if (!skb) { - return(0); - } - - if (!dev) - { - return(0); - } - - tcp_statistics.TcpInSegs++; - - if(skb->pkt_type!=PACKET_HOST) - { - kfree_skb(skb,FREE_READ); - return(0); - } - - th = skb->h.th; - - /* Find the socket. 
*/ - sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr); - /* If this socket has got a reset its to all intents and purposes - really dead */ - if (sk!=NULL && sk->zapped) - sk=NULL; - - if (!redo) { - if (tcp_check(th, len, saddr, daddr )) { - skb->sk = NULL; - kfree_skb(skb,FREE_READ); - /* - * We don't release the socket because it was - * never marked in use. - */ - return(0); - } - - th->seq = ntohl(th->seq); + struct tcphdr *th; + struct sock *sk; - /* See if we know about the socket. */ - if (sk == NULL) { - if (!th->rst) - tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); - skb->sk = NULL; - kfree_skb(skb, FREE_READ); + if (!skb) + { return(0); } - skb->len = len; - skb->sk = sk; - skb->acked = 0; - skb->used = 0; - skb->free = 0; - skb->saddr = daddr; - skb->daddr = saddr; - - /* We may need to add it to the backlog here. */ - cli(); - if (sk->inuse) { - skb_queue_head(&sk->back_log, skb); - sti(); + if (!dev) + { return(0); } - sk->inuse = 1; - sti(); - } else { - if (!sk) { - return(0); + + tcp_statistics.TcpInSegs++; + + if(skb->pkt_type!=PACKET_HOST) + { + kfree_skb(skb,FREE_READ); + return(0); } - } - - if (!sk->prot) { - return(0); - } - - /* Charge the memory to the socket. */ - if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) { - skb->sk = NULL; - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - sk->rmem_alloc += skb->mem_len; - + + th = skb->h.th; - /* Now deal with it. */ - switch(sk->state) { /* - * This should close the system down if it's waiting - * for an ack that is never going to be sent. + * Find the socket. 
*/ - case TCP_LAST_ACK: - if (th->rst) { - sk->zapped=1; - sk->err = ECONNRESET; - sk->state = TCP_CLOSE; - sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) { - sk->state_change(sk); - } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - case TCP_ESTABLISHED: - case TCP_CLOSE_WAIT: - case TCP_CLOSING: - case TCP_FIN_WAIT1: - case TCP_FIN_WAIT2: - case TCP_TIME_WAIT: - if (!tcp_sequence(sk, th, len, opt, saddr,dev)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - if (th->rst) - { - tcp_statistics.TcpEstabResets++; - tcp_statistics.TcpCurrEstab--; - sk->zapped=1; - /* This means the thing should really be closed. */ - sk->err = ECONNRESET; + sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr); - if (sk->state == TCP_CLOSE_WAIT) - { - sk->err = EPIPE; - } + /* + * If this socket has got a reset its to all intents and purposes + * really dead + */ + + if (sk!=NULL && sk->zapped) + sk=NULL; + if (!redo) + { + if (tcp_check(th, len, saddr, daddr )) + { + skb->sk = NULL; + kfree_skb(skb,FREE_READ); /* - * A reset with a fin just means that - * the data was not all read. + * We don't release the socket because it was + * never marked in use. */ - sk->state = TCP_CLOSE; - sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) - { - sk->state_change(sk); - } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - if (th->syn) - { - tcp_statistics.TcpCurrEstab--; - tcp_statistics.TcpEstabResets++; - sk->err = ECONNRESET; - sk->state = TCP_CLOSE; - sk->shutdown = SHUTDOWN_MASK; - tcp_reset(daddr, saddr, th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl); - if (!sk->dead) { - sk->state_change(sk); - } - kfree_skb(skb, FREE_READ); - release_sock(sk); return(0); } + th->seq = ntohl(th->seq); - if (th->ack && !tcp_ack(sk, th, saddr, len)) { + /* See if we know about the socket. 
*/ + if (sk == NULL) + { + if (!th->rst) + tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); + skb->sk = NULL; kfree_skb(skb, FREE_READ); - release_sock(sk); return(0); } - if (tcp_urg(sk, th, saddr, len)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); + skb->len = len; + skb->sk = sk; + skb->acked = 0; + skb->used = 0; + skb->free = 0; + skb->saddr = daddr; + skb->daddr = saddr; + + /* We may need to add it to the backlog here. */ + cli(); + if (sk->inuse) + { + skb_queue_head(&sk->back_log, skb); + sti(); return(0); } - - if (tcp_data(skb, sk, saddr, len)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); + sk->inuse = 1; + sti(); + } + else + { + if (!sk) + { return(0); } + } - /* Moved: you must do data then fin bit */ - if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - release_sock(sk); + if (!sk->prot) + { return(0); + } - case TCP_CLOSE: - if (sk->dead || sk->daddr) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - if (!th->rst) { - if (!th->ack) - th->ack_seq = 0; - tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); - } + /* + * Charge the memory to the socket. + */ + + if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) + { + skb->sk = NULL; kfree_skb(skb, FREE_READ); release_sock(sk); return(0); + } - case TCP_LISTEN: - if (th->rst) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - if (th->ack) { - tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } + sk->rmem_alloc += skb->mem_len; - if (th->syn) +#ifdef TCP_FASTPATH +/* + * Incoming data stream fastpath. + * + * We try to optimise two things. + * 1) Spot general data arriving without funny options and skip extra checks and the switch. 
+ * 2) Spot the common case in raw data receive streams of a packet that has no funny options, + * fits exactly on the end of the current queue and may or may not have the ack bit set. + * + * Case two especially is done inline in this routine so there are no long jumps causing heavy + * cache thrashing, no function call overhead (except for the ack sending if needed) and for + * speed although further optimizing here is possible. + */ + + /* Im trusting gcc to optimise this sensibly... might need judicious application of a software mallet */ + if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst && !th->urg) + { + /* Packets in order. Fits window */ + if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk)) { + /* Ack is harder */ + if(th->ack && !tcp_ack(sk, th, saddr, len)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } /* - * Now we just put the whole thing including - * the header and saddr, and protocol pointer - * into the buffer. We can't respond until the - * user tells us to accept the connection. 
+ * Set up variables */ - tcp_conn_request(sk, skb, daddr, saddr, opt, dev); - release_sock(sk); - return(0); + skb->len -= (th->doff *4); + sk->bytes_rcv += skb->len; + tcp_rx_hit2++; + if(skb->len) + { + skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */ + if(sk->window >= skb->len) + sk->window-=skb->len; /* We know its effect on the window */ + else + sk->window=0; + sk->acked_seq = th->ack_seq; /* Easy */ + skb->acked=1; /* Guaranteed true */ + if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || + sk->bytes_rcv > sk->max_unacked) + { + tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr); + } + else + { + sk->ack_backlog++; + reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); + } + if(!sk->dead) + sk->data_ready(sk,0); + return 0; + } } - - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - - case TCP_SYN_RECV: - if (th->syn) { - /* Probably a retransmitted syn */ + /* + * More generic case of arriving data stream in ESTABLISHED + */ + tcp_rx_hit1++; + if(!tcp_sequence(sk, th, len, opt, saddr, dev)) + { kfree_skb(skb, FREE_READ); release_sock(sk); - return(0); + return 0; } - - - default: - if (!tcp_sequence(sk, th, len, opt, saddr,dev)) + if(th->ack && !tcp_ack(sk, th, saddr, len)) { kfree_skb(skb, FREE_READ); release_sock(sk); - return(0); + return 0; } + if(tcp_data(skb, sk, saddr, len)) + kfree_skb(skb, FREE_READ); + release_sock(sk); + return 0; + } + tcp_rx_miss++; +#endif - case TCP_SYN_SENT: - if (th->rst) - { - tcp_statistics.TcpAttemptFails++; - sk->err = ECONNREFUSED; - sk->state = TCP_CLOSE; - sk->shutdown = SHUTDOWN_MASK; - sk->zapped = 1; - if (!sk->dead) + /* + * Now deal with all cases. + */ + + switch(sk->state) + { + + /* + * This should close the system down if it's waiting + * for an ack that is never going to be sent. 
+ */ + case TCP_LAST_ACK: + if (th->rst) { - sk->state_change(sk); + sk->zapped=1; + sk->err = ECONNRESET; + sk->state = TCP_CLOSE; + sk->shutdown = SHUTDOWN_MASK; + if (!sk->dead) + { + sk->state_change(sk); + } + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - if (!th->ack) - { - if (th->syn) + + case TCP_ESTABLISHED: + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + case TCP_FIN_WAIT1: + case TCP_FIN_WAIT2: + case TCP_TIME_WAIT: + if (!tcp_sequence(sk, th, len, opt, saddr,dev)) { - sk->state = TCP_SYN_RECV; + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); } - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - switch(sk->state) - { - case TCP_SYN_SENT: - if (!tcp_ack(sk, th, saddr, len)) + if (th->rst) + { + tcp_statistics.TcpEstabResets++; + tcp_statistics.TcpCurrEstab--; + sk->zapped=1; + /* This means the thing should really be closed. */ + sk->err = ECONNRESET; + if (sk->state == TCP_CLOSE_WAIT) { - tcp_statistics.TcpAttemptFails++; - tcp_reset(daddr, saddr, th, - sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); + sk->err = EPIPE; } - + /* - * If the syn bit is also set, switch to - * tcp_syn_recv, and then to established. + * A reset with a fin just means that + * the data was not all read. 
*/ - if (!th->syn) + sk->state = TCP_CLOSE; + sk->shutdown = SHUTDOWN_MASK; + if (!sk->dead) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); + sk->state_change(sk); + } + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + if (th->syn) + { + tcp_statistics.TcpCurrEstab--; + tcp_statistics.TcpEstabResets++; + sk->err = ECONNRESET; + sk->state = TCP_CLOSE; + sk->shutdown = SHUTDOWN_MASK; + tcp_reset(daddr, saddr, th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl); + if (!sk->dead) { + sk->state_change(sk); } + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + if (th->ack && !tcp_ack(sk, th, saddr, len)) { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + if (tcp_urg(sk, th, saddr, len)) { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } - /* Ack the syn and fall through. */ - sk->acked_seq = th->seq+1; - sk->fin_seq = th->seq; - tcp_send_ack(sk->sent_seq, th->seq+1, - sk, th, sk->daddr); + if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } - case TCP_SYN_RECV: - if (!tcp_ack(sk, th, saddr, len)) - { - tcp_statistics.TcpAttemptFails++; - tcp_reset(daddr, saddr, th, - sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl); - kfree_skb(skb, FREE_READ); + if (tcp_data(skb, sk, saddr, len)) { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + release_sock(sk); + return(0); + + case TCP_CLOSE: + if (sk->dead || sk->daddr) { + kfree_skb(skb, FREE_READ); release_sock(sk); - return(0); - } - - tcp_statistics.TcpCurrEstab++; - sk->state = TCP_ESTABLISHED; - + return(0); + } + + if (!th->rst) { + if (!th->ack) + th->ack_seq = 0; + tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); + } + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + + case TCP_LISTEN: + if (th->rst) { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + if (th->ack) { + tcp_reset(daddr, saddr, th, sk->prot, 
opt,dev,sk->ip_tos,sk->ip_ttl); + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + if (th->syn) + { /* - * Now we need to finish filling out - * some of the tcp header. + * Now we just put the whole thing including + * the header and saddr, and protocol pointer + * into the buffer. We can't respond until the + * user tells us to accept the connection. */ - /* We need to check for mtu info. */ - tcp_options(sk, th); - sk->dummy_th.dest = th->source; - sk->copied_seq = sk->acked_seq-1; - if (!sk->dead) { + tcp_conn_request(sk, skb, daddr, saddr, opt, dev); + release_sock(sk); + return(0); + } + + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + + case TCP_SYN_RECV: + if (th->syn) { + /* Probably a retransmitted syn */ + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + + default: + if (!tcp_sequence(sk, th, len, opt, saddr,dev)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + case TCP_SYN_SENT: + if (th->rst) + { + tcp_statistics.TcpAttemptFails++; + sk->err = ECONNREFUSED; + sk->state = TCP_CLOSE; + sk->shutdown = SHUTDOWN_MASK; + sk->zapped = 1; + if (!sk->dead) + { sk->state_change(sk); } - - /* - * We've already processed his first - * ack. In just about all cases that - * will have set max_window. This is - * to protect us against the possibility - * that the initial window he sent was 0. - * This must occur after tcp_options, which - * sets sk->mtu. - */ - if (sk->max_window == 0) { - sk->max_window = 32; - sk->mss = min(sk->max_window, sk->mtu); + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + if (!th->ack) + { + if (th->syn) + { + sk->state = TCP_SYN_RECV; } - - /* - * Now process the rest like we were - * already in the established state. 
- */ - if (th->urg) { - if (tcp_urg(sk, th, saddr, len)) { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + switch(sk->state) + { + case TCP_SYN_SENT: + if (!tcp_ack(sk, th, saddr, len)) + { + tcp_statistics.TcpAttemptFails++; + tcp_reset(daddr, saddr, th, + sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + /* + * If the syn bit is also set, switch to + * tcp_syn_recv, and then to established. + */ + if (!th->syn) + { kfree_skb(skb, FREE_READ); release_sock(sk); return(0); } - } - if (tcp_data(skb, sk, saddr, len)) + + /* Ack the syn and fall through. */ + sk->acked_seq = th->seq+1; + sk->fin_seq = th->seq; + tcp_send_ack(sk->sent_seq, th->seq+1, + sk, th, sk->daddr); + + case TCP_SYN_RECV: + if (!tcp_ack(sk, th, saddr, len)) + { + tcp_statistics.TcpAttemptFails++; + tcp_reset(daddr, saddr, th, + sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl); kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + + tcp_statistics.TcpCurrEstab++; + sk->state = TCP_ESTABLISHED; + + /* + * Now we need to finish filling out + * some of the tcp header. + * + * We need to check for mtu info. + */ + tcp_options(sk, th); + sk->dummy_th.dest = th->source; + sk->copied_seq = sk->acked_seq-1; + if (!sk->dead) + { + sk->state_change(sk); + } + + /* + * We've already processed his first + * ack. In just about all cases that + * will have set max_window. This is + * to protect us against the possibility + * that the initial window he sent was 0. + * This must occur after tcp_options, which + * sets sk->mtu. + */ + if (sk->max_window == 0) + { + sk->max_window = 32; + sk->mss = min(sk->max_window, sk->mtu); + } - if (th->fin) tcp_fin(skb, sk, th, saddr, dev); - release_sock(sk); - return(0); - } + /* + * Now process the rest like we were + * already in the established state. 
+ */ + if (th->urg) + { + if (tcp_urg(sk, th, saddr, len)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + } + if (tcp_data(skb, sk, saddr, len)) + kfree_skb(skb, FREE_READ); - if (th->urg) { - if (tcp_urg(sk, th, saddr, len)) { + if (th->fin) + tcp_fin(skb, sk, th, saddr, dev); + release_sock(sk); + return(0); + } + + if (th->urg) + { + if (tcp_urg(sk, th, saddr, len)) + { + kfree_skb(skb, FREE_READ); + release_sock(sk); + return(0); + } + } + if (tcp_data(skb, sk, saddr, len)) + { kfree_skb(skb, FREE_READ); release_sock(sk); return(0); } - } - - if (tcp_data(skb, sk, saddr, len)) { - kfree_skb(skb, FREE_READ); - release_sock(sk); - return(0); - } - - if (!th->fin) { + + if (!th->fin) + { + release_sock(sk); + return(0); + } + tcp_fin(skb, sk, th, saddr, dev); release_sock(sk); return(0); - } - tcp_fin(skb, sk, th, saddr, dev); - release_sock(sk); - return(0); } } @@ -3897,33 +4320,33 @@ struct proto tcp_prot = { - sock_wmalloc, - sock_rmalloc, - sock_wfree, - sock_rfree, - sock_rspace, - sock_wspace, - tcp_close, - tcp_read, - tcp_write, - tcp_sendto, - tcp_recvfrom, - ip_build_header, - tcp_connect, - tcp_accept, - ip_queue_xmit, - tcp_retransmit, - tcp_write_wakeup, - tcp_read_wakeup, - tcp_rcv, - tcp_select, - tcp_ioctl, - NULL, - tcp_shutdown, - tcp_setsockopt, - tcp_getsockopt, - 128, - 0, - {NULL,}, - "TCP" + sock_wmalloc, + sock_rmalloc, + sock_wfree, + sock_rfree, + sock_rspace, + sock_wspace, + tcp_close, + tcp_read, + tcp_write, + tcp_sendto, + tcp_recvfrom, + ip_build_header, + tcp_connect, + tcp_accept, + ip_queue_xmit, + tcp_retransmit, + tcp_write_wakeup, + tcp_read_wakeup, + tcp_rcv, + tcp_select, + tcp_ioctl, + NULL, + tcp_shutdown, + tcp_setsockopt, + tcp_getsockopt, + 128, + 0, + {NULL,}, + "TCP" }; diff -u --recursive --new-file v1.1.19/linux/net/inet/udp.c linux/net/inet/udp.c --- v1.1.19/linux/net/inet/udp.c Thu Jun 2 13:50:57 1994 +++ linux/net/inet/udp.c Fri Jun 17 07:54:03 1994 @@ -38,6 +38,8 @@ * Alan Cox : 
MSG_DONTROUTE, and 0.0.0.0 support. * Matt Dillon : UDP length checks. * Alan Cox : Smarter af_inet used properly. + * Alan Cox : Use new kernel side addressing. + * Alan Cox : Incorrect return on truncated datagram receive. * * * This program is free software; you can redistribute it and/or @@ -104,6 +106,7 @@ /* * Find the 8 bytes of post IP header ICMP included for usA */ + th = (struct udphdr *)header; sk = get_sock(&udp_prot, th->source, daddr, th->dest, saddr); @@ -332,7 +335,6 @@ { struct sockaddr_in sin; int tmp; - int err; /* * Check the flags. We support no flags for UDP sending @@ -347,10 +349,7 @@ { if (addr_len < sizeof(sin)) return(-EINVAL); - err=verify_area(VERIFY_READ, usin, sizeof(sin)); - if(err) - return err; - memcpy_fromfs(&sin, usin, sizeof(sin)); + memcpy(&sin,usin,sizeof(sin)); if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL); if (sin.sin_port == 0) @@ -461,6 +460,7 @@ int *addr_len) { int copied = 0; + int truesize; struct sk_buff *skb; int er; @@ -469,20 +469,8 @@ */ if (addr_len) - { - er=verify_area(VERIFY_WRITE, addr_len, sizeof(*addr_len)); - if(er) - return(er); - put_fs_long(sizeof(*sin), addr_len); - } + *addr_len=sizeof(*sin); - if(sin) - { - er=verify_area(VERIFY_WRITE, sin, sizeof(*sin)); - if(er) - return(er); - } - /* * From here the generic datagram does a lot of the work. Come * the finished NET3, it will do _ALL_ the work! @@ -492,7 +480,8 @@ if(skb==NULL) return er; - copied = min(len, skb->len); + truesize = skb->len; + copied = min(len, truesize); /* * FIXME : should use udp header size info value @@ -504,17 +493,14 @@ /* Copy the address. 
*/ if (sin) { - struct sockaddr_in addr; - - addr.sin_family = AF_INET; - addr.sin_port = skb->h.uh->source; - addr.sin_addr.s_addr = skb->daddr; - memcpy_tofs(sin, &addr, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_port = skb->h.uh->source; + sin->sin_addr.s_addr = skb->daddr; } skb_free_datagram(skb); release_sock(sk); - return(copied); + return(truesize); } /* @@ -528,42 +514,34 @@ } -int -udp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) +int udp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) { - struct sockaddr_in sin; - int er; - - if (addr_len < sizeof(sin)) + if (addr_len < sizeof(*usin)) return(-EINVAL); - er=verify_area(VERIFY_READ, usin, sizeof(sin)); - if(er) - return er; - - memcpy_fromfs(&sin, usin, sizeof(sin)); - if (sin.sin_family && sin.sin_family != AF_INET) + if (usin->sin_family && usin->sin_family != AF_INET) return(-EAFNOSUPPORT); - if (sin.sin_addr.s_addr==INADDR_ANY) - sin.sin_addr.s_addr=ip_my_addr(); + if (usin->sin_addr.s_addr==INADDR_ANY) + usin->sin_addr.s_addr=ip_my_addr(); - if(!sk->broadcast && ip_chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST) + if(!sk->broadcast && ip_chk_addr(usin->sin_addr.s_addr)==IS_BROADCAST) return -EACCES; /* Must turn broadcast on first */ - sk->daddr = sin.sin_addr.s_addr; - sk->dummy_th.dest = sin.sin_port; + sk->daddr = usin->sin_addr.s_addr; + sk->dummy_th.dest = usin->sin_port; sk->state = TCP_ESTABLISHED; return(0); } -static void -udp_close(struct sock *sk, int timeout) +static void udp_close(struct sock *sk, int timeout) { - sk->inuse = 1; - sk->state = TCP_CLOSE; - if (sk->dead) destroy_sock(sk); - else release_sock(sk); + sk->inuse = 1; + sk->state = TCP_CLOSE; + if (sk->dead) + destroy_sock(sk); + else + release_sock(sk); } @@ -672,33 +650,34 @@ struct proto udp_prot = { - sock_wmalloc, - sock_rmalloc, - sock_wfree, - sock_rfree, - sock_rspace, - sock_wspace, - udp_close, - udp_read, - udp_write, - udp_sendto, - udp_recvfrom, - ip_build_header, - 
udp_connect, - NULL, - ip_queue_xmit, - ip_retransmit, - NULL, - NULL, - udp_rcv, - datagram_select, - udp_ioctl, - NULL, - NULL, - ip_setsockopt, - ip_getsockopt, - 128, - 0, - {NULL,}, - "UDP" + sock_wmalloc, + sock_rmalloc, + sock_wfree, + sock_rfree, + sock_rspace, + sock_wspace, + udp_close, + udp_read, + udp_write, + udp_sendto, + udp_recvfrom, + ip_build_header, + udp_connect, + NULL, + ip_queue_xmit, + ip_retransmit, + NULL, + NULL, + udp_rcv, + datagram_select, + udp_ioctl, + NULL, + NULL, + ip_setsockopt, + ip_getsockopt, + 128, + 0, + {NULL,}, + "UDP" }; + diff -u --recursive --new-file v1.1.19/linux/net/socket.c linux/net/socket.c --- v1.1.19/linux/net/socket.c Fri Jun 17 15:20:08 1994 +++ linux/net/socket.c Fri Jun 17 07:54:04 1994 @@ -13,6 +13,10 @@ * Alan Cox : verify_area() fixes * Alan Cox : Removed DDI * Jonathan Kamens : SOCK_DGRAM reconnect bug + * Alan Cox : Moved a load of checks to the very + * top level. + * Alan Cox : Move address structures to/from user + * mode above the protocol layers. * * * This program is free software; you can redistribute it and/or @@ -59,50 +63,119 @@ unsigned int cmd, unsigned long arg); +/* + * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear + * in the operation structures but are done directly via the socketcall() multiplexor. + */ + static struct file_operations socket_file_ops = { - sock_lseek, - sock_read, - sock_write, - sock_readdir, - sock_select, - sock_ioctl, - NULL, /* mmap */ - NULL, /* no special open code... */ - sock_close + sock_lseek, + sock_read, + sock_write, + sock_readdir, + sock_select, + sock_ioctl, + NULL, /* mmap */ + NULL, /* no special open code... */ + sock_close }; +/* + * The list of sockets - make this atomic. + */ static struct socket sockets[NSOCKETS]; +/* + * Used to wait for a socket. + */ static struct wait_queue *socket_wait_free = NULL; +/* + * The protocol list. Each protocol is registered in here. 
+ */ static struct proto_ops *pops[NPROTO]; #define last_socket (sockets + NSOCKETS - 1) -/* Obtains the first available file descriptor and sets it up for use. */ -static int -get_fd(struct inode *inode) -{ - int fd; - struct file *file; - - /* Find a file descriptor suitable for return to the user. */ - file = get_empty_filp(); - if (!file) return(-1); - for (fd = 0; fd < NR_OPEN; ++fd) - if (!current->files->fd[fd]) break; - if (fd == NR_OPEN) { - file->f_count = 0; - return(-1); - } - FD_CLR(fd, &current->files->close_on_exec); - current->files->fd[fd] = file; - file->f_op = &socket_file_ops; - file->f_mode = 3; - file->f_flags = 0; - file->f_count = 1; - file->f_inode = inode; - if (inode) inode->i_count++; - file->f_pos = 0; - return(fd); + +/* + * Support routines. Move socket addresses back and forth across the kernel/user + * divide and look after the messy bits. + */ + +#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 16 for IP, 16 for IPX, about 80 for AX.25 */ + +static int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr) +{ + int err; + if(ulen<0||ulen>MAX_SOCK_ADDR) + return -EINVAL; + if(ulen==0) + return 0; + if((err=verify_area(VERIFY_READ,uaddr,ulen))<0) + return err; + memcpy_fromfs(kaddr,uaddr,ulen); + return 0; +} + +static int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen) +{ + int err; + int len; + + + if((err=verify_area(VERIFY_WRITE,ulen,sizeof(*ulen)))<0) + return err; + len=get_fs_long(ulen); + if(len>klen) + len=klen; + if(len<0 || len> MAX_SOCK_ADDR) + return -EINVAL; + if(len) + { + if((err=verify_area(VERIFY_WRITE,uaddr,len))<0) + return err; + memcpy_tofs(uaddr,kaddr,len); + } + put_fs_long(len,ulen); + return 0; +} + +/* + * Obtains the first available file descriptor and sets it up for use. Returns the fd, or -1 if no file structure or descriptor slot is free. + */ + +static int get_fd(struct inode *inode) +{ + int fd; + struct file *file; + + /* + * Find a file descriptor suitable for return to the user. 
+ */ + + file = get_empty_filp(); + if (!file) + return(-1); + + for (fd = 0; fd < NR_OPEN; ++fd) + if (!current->files->fd[fd]) + break; + if (fd == NR_OPEN) + { + file->f_count = 0; + return(-1); + } + + FD_CLR(fd, &current->files->close_on_exec); + current->files->fd[fd] = file; + file->f_op = &socket_file_ops; + file->f_mode = 3; + file->f_flags = 0; + file->f_count = 1; + file->f_inode = inode; + if (inode) + inode->i_count++; + file->f_pos = 0; + return(fd); } @@ -111,60 +184,77 @@ * the descriptor, but makes sure it does nothing more. Called when * an incomplete socket must be closed, along with sock_release(). */ -static inline void -toss_fd(int fd) + +static inline void toss_fd(int fd) { - sys_close(fd); /* the count protects us from iput */ + sys_close(fd); /* the count protects us from iput */ } +/* + * Go from an inode to its socket slot. + */ -struct socket * -socki_lookup(struct inode *inode) +struct socket *socki_lookup(struct inode *inode) { - struct socket *sock; + struct socket *sock; - if ((sock = inode->i_socket) != NULL) { - if (sock->state != SS_FREE && SOCK_INODE(sock) == inode) - return sock; - printk("socket.c: uhhuh. stale inode->i_socket pointer\n"); - } - for (sock = sockets; sock <= last_socket; ++sock) - if (sock->state != SS_FREE && SOCK_INODE(sock) == inode) { - printk("socket.c: uhhuh. Found socket despite no inode->i_socket pointer\n"); - return(sock); + if ((sock = inode->i_socket) != NULL) + { + if (sock->state != SS_FREE && SOCK_INODE(sock) == inode) + return sock; + printk("socket.c: uhhuh. stale inode->i_socket pointer\n"); } - return(NULL); + for (sock = sockets; sock <= last_socket; ++sock) + if (sock->state != SS_FREE && SOCK_INODE(sock) == inode) + { + printk("socket.c: uhhuh. Found socket despite no inode->i_socket pointer\n"); + return(sock); + } + return(NULL); } +/* + * Go from a file number to its socket slot. 
+ */ -static inline struct socket * -sockfd_lookup(int fd, struct file **pfile) +static inline struct socket *sockfd_lookup(int fd, struct file **pfile) { - struct file *file; + struct file *file; + + if (fd < 0 || fd >= NR_OPEN || !(file = current->files->fd[fd])) + return(NULL); + + if (pfile) + *pfile = file; - if (fd < 0 || fd >= NR_OPEN || !(file = current->files->fd[fd])) return(NULL); - if (pfile) *pfile = file; - return(socki_lookup(file->f_inode)); + return(socki_lookup(file->f_inode)); } +/* + * Allocate a socket. Wait if we are out of sockets. + */ -static struct socket * -sock_alloc(int wait) +static struct socket *sock_alloc(int wait) { - struct socket *sock; - - while (1) { - cli(); - for (sock = sockets; sock <= last_socket; ++sock) { - if (sock->state == SS_FREE) { - sock->state = SS_UNCONNECTED; - sti(); - sock->flags = 0; - sock->ops = NULL; - sock->data = NULL; - sock->conn = NULL; - sock->iconn = NULL; + struct socket *sock; + while (1) + { + cli(); + for (sock = sockets; sock <= last_socket; ++sock) + { + if (sock->state == SS_FREE) + { + /* + * Got one.. + */ + sock->state = SS_UNCONNECTED; + sti(); + sock->flags = 0; + sock->ops = NULL; + sock->data = NULL; + sock->conn = NULL; + sock->iconn = NULL; /* * This really shouldn't be necessary, but everything * else depends on inodes, so we grab it. @@ -172,189 +262,275 @@ * inode. The close system call will iput this inode * for us. 
*/ - if (!(SOCK_INODE(sock) = get_empty_inode())) { - printk("NET: sock_alloc: no more inodes\n"); - sock->state = SS_FREE; - return(NULL); - } - SOCK_INODE(sock)->i_mode = S_IFSOCK; - SOCK_INODE(sock)->i_uid = current->euid; - SOCK_INODE(sock)->i_gid = current->egid; - SOCK_INODE(sock)->i_socket = sock; + if (!(SOCK_INODE(sock) = get_empty_inode())) + { + printk("NET: sock_alloc: no more inodes\n"); + sock->state = SS_FREE; + return(NULL); + } + SOCK_INODE(sock)->i_mode = S_IFSOCK; + SOCK_INODE(sock)->i_uid = current->euid; + SOCK_INODE(sock)->i_gid = current->egid; + SOCK_INODE(sock)->i_socket = sock; - sock->wait = &SOCK_INODE(sock)->i_wait; - return(sock); + sock->wait = &SOCK_INODE(sock)->i_wait; + return(sock); + } } - } - sti(); - if (!wait) return(NULL); - interruptible_sleep_on(&socket_wait_free); - if (current->signal & ~current->blocked) { - return(NULL); + sti(); + /* + * If its a 'now or never request' then return. + */ + if (!wait) + return(NULL); + /* + * Sleep on the socket free'ing queue. + */ + interruptible_sleep_on(&socket_wait_free); + /* + * If we have been interrupted then return. + */ + if (current->signal & ~current->blocked) + { + return(NULL); + } } - } } +/* + * Release a socket. + */ -static inline void -sock_release_peer(struct socket *peer) +static inline void sock_release_peer(struct socket *peer) { - peer->state = SS_DISCONNECTING; - wake_up_interruptible(peer->wait); + peer->state = SS_DISCONNECTING; + wake_up_interruptible(peer->wait); } -static void -sock_release(struct socket *sock) +static void sock_release(struct socket *sock) { - int oldstate; - struct inode *inode; - struct socket *peersock, *nextsock; + int oldstate; + struct inode *inode; + struct socket *peersock, *nextsock; - if ((oldstate = sock->state) != SS_UNCONNECTED) - sock->state = SS_DISCONNECTING; + if ((oldstate = sock->state) != SS_UNCONNECTED) + sock->state = SS_DISCONNECTING; - /* Wake up anyone waiting for connections. 
*/ - for (peersock = sock->iconn; peersock; peersock = nextsock) { - nextsock = peersock->next; - sock_release_peer(peersock); - } + /* + * Wake up anyone waiting for connections. + */ - /* - * Wake up anyone we're connected to. First, we release the - * protocol, to give it a chance to flush data, etc. - */ - peersock = (oldstate == SS_CONNECTED) ? sock->conn : NULL; - if (sock->ops) sock->ops->release(sock, peersock); - if (peersock) sock_release_peer(peersock); - inode = SOCK_INODE(sock); - sock->state = SS_FREE; /* this really releases us */ - wake_up_interruptible(&socket_wait_free); + for (peersock = sock->iconn; peersock; peersock = nextsock) + { + nextsock = peersock->next; + sock_release_peer(peersock); + } - /* We need to do this. If sock alloc was called we already have an inode. */ - iput(inode); + /* + * Wake up anyone we're connected to. First, we release the + * protocol, to give it a chance to flush data, etc. + */ + + peersock = (oldstate == SS_CONNECTED) ? sock->conn : NULL; + if (sock->ops) + sock->ops->release(sock, peersock); + if (peersock) + sock_release_peer(peersock); + inode = SOCK_INODE(sock); + sock->state = SS_FREE; /* this really releases us */ + + /* + * This will wake anyone waiting for a free socket. + */ + wake_up_interruptible(&socket_wait_free); + + /* + * We need to do this. If sock alloc was called we already have an inode. + */ + + iput(inode); } +/* + * Sockets are not seekable. + */ -static int -sock_lseek(struct inode *inode, struct file *file, off_t offset, int whence) +static int sock_lseek(struct inode *inode, struct file *file, off_t offset, int whence) { - return(-ESPIPE); + return(-ESPIPE); } +/* + * Read data from a socket. ubuf is a user mode pointer. We make sure the user + * area ubuf...ubuf+size-1 is writeable before asking the protocol. 
+ */ -static int -sock_read(struct inode *inode, struct file *file, char *ubuf, int size) +static int sock_read(struct inode *inode, struct file *file, char *ubuf, int size) { - struct socket *sock; + struct socket *sock; + int err; + + if (!(sock = socki_lookup(inode))) + { + printk("NET: sock_read: can't find socket for inode!\n"); + return(-EBADF); + } + if (sock->flags & SO_ACCEPTCON) + return(-EINVAL); - if (!(sock = socki_lookup(inode))) { - printk("NET: sock_read: can't find socket for inode!\n"); - return(-EBADF); - } - if (sock->flags & SO_ACCEPTCON) return(-EINVAL); - return(sock->ops->read(sock, ubuf, size, (file->f_flags & O_NONBLOCK))); + if(size<0) + return -EINVAL; + if(size==0) + return 0; + if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0) + return err; + return(sock->ops->read(sock, ubuf, size, (file->f_flags & O_NONBLOCK))); } +/* + * Write data to a socket. We verify that the user area ubuf..ubuf+size-1 is + * readable by the user process. + */ -static int -sock_write(struct inode *inode, struct file *file, char *ubuf, int size) +static int sock_write(struct inode *inode, struct file *file, char *ubuf, int size) { - struct socket *sock; + struct socket *sock; + int err; + + if (!(sock = socki_lookup(inode))) + { + printk("NET: sock_write: can't find socket for inode!\n"); + return(-EBADF); + } - if (!(sock = socki_lookup(inode))) { - printk("NET: sock_write: can't find socket for inode!\n"); - return(-EBADF); - } - if (sock->flags & SO_ACCEPTCON) return(-EINVAL); - return(sock->ops->write(sock, ubuf, size,(file->f_flags & O_NONBLOCK))); + if (sock->flags & SO_ACCEPTCON) + return(-EINVAL); + + if(size<0) + return -EINVAL; + if(size==0) + return 0; + + if ((err=verify_area(VERIFY_READ,ubuf,size))<0) + return err; + return(sock->ops->write(sock, ubuf, size,(file->f_flags & O_NONBLOCK))); } - -static int -sock_readdir(struct inode *inode, struct file *file, struct dirent *dirent, +/* + * You can't read directories from a socket! 
+ */ + +static int sock_readdir(struct inode *inode, struct file *file, struct dirent *dirent, int count) { - return(-EBADF); + return(-EBADF); } +/* + * With an ioctl arg may well be a user mode pointer, but we don't know what to do + * with it - thats up to the protocol still. + */ -int -sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, +int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct socket *sock; + struct socket *sock; - if (!(sock = socki_lookup(inode))) { - printk("NET: sock_ioctl: can't find socket for inode!\n"); - return(-EBADF); - } - return(sock->ops->ioctl(sock, cmd, arg)); + if (!(sock = socki_lookup(inode))) + { + printk("NET: sock_ioctl: can't find socket for inode!\n"); + return(-EBADF); + } + return(sock->ops->ioctl(sock, cmd, arg)); } -static int -sock_select(struct inode *inode, struct file *file, int sel_type, select_table * wait) +static int sock_select(struct inode *inode, struct file *file, int sel_type, select_table * wait) { - struct socket *sock; + struct socket *sock; - if (!(sock = socki_lookup(inode))) { - printk("NET: sock_select: can't find socket for inode!\n"); - return(0); - } + if (!(sock = socki_lookup(inode))) + { + printk("NET: sock_select: can't find socket for inode!\n"); + return(0); + } - /* We can't return errors to select, so its either yes or no. */ - if (sock->ops && sock->ops->select) - return(sock->ops->select(sock, sel_type, wait)); - return(0); + /* + * We can't return errors to select, so its either yes or no. + */ + + if (sock->ops && sock->ops->select) + return(sock->ops->select(sock, sel_type, wait)); + return(0); } -void -sock_close(struct inode *inode, struct file *file) +void sock_close(struct inode *inode, struct file *file) { - struct socket *sock; + struct socket *sock; + + /* + * It's possible the inode is NULL if we're closing an unfinished socket. 
+ */ + + if (!inode) + return; - /* It's possible the inode is NULL if we're closing an unfinished socket. */ - if (!inode) return; - if (!(sock = socki_lookup(inode))) { - printk("NET: sock_close: can't find socket for inode!\n"); - return; - } - sock_release(sock); + if (!(sock = socki_lookup(inode))) + { + printk("NET: sock_close: can't find socket for inode!\n"); + return; + } + + sock_release(sock); } +/* + * Wait for a connection. + */ -int -sock_awaitconn(struct socket *mysock, struct socket *servsock) +int sock_awaitconn(struct socket *mysock, struct socket *servsock) { - struct socket *last; + struct socket *last; - if (!(servsock->flags & SO_ACCEPTCON)) { - return(-EINVAL); - } + /* + * We must be listening + */ + if (!(servsock->flags & SO_ACCEPTCON)) + { + return(-EINVAL); + } + + /* + * Put ourselves on the server's incomplete connection queue. + */ + + mysock->next = NULL; + cli(); + if (!(last = servsock->iconn)) + servsock->iconn = mysock; + else + { + while (last->next) + last = last->next; + last->next = mysock; + } + mysock->state = SS_CONNECTING; + mysock->conn = servsock; + sti(); - /* Put ourselves on the server's incomplete connection queue. */ - mysock->next = NULL; - cli(); - if (!(last = servsock->iconn)) servsock->iconn = mysock; - else { - while (last->next) last = last->next; - last->next = mysock; - } - mysock->state = SS_CONNECTING; - mysock->conn = servsock; - sti(); - - /* - * Wake up server, then await connection. server will set state to - * SS_CONNECTED if we're connected. - */ - wake_up_interruptible(servsock->wait); - if (mysock->state != SS_CONNECTED) { - interruptible_sleep_on(mysock->wait); - if (mysock->state != SS_CONNECTED && - mysock->state != SS_DISCONNECTING) { + /* + * Wake up server, then await connection. server will set state to + * SS_CONNECTED if we're connected. 
+ */ + wake_up_interruptible(servsock->wait); + if (mysock->state != SS_CONNECTED) + { + interruptible_sleep_on(mysock->wait); + if (mysock->state != SS_CONNECTED && + mysock->state != SS_DISCONNECTING) + { /* * if we're not connected we could have been * 1) interrupted, so we need to remove ourselves @@ -362,550 +538,734 @@ * 2) rejected (mysock->conn == NULL), and have * already been removed from the list */ - if (mysock->conn == servsock) { - cli(); - if ((last = servsock->iconn) == mysock) + if (mysock->conn == servsock) + { + cli(); + if ((last = servsock->iconn) == mysock) servsock->iconn = mysock->next; - else { - while (last->next != mysock) last = last->next; - last->next = mysock->next; + else + { + while (last->next != mysock) + last = last->next; + last->next = mysock->next; + } + sti(); } - sti(); + return(mysock->conn ? -EINTR : -EACCES); } - return(mysock->conn ? -EINTR : -EACCES); } - } - return(0); + return(0); } /* - * Perform the socket system call. we locate the appropriate - * family, then create a fresh socket. - */ -static int -sock_socket(int family, int type, int protocol) -{ - int i, fd; - struct socket *sock; - struct proto_ops *ops; - - /* Locate the correct protocol family. */ - for (i = 0; i < NPROTO; ++i) { - if (pops[i] == NULL) continue; - if (pops[i]->family == family) break; - } - if (i == NPROTO) { - return -EINVAL; - } - ops = pops[i]; - - /* - * Check that this is a type that we know how to manipulate and - * the protocol makes sense here. The family can still reject the - * protocol later. - */ - if ((type != SOCK_STREAM && type != SOCK_DGRAM && - type != SOCK_SEQPACKET && type != SOCK_RAW && - type != SOCK_PACKET) || protocol < 0) - return(-EINVAL); - - /* - * allocate the socket and allow the family to set things up. if - * the protocol is 0, the family is instructed to select an appropriate - * default. 
- */ - if (!(sock = sock_alloc(1))) { - printk("sock_socket: no more sockets\n"); - return(-EAGAIN); - } - sock->type = type; - sock->ops = ops; - if ((i = sock->ops->create(sock, protocol)) < 0) { - sock_release(sock); - return(i); - } + * Perform the socket system call. we locate the appropriate + * family, then create a fresh socket. + */ - if ((fd = get_fd(SOCK_INODE(sock))) < 0) { - sock_release(sock); - return(-EINVAL); - } +static int sock_socket(int family, int type, int protocol) +{ + int i, fd; + struct socket *sock; + struct proto_ops *ops; + + /* Locate the correct protocol family. */ + for (i = 0; i < NPROTO; ++i) + { + if (pops[i] == NULL) continue; + if (pops[i]->family == family) + break; + } + + if (i == NPROTO) + { + return -EINVAL; + } + + ops = pops[i]; + +/* + * Check that this is a type that we know how to manipulate and + * the protocol makes sense here. The family can still reject the + * protocol later. + */ + + if ((type != SOCK_STREAM && type != SOCK_DGRAM && + type != SOCK_SEQPACKET && type != SOCK_RAW && + type != SOCK_PACKET) || protocol < 0) + return(-EINVAL); - return(fd); +/* + * Allocate the socket and allow the family to set things up. if + * the protocol is 0, the family is instructed to select an appropriate + * default. + */ + + if (!(sock = sock_alloc(1))) + { + printk("sock_socket: no more sockets\n"); + return(-EAGAIN); + } + + sock->type = type; + sock->ops = ops; + if ((i = sock->ops->create(sock, protocol)) < 0) + { + sock_release(sock); + return(i); + } + + if ((fd = get_fd(SOCK_INODE(sock))) < 0) + { + sock_release(sock); + return(-EINVAL); + } + + return(fd); } +/* + * Create a pair of connected sockets. 
+ */ -static int -sock_socketpair(int family, int type, int protocol, unsigned long usockvec[2]) +static int sock_socketpair(int family, int type, int protocol, unsigned long usockvec[2]) { - int fd1, fd2, i; - struct socket *sock1, *sock2; - int er; + int fd1, fd2, i; + struct socket *sock1, *sock2; + int er; + + /* + * Obtain the first socket and check if the underlying protocol + * supports the socketpair call. + */ + + if ((fd1 = sock_socket(family, type, protocol)) < 0) + return(fd1); + sock1 = sockfd_lookup(fd1, NULL); + if (!sock1->ops->socketpair) + { + sys_close(fd1); + return(-EINVAL); + } - /* - * Obtain the first socket and check if the underlying protocol - * supports the socketpair call. - */ - if ((fd1 = sock_socket(family, type, protocol)) < 0) return(fd1); - sock1 = sockfd_lookup(fd1, NULL); - if (!sock1->ops->socketpair) { - sys_close(fd1); - return(-EINVAL); - } + /* + * Now grab another socket and try to connect the two together. + */ - /* Now grab another socket and try to connect the two together. 
*/ - if ((fd2 = sock_socket(family, type, protocol)) < 0) { - sys_close(fd1); - return(-EINVAL); - } - sock2 = sockfd_lookup(fd2, NULL); - if ((i = sock1->ops->socketpair(sock1, sock2)) < 0) { - sys_close(fd1); - sys_close(fd2); - return(i); - } - sock1->conn = sock2; - sock2->conn = sock1; - sock1->state = SS_CONNECTED; - sock2->state = SS_CONNECTED; + if ((fd2 = sock_socket(family, type, protocol)) < 0) + { + sys_close(fd1); + return(-EINVAL); + } - er=verify_area(VERIFY_WRITE, usockvec, 2 * sizeof(int)); - if(er) - return er; - put_fs_long(fd1, &usockvec[0]); - put_fs_long(fd2, &usockvec[1]); + sock2 = sockfd_lookup(fd2, NULL); + if ((i = sock1->ops->socketpair(sock1, sock2)) < 0) + { + sys_close(fd1); + sys_close(fd2); + return(i); + } - return(0); + sock1->conn = sock2; + sock2->conn = sock1; + sock1->state = SS_CONNECTED; + sock2->state = SS_CONNECTED; + + er=verify_area(VERIFY_WRITE, usockvec, 2 * sizeof(int)); + if(er) + return er; + put_fs_long(fd1, &usockvec[0]); + put_fs_long(fd2, &usockvec[1]); + + return(0); } /* - * Bind a name to a socket. Nothing much to do here since its - * the protocol's responsibility to handle the local address. + * Bind a name to a socket. Nothing much to do here since its + * the protocol's responsibility to handle the local address. + * + * We move the socket address to kernel space before we call + * the protocol layer (having also checked the address is ok). 
*/ -static int -sock_bind(int fd, struct sockaddr *umyaddr, int addrlen) + +static int sock_bind(int fd, struct sockaddr *umyaddr, int addrlen) { - struct socket *sock; - int i; + struct socket *sock; + int i; + char address[MAX_SOCK_ADDR]; + int err; - if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); - if ((i = sock->ops->bind(sock, umyaddr, addrlen)) < 0) { - return(i); - } - return(0); + if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) + return(-EBADF); + + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + + if((err=move_addr_to_kernel(umyaddr,addrlen,address))<0) + return err; + + if ((i = sock->ops->bind(sock, (struct sockaddr *)address, addrlen)) < 0) + { + return(i); + } + return(0); } /* - * Perform a listen. Basically, we allow the protocol to do anything - * necessary for a listen, and if that works, we mark the socket as - * ready for listening. + * Perform a listen. Basically, we allow the protocol to do anything + * necessary for a listen, and if that works, we mark the socket as + * ready for listening. 
*/ -static int -sock_listen(int fd, int backlog) + +static int sock_listen(int fd, int backlog) { - struct socket *sock; + struct socket *sock; - if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); - if (sock->state != SS_UNCONNECTED) { - return(-EINVAL); - } - if (sock->ops && sock->ops->listen) sock->ops->listen(sock, backlog); - sock->flags |= SO_ACCEPTCON; - return(0); + if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + + if (sock->state != SS_UNCONNECTED) + { + return(-EINVAL); + } + + if (sock->ops && sock->ops->listen) + sock->ops->listen(sock, backlog); + sock->flags |= SO_ACCEPTCON; + return(0); } /* - * For accept, we attempt to create a new socket, set up the link - * with the client, wake up the client, then return the new - * connected fd. + * For accept, we attempt to create a new socket, set up the link + * with the client, wake up the client, then return the new + * connected fd. We collect the address of the connector in kernel + * space and move it to user at the very end. This is buggy because + * we open the socket then return an error. 
*/ -static int -sock_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen) -{ - struct file *file; - struct socket *sock, *newsock; - int i; - if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) - return(-EBADF); - - if (!(sock = sockfd_lookup(fd, &file))) return(-ENOTSOCK); - if (sock->state != SS_UNCONNECTED) { - return(-EINVAL); - } - if (!(sock->flags & SO_ACCEPTCON)) { - return(-EINVAL); - } +static int sock_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen) +{ + struct file *file; + struct socket *sock, *newsock; + int i; + char address[MAX_SOCK_ADDR]; + int len; + + if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, &file))) + return(-ENOTSOCK); + if (sock->state != SS_UNCONNECTED) + { + return(-EINVAL); + } + if (!(sock->flags & SO_ACCEPTCON)) + { + return(-EINVAL); + } - if (!(newsock = sock_alloc(0))) { - printk("NET: sock_accept: no more sockets\n"); - return(-EAGAIN); - } - newsock->type = sock->type; - newsock->ops = sock->ops; - if ((i = sock->ops->dup(newsock, sock)) < 0) { - sock_release(newsock); - return(i); - } - - i = newsock->ops->accept(sock, newsock, file->f_flags); - if ( i < 0) { - sock_release(newsock); - return(i); - } + if (!(newsock = sock_alloc(0))) + { + printk("NET: sock_accept: no more sockets\n"); + return(-EAGAIN); + } + newsock->type = sock->type; + newsock->ops = sock->ops; + if ((i = sock->ops->dup(newsock, sock)) < 0) + { + sock_release(newsock); + return(i); + } - if ((fd = get_fd(SOCK_INODE(newsock))) < 0) { - sock_release(newsock); - return(-EINVAL); - } + i = newsock->ops->accept(sock, newsock, file->f_flags); + if ( i < 0) + { + sock_release(newsock); + return(i); + } - if (upeer_sockaddr) - newsock->ops->getname(newsock, upeer_sockaddr, upeer_addrlen, 1); + if ((fd = get_fd(SOCK_INODE(newsock))) < 0) + { + sock_release(newsock); + return(-EINVAL); + } - return(fd); + if (upeer_sockaddr) + { + 
newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1); + move_addr_to_user(address,len, upeer_sockaddr, upeer_addrlen); + } + return(fd); } -/* Attempt to connect to a socket with the server address. */ -static int -sock_connect(int fd, struct sockaddr *uservaddr, int addrlen) +/* + * Attempt to connect to a socket with the server address. The address + * is in user space so we verify it is OK and move it to kernel space. + */ + +static int sock_connect(int fd, struct sockaddr *uservaddr, int addrlen) { - struct socket *sock; - struct file *file; - int i; + struct socket *sock; + struct file *file; + int i; + char address[MAX_SOCK_ADDR]; + int err; + + if (fd < 0 || fd >= NR_OPEN || (file=current->files->fd[fd]) == NULL) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, &file))) + return(-ENOTSOCK); - if (fd < 0 || fd >= NR_OPEN || (file=current->files->fd[fd]) == NULL) - return(-EBADF); + if((err=move_addr_to_kernel(uservaddr,addrlen,address))<0) + return err; - if (!(sock = sockfd_lookup(fd, &file))) return(-ENOTSOCK); - switch(sock->state) { - case SS_UNCONNECTED: - /* This is ok... continue with connect */ - break; - case SS_CONNECTED: - /* Socket is already connected */ - if(sock->type == SOCK_DGRAM) /* Hack for now - move this all into the protocol */ + switch(sock->state) + { + case SS_UNCONNECTED: + /* This is ok... continue with connect */ break; - return -EISCONN; - case SS_CONNECTING: - /* Not yet connected... we will check this. */ + case SS_CONNECTED: + /* Socket is already connected */ + if(sock->type == SOCK_DGRAM) /* Hack for now - move this all into the protocol */ + break; + return -EISCONN; + case SS_CONNECTING: + /* Not yet connected... we will check this. */ - /* - * FIXME: for all protocols what happens if you start - * an async connect fork and both children connect. Clean - * this up in the protocols! 
- */ - return(sock->ops->connect(sock, uservaddr, - addrlen, file->f_flags)); - default: - return(-EINVAL); - } - i = sock->ops->connect(sock, uservaddr, addrlen, file->f_flags); - if (i < 0) { - return(i); - } - return(0); + /* + * FIXME: for all protocols what happens if you start + * an async connect fork and both children connect. Clean + * this up in the protocols! + */ + return(sock->ops->connect(sock, uservaddr, + addrlen, file->f_flags)); + default: + return(-EINVAL); + } + i = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, file->f_flags); + if (i < 0) + { + return(i); + } + return(0); } +/* + * Get the local address ('name') of a socket object. Move the obtained + * name to user space. + */ -static int -sock_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len) +static int sock_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len) { - struct socket *sock; - - if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); - return(sock->ops->getname(sock, usockaddr, usockaddr_len, 0)); + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int len; + int err; + + if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + + err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); + if(err) + return err; + if((err=move_addr_to_user(address,len, usockaddr, usockaddr_len))<0) + return err; + return 0; } - -static int -sock_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len) +/* + * Get the remote address ('name') of a socket object. Move the obtained + * name to user space. 
+ */ + +static int sock_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len) { - struct socket *sock; - - if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); - return(sock->ops->getname(sock, usockaddr, usockaddr_len, 1)); + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int len; + int err; + + if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + + err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 1); + if(err) + return err; + if((err=move_addr_to_user(address,len, usockaddr, usockaddr_len))<0) + return err; + return 0; } +/* + * Send a datagram down a socket. The datagram as with write() is + * in user space. We check it can be read. + */ -static int -sock_send(int fd, void * buff, int len, unsigned flags) +static int sock_send(int fd, void * buff, int len, unsigned flags) { - struct socket *sock; - struct file *file; - - if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); - - return(sock->ops->send(sock, buff, len, (file->f_flags & O_NONBLOCK), flags)); + struct socket *sock; + struct file *file; + int err; + + if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + + if(len<0) + return -EINVAL; + if(len==0) + return 0; + err=verify_area(VERIFY_READ, buff, len); + if(err) + return err; + return(sock->ops->send(sock, buff, len, (file->f_flags & O_NONBLOCK), flags)); } +/* + * Send a datagram to a given address. We move the address into kernel + * spacee and check the user space data area is readable before invoking + * the protocol. 
+ */ -static int -sock_sendto(int fd, void * buff, int len, unsigned flags, +static int sock_sendto(int fd, void * buff, int len, unsigned flags, struct sockaddr *addr, int addr_len) { - struct socket *sock; - struct file *file; - - if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); + struct socket *sock; + struct file *file; + char address[MAX_SOCK_ADDR]; + int err; + + if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + + if(len<0) + return -EINVAL; + if(len==0) + return 0; + err=verify_area(VERIFY_READ,buff,len); + if(err) + return err; + + if((err=move_addr_to_kernel(addr,addr_len,address))<0) + return err; - return(sock->ops->sendto(sock, buff, len, (file->f_flags & O_NONBLOCK), - flags, addr, addr_len)); + return(sock->ops->sendto(sock, buff, len, (file->f_flags & O_NONBLOCK), + flags, (struct sockaddr *)address, addr_len)); } -static int -sock_recv(int fd, void * buff, int len, unsigned flags) +/* + * Receive a datagram from a socket. This isn't really right. The BSD manual + * pages explicitly state that recv is recvfrom with a NULL to argument. The + * Linux stack gets the right results for the wrong reason and this need to + * be tidied in the inet layer and removed from here. + * We check the buffer is writable and valid. 
+ */ + +static int sock_recv(int fd, void * buff, int len, unsigned flags) { - struct socket *sock; - struct file *file; + struct socket *sock; + struct file *file; + int err; + + if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) + return(-EBADF); - if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + + if(len<0) + return -EINVAL; + if(len==0) + return 0; + err=verify_area(VERIFY_WRITE, buff, len); + if(err) + return err; - return(sock->ops->recv(sock, buff, len,(file->f_flags & O_NONBLOCK), flags)); + return(sock->ops->recv(sock, buff, len,(file->f_flags & O_NONBLOCK), flags)); } +/* + * Receive a frame from the socket and optionally record the address of the + * sender. We verify the buffers are writable and if needed move the + * sender address from kernel to user space. + */ -static int -sock_recvfrom(int fd, void * buff, int len, unsigned flags, +static int sock_recvfrom(int fd, void * buff, int len, unsigned flags, struct sockaddr *addr, int *addr_len) { - struct socket *sock; - struct file *file; + struct socket *sock; + struct file *file; + char address[MAX_SOCK_ADDR]; + int err; + int alen; + if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + if(len<0) + return -EINVAL; + if(len==0) + return 0; + + err=verify_area(VERIFY_WRITE,buff,len); + if(err) + return err; + + len=sock->ops->recvfrom(sock, buff, len, (file->f_flags & O_NONBLOCK), + flags, (struct sockaddr *)address, &alen); - if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); + if(len<0) + return len; + if(addr!=NULL && (err=move_addr_to_user(address,alen, addr, addr_len))<0) + return err; - 
return(sock->ops->recvfrom(sock, buff, len, (file->f_flags & O_NONBLOCK), - flags, addr, addr_len)); + return len; } - -static int -sock_setsockopt(int fd, int level, int optname, char *optval, int optlen) +/* + * Set a socket option. Because we don't know the option lengths we have + * to pass the user mode parameter for the protocols to sort out. + */ + +static int sock_setsockopt(int fd, int level, int optname, char *optval, int optlen) { - struct socket *sock; - struct file *file; + struct socket *sock; + struct file *file; - if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); + if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); - return(sock->ops->setsockopt(sock, level, optname, optval, optlen)); + return(sock->ops->setsockopt(sock, level, optname, optval, optlen)); } +/* + * Get a socket option. Because we don't know the option lengths we have + * to pass a user mode parameter for the protocols to sort out. 
+ */ -static int -sock_getsockopt(int fd, int level, int optname, char *optval, int *optlen) +static int sock_getsockopt(int fd, int level, int optname, char *optval, int *optlen) { - struct socket *sock; - struct file *file; + struct socket *sock; + struct file *file; - if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) - return(-EBADF); - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); + if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); - if (!sock->ops || !sock->ops->getsockopt) return(0); - return(sock->ops->getsockopt(sock, level, optname, optval, optlen)); + if (!sock->ops || !sock->ops->getsockopt) + return(0); + return(sock->ops->getsockopt(sock, level, optname, optval, optlen)); } -static int -sock_shutdown(int fd, int how) +/* + * Shutdown a socket. + */ + +static int sock_shutdown(int fd, int how) { - struct socket *sock; - struct file *file; - - if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) - return(-EBADF); - - if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); - - return(sock->ops->shutdown(sock, how)); -} - - -int -sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct socket *sock; - - sock = socki_lookup (filp->f_inode); - if (sock != NULL && sock->ops != NULL && sock->ops->fcntl != NULL) - return(sock->ops->fcntl(sock, cmd, arg)); - return(-EINVAL); -} - - -/* - * System call vectors. Since I (RIB) want to rewrite sockets as streams, - * we have this level of indirection. Not a lot of overhead, since more of - * the work is done via read/write/select directly. 
- */ -asmlinkage int -sys_socketcall(int call, unsigned long *args) -{ - int er; - switch(call) { - case SYS_SOCKET: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_socket(get_fs_long(args+0), - get_fs_long(args+1), - get_fs_long(args+2))); - case SYS_BIND: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_bind(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), - get_fs_long(args+2))); - case SYS_CONNECT: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_connect(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), - get_fs_long(args+2))); - case SYS_LISTEN: - er=verify_area(VERIFY_READ, args, 2 * sizeof(long)); - if(er) - return er; - return(sock_listen(get_fs_long(args+0), - get_fs_long(args+1))); - case SYS_ACCEPT: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_accept(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), - (int *)get_fs_long(args+2))); - case SYS_GETSOCKNAME: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_getsockname(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), - (int *)get_fs_long(args+2))); - case SYS_GETPEERNAME: - er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); - if(er) - return er; - return(sock_getpeername(get_fs_long(args+0), - (struct sockaddr *)get_fs_long(args+1), - (int *)get_fs_long(args+2))); - case SYS_SOCKETPAIR: - er=verify_area(VERIFY_READ, args, 4 * sizeof(long)); - if(er) - return er; - return(sock_socketpair(get_fs_long(args+0), - get_fs_long(args+1), - get_fs_long(args+2), - (unsigned long *)get_fs_long(args+3))); - case SYS_SEND: - er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); - if(er) - return er; - return(sock_send(get_fs_long(args+0), - (void *)get_fs_long(args+1), - get_fs_long(args+2), - get_fs_long(args+3))); - case SYS_SENDTO: - 
er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); - if(er) - return er; - return(sock_sendto(get_fs_long(args+0), - (void *)get_fs_long(args+1), - get_fs_long(args+2), - get_fs_long(args+3), - (struct sockaddr *)get_fs_long(args+4), - get_fs_long(args+5))); - case SYS_RECV: - er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); - if(er) - return er; - return(sock_recv(get_fs_long(args+0), - (void *)get_fs_long(args+1), - get_fs_long(args+2), - get_fs_long(args+3))); - case SYS_RECVFROM: - er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); - if(er) - return er; - return(sock_recvfrom(get_fs_long(args+0), - (void *)get_fs_long(args+1), - get_fs_long(args+2), - get_fs_long(args+3), - (struct sockaddr *)get_fs_long(args+4), - (int *)get_fs_long(args+5))); - case SYS_SHUTDOWN: - er=verify_area(VERIFY_READ, args, 2* sizeof(unsigned long)); - if(er) - return er; - return(sock_shutdown(get_fs_long(args+0), - get_fs_long(args+1))); - case SYS_SETSOCKOPT: - er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); - if(er) - return er; - return(sock_setsockopt(get_fs_long(args+0), - get_fs_long(args+1), - get_fs_long(args+2), - (char *)get_fs_long(args+3), - get_fs_long(args+4))); - case SYS_GETSOCKOPT: - er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); - if(er) - return er; - return(sock_getsockopt(get_fs_long(args+0), - get_fs_long(args+1), - get_fs_long(args+2), - (char *)get_fs_long(args+3), - (int *)get_fs_long(args+4))); - default: - return(-EINVAL); - } + struct socket *sock; + struct file *file; + + if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) + return(-EBADF); + if (!(sock = sockfd_lookup(fd, NULL))) + return(-ENOTSOCK); + + return(sock->ops->shutdown(sock, how)); +} + + +/* + * Perform a file control on a socket file descriptor. 
+ */ + +int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct socket *sock; + + sock = socki_lookup (filp->f_inode); + if (sock != NULL && sock->ops != NULL && sock->ops->fcntl != NULL) + return(sock->ops->fcntl(sock, cmd, arg)); + return(-EINVAL); } + +/* + * System call vectors. Since I (RIB) want to rewrite sockets as streams, + * we have this level of indirection. Not a lot of overhead, since more of + * the work is done via read/write/select directly. + * + * I'm now expanding this up to a higher level to seperate the assorted + * kernel/user space manipulations and global assumptions from the protocol + * layers proper - AC. + */ + +asmlinkage int sys_socketcall(int call, unsigned long *args) +{ + int er; + switch(call) + { + case SYS_SOCKET: + er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); + if(er) + return er; + return(sock_socket(get_fs_long(args+0), + get_fs_long(args+1), + get_fs_long(args+2))); + case SYS_BIND: + er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); + if(er) + return er; + return(sock_bind(get_fs_long(args+0), + (struct sockaddr *)get_fs_long(args+1), + get_fs_long(args+2))); + case SYS_CONNECT: + er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); + if(er) + return er; + return(sock_connect(get_fs_long(args+0), + (struct sockaddr *)get_fs_long(args+1), + get_fs_long(args+2))); + case SYS_LISTEN: + er=verify_area(VERIFY_READ, args, 2 * sizeof(long)); + if(er) + return er; + return(sock_listen(get_fs_long(args+0), + get_fs_long(args+1))); + case SYS_ACCEPT: + er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); + if(er) + return er; + return(sock_accept(get_fs_long(args+0), + (struct sockaddr *)get_fs_long(args+1), + (int *)get_fs_long(args+2))); + case SYS_GETSOCKNAME: + er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); + if(er) + return er; + return(sock_getsockname(get_fs_long(args+0), + (struct sockaddr *)get_fs_long(args+1), + (int *)get_fs_long(args+2))); + case SYS_GETPEERNAME: + 
er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); + if(er) + return er; + return(sock_getpeername(get_fs_long(args+0), + (struct sockaddr *)get_fs_long(args+1), + (int *)get_fs_long(args+2))); + case SYS_SOCKETPAIR: + er=verify_area(VERIFY_READ, args, 4 * sizeof(long)); + if(er) + return er; + return(sock_socketpair(get_fs_long(args+0), + get_fs_long(args+1), + get_fs_long(args+2), + (unsigned long *)get_fs_long(args+3))); + case SYS_SEND: + er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); + if(er) + return er; + return(sock_send(get_fs_long(args+0), + (void *)get_fs_long(args+1), + get_fs_long(args+2), + get_fs_long(args+3))); + case SYS_SENDTO: + er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); + if(er) + return er; + return(sock_sendto(get_fs_long(args+0), + (void *)get_fs_long(args+1), + get_fs_long(args+2), + get_fs_long(args+3), + (struct sockaddr *)get_fs_long(args+4), + get_fs_long(args+5))); + case SYS_RECV: + er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); + if(er) + return er; + return(sock_recv(get_fs_long(args+0), + (void *)get_fs_long(args+1), + get_fs_long(args+2), + get_fs_long(args+3))); + case SYS_RECVFROM: + er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); + if(er) + return er; + return(sock_recvfrom(get_fs_long(args+0), + (void *)get_fs_long(args+1), + get_fs_long(args+2), + get_fs_long(args+3), + (struct sockaddr *)get_fs_long(args+4), + (int *)get_fs_long(args+5))); + case SYS_SHUTDOWN: + er=verify_area(VERIFY_READ, args, 2* sizeof(unsigned long)); + if(er) + return er; + return(sock_shutdown(get_fs_long(args+0), + get_fs_long(args+1))); + case SYS_SETSOCKOPT: + er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); + if(er) + return er; + return(sock_setsockopt(get_fs_long(args+0), + get_fs_long(args+1), + get_fs_long(args+2), + (char *)get_fs_long(args+3), + get_fs_long(args+4))); + case SYS_GETSOCKOPT: + er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); + if(er) + return 
er; + return(sock_getsockopt(get_fs_long(args+0), + get_fs_long(args+1), + get_fs_long(args+2), + (char *)get_fs_long(args+3), + (int *)get_fs_long(args+4))); + default: + return(-EINVAL); + } +} + /* - * This function is called by a protocol handler that wants to - * advertise its address family, and have it linked into the - * SOCKET module. + * This function is called by a protocol handler that wants to + * advertise its address family, and have it linked into the + * SOCKET module. */ -int -sock_register(int family, struct proto_ops *ops) + +int sock_register(int family, struct proto_ops *ops) { - int i; + int i; - cli(); - for(i = 0; i < NPROTO; i++) { - if (pops[i] != NULL) continue; - pops[i] = ops; - pops[i]->family = family; + cli(); + for(i = 0; i < NPROTO; i++) + { + if (pops[i] != NULL) + continue; + pops[i] = ops; + pops[i]->family = family; + sti(); + return(i); + } sti(); - return(i); - } - sti(); - return(-ENOMEM); + return(-ENOMEM); } void proto_init(void) @@ -924,25 +1284,43 @@ } -void -sock_init(void) +void sock_init(void) { - struct socket *sock; - int i; - /* Release all sockets. */ - for (sock = sockets; sock <= last_socket; ++sock) sock->state = SS_FREE; + struct socket *sock; + int i; - /* Initialize all address (protocol) families. */ - for (i = 0; i < NPROTO; ++i) pops[i] = NULL; + printk("Swansea University Computer Society NET3.016\n"); - /* Initialize the protocols module. */ - proto_init(); + /* + * Release all sockets. + */ + for (sock = sockets; sock <= last_socket; ++sock) + sock->state = SS_FREE; + + /* + * Initialize all address (protocol) families. + */ + + for (i = 0; i < NPROTO; ++i) pops[i] = NULL; + + /* + * Initialize the protocols module. + */ + proto_init(); + #ifdef CONFIG_NET - /* Initialize the DEV module. */ - dev_init(); + /* + * Initialize the DEV module. 
+ */ + + dev_init(); + + /* + * And the bottom half handler + */ + + bh_base[NET_BH].routine= net_bh; +#endif - /* And the bottom half handler */ - bh_base[NET_BH].routine= net_bh; -#endif } diff -u --recursive --new-file v1.1.19/linux/net/unix/sock.c linux/net/unix/sock.c --- v1.1.19/linux/net/unix/sock.c Fri Jun 17 15:20:08 1994 +++ linux/net/unix/sock.c Fri Jun 17 07:54:05 1994 @@ -51,6 +51,11 @@ #include "unix.h" +/* + * Because these have the address in them they casually waste an extra 8K of kernel data + * space that need not be wasted. + */ + struct unix_proto_data unix_datas[NSOCKETS]; static int unix_proto_create(struct socket *sock, int protocol); @@ -92,11 +97,11 @@ char *optval, int *optlen); -static inline int -min(int a, int b) +static inline int min(int a, int b) { - if (a < b) return(a); - return(b); + if (a < b) + return(a); + return(b); } @@ -110,6 +115,7 @@ * wait queue because it is allowed to 'go away' outside of our control, * whereas unix_proto_data structures stick around. */ + static void unix_lock(struct unix_proto_data *upd) { while (upd->lock_flag) @@ -124,656 +130,772 @@ wake_up(&upd->wait); } -/* don't have to do anything. */ -static int -unix_proto_listen(struct socket *sock, int backlog) +/* + * We don't have to do anything. + */ + +static int unix_proto_listen(struct socket *sock, int backlog) { - return(0); + return(0); } +/* + * Until the new NET3 Unix code is done we have no options. 
+ */ -static int -unix_proto_setsockopt(struct socket *sock, int level, int optname, +static int unix_proto_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) { - return(-EOPNOTSUPP); + return(-EOPNOTSUPP); } -static int -unix_proto_getsockopt(struct socket *sock, int level, int optname, +static int unix_proto_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { - return(-EOPNOTSUPP); + return(-EOPNOTSUPP); } -static int -unix_proto_sendto(struct socket *sock, void *buff, int len, int nonblock, + +/* + * SendTo() doesn't matter as we also have no Datagram support! + */ + +static int unix_proto_sendto(struct socket *sock, void *buff, int len, int nonblock, unsigned flags, struct sockaddr *addr, int addr_len) { - return(-EOPNOTSUPP); + return(-EOPNOTSUPP); } -static int -unix_proto_recvfrom(struct socket *sock, void *buff, int len, int nonblock, +static int unix_proto_recvfrom(struct socket *sock, void *buff, int len, int nonblock, unsigned flags, struct sockaddr *addr, int *addr_len) { - return(-EOPNOTSUPP); + return(-EOPNOTSUPP); } +/* + * You can't shutdown a unix domain socket. + */ -static int -unix_proto_shutdown(struct socket *sock, int how) +static int unix_proto_shutdown(struct socket *sock, int how) { - return(-EOPNOTSUPP); + return(-EOPNOTSUPP); } -/* This error needs to be checked. */ -static int -unix_proto_send(struct socket *sock, void *buff, int len, int nonblock, +/* + * Send data to a unix socket. + */ + +static int unix_proto_send(struct socket *sock, void *buff, int len, int nonblock, unsigned flags) { - if (flags != 0) return(-EINVAL); - return(unix_proto_write(sock, (char *) buff, len, nonblock)); + if (flags != 0) + return(-EINVAL); + return(unix_proto_write(sock, (char *) buff, len, nonblock)); } -/* This error needs to be checked. */ -static int -unix_proto_recv(struct socket *sock, void *buff, int len, int nonblock, +/* + * Receive data. 
This version of AF_UNIX also lacks MSG_PEEK 8( + */ + +static int unix_proto_recv(struct socket *sock, void *buff, int len, int nonblock, unsigned flags) { - if (flags != 0) return(-EINVAL); - return(unix_proto_read(sock, (char *) buff, len, nonblock)); + if (flags != 0) + return(-EINVAL); + return(unix_proto_read(sock, (char *) buff, len, nonblock)); } - +/* + * Given an address and an inode go find a unix control structure + */ + static struct unix_proto_data * unix_data_lookup(struct sockaddr_un *sockun, int sockaddr_len, struct inode *inode) { - struct unix_proto_data *upd; + struct unix_proto_data *upd; - for(upd = unix_datas; upd <= last_unix_data; ++upd) { - if (upd->refcnt > 0 && upd->socket && - upd->socket->state == SS_UNCONNECTED && - upd->sockaddr_un.sun_family == sockun->sun_family && - upd->inode == inode) return(upd); - } - return(NULL); + for(upd = unix_datas; upd <= last_unix_data; ++upd) + { + if (upd->refcnt > 0 && upd->socket && + upd->socket->state == SS_UNCONNECTED && + upd->sockaddr_un.sun_family == sockun->sun_family && + upd->inode == inode) + + return(upd); + } + return(NULL); } +/* + * We allocate a page of data for the socket. This is woefully inadequate and helps cause vast + * amounts of excess task switching and blocking when transferring stuff like bitmaps via X. + * It doesn't help this problem that the Linux scheduler is desperately in need of a major + * rewrite. Somewhere near 16K would be better maybe 32. 
+ */ static struct unix_proto_data * unix_data_alloc(void) { - struct unix_proto_data *upd; + struct unix_proto_data *upd; - cli(); - for(upd = unix_datas; upd <= last_unix_data; ++upd) { - if (!upd->refcnt) { - upd->refcnt = -1; /* unix domain socket not yet initialised - bgm */ - sti(); - upd->socket = NULL; - upd->sockaddr_len = 0; - upd->sockaddr_un.sun_family = 0; - upd->buf = NULL; - upd->bp_head = upd->bp_tail = 0; - upd->inode = NULL; - upd->peerupd = NULL; - return(upd); + cli(); + for(upd = unix_datas; upd <= last_unix_data; ++upd) + { + if (!upd->refcnt) + { + upd->refcnt = -1; /* unix domain socket not yet initialised - bgm */ + sti(); + upd->socket = NULL; + upd->sockaddr_len = 0; + upd->sockaddr_un.sun_family = 0; + upd->buf = NULL; + upd->bp_head = upd->bp_tail = 0; + upd->inode = NULL; + upd->peerupd = NULL; + return(upd); + } } - } - sti(); - return(NULL); + sti(); + return(NULL); } +/* + * The data area is owned by all its users. Thus we need to track owners + * carefully and not free data at the wrong moment. These look like they need + * interrupt protection but they don't because no interrupt ever fiddles with + * these counts. With an SMP Linux you'll need to protect these! 
+ */ -static inline void -unix_data_ref(struct unix_proto_data *upd) +static inline void unix_data_ref(struct unix_proto_data *upd) { - if (!upd) { - return; - } - ++upd->refcnt; + if (!upd) + { + return; + } + ++upd->refcnt; } -static void -unix_data_deref(struct unix_proto_data *upd) +static void unix_data_deref(struct unix_proto_data *upd) { - if (!upd) { - return; - } - if (upd->refcnt == 1) { - if (upd->buf) { - free_page((unsigned long)upd->buf); - upd->buf = NULL; - upd->bp_head = upd->bp_tail = 0; + if (!upd) + { + return; } - } - --upd->refcnt; + if (upd->refcnt == 1) + { + if (upd->buf) + { + free_page((unsigned long)upd->buf); + upd->buf = NULL; + upd->bp_head = upd->bp_tail = 0; + } + } + --upd->refcnt; } /* - * Upon a create, we allocate an empty protocol data, - * and grab a page to buffer writes. + * Upon a create, we allocate an empty protocol data, + * and grab a page to buffer writes. */ -static int -unix_proto_create(struct socket *sock, int protocol) + +static int unix_proto_create(struct socket *sock, int protocol) { - struct unix_proto_data *upd; + struct unix_proto_data *upd; - if (protocol != 0) { - return(-EINVAL); - } - if (!(upd = unix_data_alloc())) { - printk("UNIX: create: can't allocate buffer\n"); - return(-ENOMEM); - } - if (!(upd->buf = (char*) get_free_page(GFP_USER))) { - printk("UNIX: create: can't get page!\n"); - unix_data_deref(upd); - return(-ENOMEM); - } - upd->protocol = protocol; - upd->socket = sock; - UN_DATA(sock) = upd; - upd->refcnt = 1; /* Now its complete - bgm */ - return(0); + /* + * No funny SOCK_RAW stuff + */ + + if (protocol != 0) + { + return(-EINVAL); + } + + if (!(upd = unix_data_alloc())) + { + printk("UNIX: create: can't allocate buffer\n"); + return(-ENOMEM); + } + if (!(upd->buf = (char*) get_free_page(GFP_USER))) + { + printk("UNIX: create: can't get page!\n"); + unix_data_deref(upd); + return(-ENOMEM); + } + upd->protocol = protocol; + upd->socket = sock; + UN_DATA(sock) = upd; + upd->refcnt = 1; /* 
Now its complete - bgm */ + return(0); } +/* + * Duplicate a socket. + */ -static int -unix_proto_dup(struct socket *newsock, struct socket *oldsock) +static int unix_proto_dup(struct socket *newsock, struct socket *oldsock) { - struct unix_proto_data *upd = UN_DATA(oldsock); - - return(unix_proto_create(newsock, upd->protocol)); + struct unix_proto_data *upd = UN_DATA(oldsock); + return(unix_proto_create(newsock, upd->protocol)); } -static int -unix_proto_release(struct socket *sock, struct socket *peer) +/* + * Release a Unix domain socket. + */ + +static int unix_proto_release(struct socket *sock, struct socket *peer) { - struct unix_proto_data *upd = UN_DATA(sock); + struct unix_proto_data *upd = UN_DATA(sock); - if (!upd) return(0); - if (upd->socket != sock) { - printk("UNIX: release: socket link mismatch!\n"); - return(-EINVAL); - } - if (upd->inode) { - iput(upd->inode); - upd->inode = NULL; - } - UN_DATA(sock) = NULL; - upd->socket = NULL; - if (upd->peerupd) unix_data_deref(upd->peerupd); - unix_data_deref(upd); - return(0); + if (!upd) + return(0); + + if (upd->socket != sock) + { + printk("UNIX: release: socket link mismatch!\n"); + return(-EINVAL); + } + + if (upd->inode) + { + iput(upd->inode); + upd->inode = NULL; + } + + UN_DATA(sock) = NULL; + upd->socket = NULL; + + if (upd->peerupd) + unix_data_deref(upd->peerupd); + unix_data_deref(upd); + return(0); } /* - * Bind a name to a socket. - * This is where much of the work is done: we allocate a fresh page for - * the buffer, grab the appropriate inode and set things up. + * Bind a name to a socket. + * This is where much of the work is done: we allocate a fresh page for + * the buffer, grab the appropriate inode and set things up. * - * FIXME: what should we do if an address is already bound? + * FIXME: what should we do if an address is already bound? * Here we return EINVAL, but it may be necessary to re-bind. * I think thats what BSD does in the case of datagram sockets... 
*/ -static int -unix_proto_bind(struct socket *sock, struct sockaddr *umyaddr, + +static int unix_proto_bind(struct socket *sock, struct sockaddr *umyaddr, int sockaddr_len) { - char fname[sizeof(((struct sockaddr_un *)0)->sun_path) + 1]; - struct unix_proto_data *upd = UN_DATA(sock); - unsigned long old_fs; - int i; - int er; - - if (sockaddr_len <= UN_PATH_OFFSET || - sockaddr_len > sizeof(struct sockaddr_un)) { - return(-EINVAL); - } - if (upd->sockaddr_len || upd->inode) { - printk("UNIX: bind: already bound!\n"); - return(-EINVAL); - } - er=verify_area(VERIFY_WRITE, umyaddr, sockaddr_len); - if(er) - return er; - memcpy_fromfs(&upd->sockaddr_un, umyaddr, sockaddr_len); - upd->sockaddr_un.sun_path[sockaddr_len-UN_PATH_OFFSET] = '\0'; - if (upd->sockaddr_un.sun_family != AF_UNIX) { - return(-EINVAL); - } - - memcpy(fname, upd->sockaddr_un.sun_path, sockaddr_len-UN_PATH_OFFSET); - fname[sockaddr_len-UN_PATH_OFFSET] = '\0'; - old_fs = get_fs(); - set_fs(get_ds()); - i = do_mknod(fname, S_IFSOCK | S_IRWXUGO, 0); - if (i == -EEXIST) - i = -EADDRINUSE; - if (i == 0) - i = open_namei(fname, 0, S_IFSOCK, &upd->inode, NULL); - set_fs(old_fs); - if (i < 0) - return i; - upd->sockaddr_len = sockaddr_len; /* now its legal */ - - return(0); + char fname[UNIX_PATH_MAX + 1]; + struct unix_proto_data *upd = UN_DATA(sock); + unsigned long old_fs; + int i; + + if (sockaddr_len <= UN_PATH_OFFSET || + sockaddr_len > sizeof(struct sockaddr_un)) + { + return(-EINVAL); + } + if (upd->sockaddr_len || upd->inode) + { + /*printk("UNIX: bind: already bound!\n");*/ + return(-EINVAL); + } + memcpy(&upd->sockaddr_un, umyaddr, sockaddr_len); + upd->sockaddr_un.sun_path[sockaddr_len-UN_PATH_OFFSET] = '\0'; + if (upd->sockaddr_un.sun_family != AF_UNIX) + { + return(-EINVAL); + } + + memcpy(fname, upd->sockaddr_un.sun_path, sockaddr_len-UN_PATH_OFFSET); + fname[sockaddr_len-UN_PATH_OFFSET] = '\0'; + old_fs = get_fs(); + set_fs(get_ds()); + + i = do_mknod(fname, S_IFSOCK | S_IRWXUGO, 0); + + if 
(i == 0) + i = open_namei(fname, 0, S_IFSOCK, &upd->inode, NULL); + set_fs(old_fs); + if (i < 0) + { +/* printk("UNIX: bind: can't open socket %s\n", fname);*/ + if(i==-EEXIST) + i=-EADDRINUSE; + return(i); + } + upd->sockaddr_len = sockaddr_len; /* now its legal */ + + return(0); } /* - * Perform a connection. we can only connect to unix sockets - * (I can't for the life of me find an application where that - * wouldn't be the case!) + * Perform a connection. we can only connect to unix sockets + * (I can't for the life of me find an application where that + * wouldn't be the case!) */ -static int -unix_proto_connect(struct socket *sock, struct sockaddr *uservaddr, + +static int unix_proto_connect(struct socket *sock, struct sockaddr *uservaddr, int sockaddr_len, int flags) { - char fname[sizeof(((struct sockaddr_un *)0)->sun_path) + 1]; - struct sockaddr_un sockun; - struct unix_proto_data *serv_upd; - struct inode *inode; - unsigned long old_fs; - int i; - int er; - - if (sockaddr_len <= UN_PATH_OFFSET || - sockaddr_len > sizeof(struct sockaddr_un)) { - return(-EINVAL); - } - if (sock->state == SS_CONNECTING) return(-EINPROGRESS); - if (sock->state == SS_CONNECTED) return(-EISCONN); - - er=verify_area(VERIFY_READ, uservaddr, sockaddr_len); - if(er) - return er; - memcpy_fromfs(&sockun, uservaddr, sockaddr_len); - sockun.sun_path[sockaddr_len-UN_PATH_OFFSET] = '\0'; - if (sockun.sun_family != AF_UNIX) { - return(-EINVAL); - } - - /* - * Try to open the name in the filesystem - this is how we - * identify ourselves and our server. Note that we don't - * hold onto the inode that long, just enough to find our - * server. When we're connected, we mooch off the server. 
- */ - memcpy(fname, sockun.sun_path, sockaddr_len-UN_PATH_OFFSET); - fname[sockaddr_len-UN_PATH_OFFSET] = '\0'; - old_fs = get_fs(); - set_fs(get_ds()); - i = open_namei(fname, 0, S_IFSOCK, &inode, NULL); - set_fs(old_fs); - if (i < 0) { - return(i); - } + char fname[sizeof(((struct sockaddr_un *)0)->sun_path) + 1]; + struct sockaddr_un sockun; + struct unix_proto_data *serv_upd; + struct inode *inode; + unsigned long old_fs; + int i; + + if (sockaddr_len <= UN_PATH_OFFSET || + sockaddr_len > sizeof(struct sockaddr_un)) + { + return(-EINVAL); + } + + if (sock->state == SS_CONNECTING) + return(-EINPROGRESS); + if (sock->state == SS_CONNECTED) + return(-EISCONN); + + memcpy(&sockun, uservaddr, sockaddr_len); + sockun.sun_path[sockaddr_len-UN_PATH_OFFSET] = '\0'; + if (sockun.sun_family != AF_UNIX) + { + return(-EINVAL); + } + +/* + * Try to open the name in the filesystem - this is how we + * identify ourselves and our server. Note that we don't + * hold onto the inode that long, just enough to find our + * server. When we're connected, we mooch off the server. + */ + + memcpy(fname, sockun.sun_path, sockaddr_len-UN_PATH_OFFSET); + fname[sockaddr_len-UN_PATH_OFFSET] = '\0'; + old_fs = get_fs(); + set_fs(get_ds()); + i = open_namei(fname, 0, S_IFSOCK, &inode, NULL); + set_fs(old_fs); + if (i < 0) + { + return(i); + } - serv_upd = unix_data_lookup(&sockun, sockaddr_len, inode); - iput(inode); - if (!serv_upd) { - return(-EINVAL); - } - if ((i = sock_awaitconn(sock, serv_upd->socket)) < 0) { - return(i); - } - if (sock->conn) { - unix_data_ref(UN_DATA(sock->conn)); - UN_DATA(sock)->peerupd = UN_DATA(sock->conn); /* ref server */ - } - return(0); -} - - -/* - * To do a socketpair, we just connect the two datas, easy! - * Since we always wait on the socket inode, they're no contention - * for a wait area, and deadlock prevention in the case of a process - * writing to itself is, ignored, in true unix fashion! 
- */ -static int -unix_proto_socketpair(struct socket *sock1, struct socket *sock2) -{ - struct unix_proto_data *upd1 = UN_DATA(sock1), *upd2 = UN_DATA(sock2); - - unix_data_ref(upd1); - unix_data_ref(upd2); - upd1->peerupd = upd2; - upd2->peerupd = upd1; - return(0); -} - - -/* On accept, we ref the peer's data for safe writes. */ -static int -unix_proto_accept(struct socket *sock, struct socket *newsock, int flags) -{ - struct socket *clientsock; - - /* - * If there aren't any sockets awaiting connection, - * then wait for one, unless nonblocking. - */ - while(!(clientsock = sock->iconn)) { - if (flags & O_NONBLOCK) return(-EAGAIN); - interruptible_sleep_on(sock->wait); - if (current->signal & ~current->blocked) { - return(-ERESTARTSYS); - } - } - - /* - * Great. Finish the connection relative to server and client, - * wake up the client and return the new fd to the server. - */ - sock->iconn = clientsock->next; - clientsock->next = NULL; - newsock->conn = clientsock; - clientsock->conn = newsock; - clientsock->state = SS_CONNECTED; - newsock->state = SS_CONNECTED; - unix_data_ref(UN_DATA(clientsock)); - UN_DATA(newsock)->peerupd = UN_DATA(clientsock); - UN_DATA(newsock)->sockaddr_un = UN_DATA(sock)->sockaddr_un; - UN_DATA(newsock)->sockaddr_len = UN_DATA(sock)->sockaddr_len; - wake_up_interruptible(clientsock->wait); - return(0); -} - - -/* Gets the current name or the name of the connected socket. */ -static int -unix_proto_getname(struct socket *sock, struct sockaddr *usockaddr, - int *usockaddr_len, int peer) + serv_upd = unix_data_lookup(&sockun, sockaddr_len, inode); + iput(inode); + if (!serv_upd) + { + return(-EINVAL); + } + + if ((i = sock_awaitconn(sock, serv_upd->socket)) < 0) + { + return(i); + } + + if (sock->conn) + { + unix_data_ref(UN_DATA(sock->conn)); + UN_DATA(sock)->peerupd = UN_DATA(sock->conn); /* ref server */ + } + return(0); +} + + +/* + * To do a socketpair, we just connect the two datas, easy! 
+ * Since we always wait on the socket inode, they're no contention + * for a wait area, and deadlock prevention in the case of a process + * writing to itself is, ignored, in true unix fashion! + */ + +static int unix_proto_socketpair(struct socket *sock1, struct socket *sock2) { - struct unix_proto_data *upd; - int len; - int er; + struct unix_proto_data *upd1 = UN_DATA(sock1), *upd2 = UN_DATA(sock2); - if (peer) { - if (sock->state != SS_CONNECTED) { - return(-EINVAL); + unix_data_ref(upd1); + unix_data_ref(upd2); + upd1->peerupd = upd2; + upd2->peerupd = upd1; + return(0); +} + + +/* + * On accept, we ref the peer's data for safe writes. + */ + +static int unix_proto_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct socket *clientsock; + +/* + * If there aren't any sockets awaiting connection, + * then wait for one, unless nonblocking. + */ + + while(!(clientsock = sock->iconn)) + { + if (flags & O_NONBLOCK) + return(-EAGAIN); + interruptible_sleep_on(sock->wait); + if (current->signal & ~current->blocked) + { + return(-ERESTARTSYS); + } } - upd = UN_DATA(sock->conn); - } else - upd = UN_DATA(sock); + +/* + * Great. Finish the connection relative to server and client, + * wake up the client and return the new fd to the server. + */ + + sock->iconn = clientsock->next; + clientsock->next = NULL; + newsock->conn = clientsock; + clientsock->conn = newsock; + clientsock->state = SS_CONNECTED; + newsock->state = SS_CONNECTED; + unix_data_ref(UN_DATA(clientsock)); + UN_DATA(newsock)->peerupd = UN_DATA(clientsock); + UN_DATA(newsock)->sockaddr_un = UN_DATA(sock)->sockaddr_un; + UN_DATA(newsock)->sockaddr_len = UN_DATA(sock)->sockaddr_len; + wake_up_interruptible(clientsock->wait); + return(0); +} + + +/* + * Gets the current name or the name of the connected socket. 
+ */ + +static int unix_proto_getname(struct socket *sock, struct sockaddr *usockaddr, + int *usockaddr_len, int peer) +{ + struct unix_proto_data *upd; + int len; - er=verify_area(VERIFY_WRITE, usockaddr_len, sizeof(*usockaddr_len)); - if(er) - return er; - if ((len = get_fs_long(usockaddr_len)) <= 0) return(-EINVAL); - if (len > upd->sockaddr_len) len = upd->sockaddr_len; - if (len) { - er=verify_area(VERIFY_WRITE, usockaddr, len); - if(er) - return er; - memcpy_tofs(usockaddr, &upd->sockaddr_un, len); - } - put_fs_long(len, usockaddr_len); - return(0); + if (peer) + { + if (sock->state != SS_CONNECTED) + { + return(-EINVAL); + } + upd = UN_DATA(sock->conn); + } + else + upd = UN_DATA(sock); + + len = upd->sockaddr_len; + memcpy(usockaddr, &upd->sockaddr_un, len); + *usockaddr_len=len; + return(0); } -/* We read from our own buf. */ -static int -unix_proto_read(struct socket *sock, char *ubuf, int size, int nonblock) -{ - struct unix_proto_data *upd; - int todo, avail; - int er; - - if ((todo = size) <= 0) return(0); - upd = UN_DATA(sock); - while(!(avail = UN_BUF_AVAIL(upd))) { - if (sock->state != SS_CONNECTED) { - return((sock->state == SS_DISCONNECTING) ? 0 : -EINVAL); - } - if (nonblock) return(-EAGAIN); - interruptible_sleep_on(sock->wait); - if (current->signal & ~current->blocked) { - return(-ERESTARTSYS); - } - } - - /* - * Copy from the read buffer into the user's buffer, - * watching for wraparound. Then we wake up the writer. - */ +/* + * We read from our own buf. + */ + +static int unix_proto_read(struct socket *sock, char *ubuf, int size, int nonblock) +{ + struct unix_proto_data *upd; + int todo, avail; + + if ((todo = size) <= 0) + return(0); + + upd = UN_DATA(sock); + while(!(avail = UN_BUF_AVAIL(upd))) + { + if (sock->state != SS_CONNECTED) + { + return((sock->state == SS_DISCONNECTING) ? 
0 : -EINVAL); + } + if (nonblock) + return(-EAGAIN); + interruptible_sleep_on(sock->wait); + if (current->signal & ~current->blocked) + { + return(-ERESTARTSYS); + } + } + +/* + * Copy from the read buffer into the user's buffer, + * watching for wraparound. Then we wake up the writer. + */ - unix_lock(upd); - do { - int part, cando; - - if (avail <= 0) { - printk("UNIX: read: AVAIL IS NEGATIVE!!!\n"); - send_sig(SIGKILL, current, 1); - return(-EPIPE); - } - - if ((cando = todo) > avail) cando = avail; - if (cando >(part = BUF_SIZE - upd->bp_tail)) cando = part; - if((er=verify_area(VERIFY_WRITE,ubuf,cando))<0) - { - unix_unlock(upd); - return er; - } - memcpy_tofs(ubuf, upd->buf + upd->bp_tail, cando); - upd->bp_tail =(upd->bp_tail + cando) &(BUF_SIZE-1); - ubuf += cando; - todo -= cando; - if (sock->state == SS_CONNECTED) - wake_up_interruptible(sock->conn->wait); - avail = UN_BUF_AVAIL(upd); - } while(todo && avail); - unix_unlock(upd); - return(size - todo); + unix_lock(upd); + do + { + int part, cando; + + if (avail <= 0) + { + printk("UNIX: read: AVAIL IS NEGATIVE!!!\n"); + send_sig(SIGKILL, current, 1); + return(-EPIPE); + } + + if ((cando = todo) > avail) + cando = avail; + if (cando >(part = BUF_SIZE - upd->bp_tail)) + cando = part; + memcpy_tofs(ubuf, upd->buf + upd->bp_tail, cando); + upd->bp_tail =(upd->bp_tail + cando) &(BUF_SIZE-1); + ubuf += cando; + todo -= cando; + if (sock->state == SS_CONNECTED) + wake_up_interruptible(sock->conn->wait); + avail = UN_BUF_AVAIL(upd); + } + while(todo && avail); + unix_unlock(upd); + return(size - todo); } /* - * We write to our peer's buf. When we connected we ref'd this - * peer so we are safe that the buffer remains, even after the - * peer has disconnected, which we check other ways. + * We write to our peer's buf. When we connected we ref'd this + * peer so we are safe that the buffer remains, even after the + * peer has disconnected, which we check other ways. 
*/ -static int -unix_proto_write(struct socket *sock, char *ubuf, int size, int nonblock) + +static int unix_proto_write(struct socket *sock, char *ubuf, int size, int nonblock) { - struct unix_proto_data *pupd; - int todo, space; - int er; + struct unix_proto_data *pupd; + int todo, space; - if ((todo = size) <= 0) return(0); - if (sock->state != SS_CONNECTED) { - if (sock->state == SS_DISCONNECTING) { - send_sig(SIGPIPE, current, 1); - return(-EPIPE); + if ((todo = size) <= 0) + return(0); + if (sock->state != SS_CONNECTED) + { + if (sock->state == SS_DISCONNECTING) + { + send_sig(SIGPIPE, current, 1); + return(-EPIPE); + } + return(-EINVAL); } - return(-EINVAL); - } - pupd = UN_DATA(sock)->peerupd; /* safer than sock->conn */ + pupd = UN_DATA(sock)->peerupd; /* safer than sock->conn */ - while(!(space = UN_BUF_SPACE(pupd))) { - if (nonblock) return(-EAGAIN); - interruptible_sleep_on(sock->wait); - if (current->signal & ~current->blocked) { - return(-ERESTARTSYS); - } - if (sock->state == SS_DISCONNECTING) { - send_sig(SIGPIPE, current, 1); - return(-EPIPE); + while(!(space = UN_BUF_SPACE(pupd))) + { + if (nonblock) + return(-EAGAIN); + interruptible_sleep_on(sock->wait); + if (current->signal & ~current->blocked) + { + return(-ERESTARTSYS); + } + if (sock->state == SS_DISCONNECTING) + { + send_sig(SIGPIPE, current, 1); + return(-EPIPE); + } } - } - /* - * Copy from the user's buffer to the write buffer, - * watching for wraparound. Then we wake up the reader. - */ +/* + * Copy from the user's buffer to the write buffer, + * watching for wraparound. Then we wake up the reader. 
+ */ - unix_lock(pupd); - - do { - int part, cando; - - if (space <= 0) { - printk("UNIX: write: SPACE IS NEGATIVE!!!\n"); - send_sig(SIGKILL, current, 1); - return(-EPIPE); + unix_lock(pupd); + + do + { + int part, cando; + + if (space <= 0) + { + printk("UNIX: write: SPACE IS NEGATIVE!!!\n"); + send_sig(SIGKILL, current, 1); + return(-EPIPE); + } + + /* + * We may become disconnected inside this loop, so watch + * for it (peerupd is safe until we close). + */ + + if (sock->state == SS_DISCONNECTING) + { + send_sig(SIGPIPE, current, 1); + unix_unlock(pupd); + return(-EPIPE); + } + + if ((cando = todo) > space) + cando = space; + + if (cando >(part = BUF_SIZE - pupd->bp_head)) + cando = part; + + memcpy_fromfs(pupd->buf + pupd->bp_head, ubuf, cando); + pupd->bp_head =(pupd->bp_head + cando) &(BUF_SIZE-1); + ubuf += cando; + todo -= cando; + if (sock->state == SS_CONNECTED) + wake_up_interruptible(sock->conn->wait); + space = UN_BUF_SPACE(pupd); } + while(todo && space); - /* - * We may become disconnected inside this loop, so watch - * for it (peerupd is safe until we close). - */ - if (sock->state == SS_DISCONNECTING) { - send_sig(SIGPIPE, current, 1); - unix_unlock(pupd); - return(-EPIPE); - } - if ((cando = todo) > space) cando = space; - if (cando >(part = BUF_SIZE - pupd->bp_head)) cando = part; - er=verify_area(VERIFY_READ, ubuf, cando); - if(er) - { - unix_unlock(pupd); - return er; - } - memcpy_fromfs(pupd->buf + pupd->bp_head, ubuf, cando); - pupd->bp_head =(pupd->bp_head + cando) &(BUF_SIZE-1); - ubuf += cando; - todo -= cando; - if (sock->state == SS_CONNECTED) - wake_up_interruptible(sock->conn->wait); - space = UN_BUF_SPACE(pupd); - } while(todo && space); - unix_unlock(pupd); - return(size - todo); + unix_unlock(pupd); + return(size - todo); } +/* + * Select on a unix domain socket. 
+ */ -static int -unix_proto_select(struct socket *sock, int sel_type, select_table * wait) +static int unix_proto_select(struct socket *sock, int sel_type, select_table * wait) { - struct unix_proto_data *upd, *peerupd; + struct unix_proto_data *upd, *peerupd; - /* Handle server sockets specially. */ - if (sock->flags & SO_ACCEPTCON) { - if (sel_type == SEL_IN) { - if (sock->iconn) return(1); + /* + * Handle server sockets specially. + */ + if (sock->flags & SO_ACCEPTCON) + { + if (sel_type == SEL_IN) + { + if (sock->iconn) + return(1); + select_wait(sock->wait, wait); + return(sock->iconn ? 1 : 0); + } select_wait(sock->wait, wait); - return(sock->iconn ? 1 : 0); + return(0); } - select_wait(sock->wait, wait); - return(0); - } - if (sel_type == SEL_IN) { - upd = UN_DATA(sock); - if (UN_BUF_AVAIL(upd)) /* even if disconnected */ + if (sel_type == SEL_IN) + { + upd = UN_DATA(sock); + if (UN_BUF_AVAIL(upd)) /* even if disconnected */ + return(1); + else if (sock->state != SS_CONNECTED) + { return(1); - else if (sock->state != SS_CONNECTED) { - return(1); + } + select_wait(sock->wait,wait); + return(0); } - select_wait(sock->wait,wait); - return(0); - } - if (sel_type == SEL_OUT) { - if (sock->state != SS_CONNECTED) { - return(1); - } - peerupd = UN_DATA(sock->conn); - if (UN_BUF_SPACE(peerupd) > 0) return(1); - select_wait(sock->wait,wait); - return(0); - } + + if (sel_type == SEL_OUT) + { + if (sock->state != SS_CONNECTED) + { + return(1); + } + peerupd = UN_DATA(sock->conn); + if (UN_BUF_SPACE(peerupd) > 0) + return(1); + select_wait(sock->wait,wait); + return(0); + } + + /* + * Exceptions - SEL_EX + */ - /* SEL_EX */ - return(0); + return(0); } -static int -unix_proto_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct unix_proto_data *upd, *peerupd; - int er; - - upd = UN_DATA(sock); - peerupd = (sock->state == SS_CONNECTED) ? 
UN_DATA(sock->conn) : NULL; - - switch(cmd) { - case TIOCINQ: - if (sock->flags & SO_ACCEPTCON) return(-EINVAL); - er=verify_area(VERIFY_WRITE,(void *)arg, sizeof(unsigned long)); - if(er) - return er; - if (UN_BUF_AVAIL(upd) || peerupd) - put_fs_long(UN_BUF_AVAIL(upd),(unsigned long *)arg); - else - put_fs_long(0,(unsigned long *)arg); - break; - case TIOCOUTQ: - if (sock->flags & SO_ACCEPTCON) return(-EINVAL); - er=verify_area(VERIFY_WRITE,(void *)arg, sizeof(unsigned long)); - if(er) - return er; - if (peerupd) put_fs_long(UN_BUF_SPACE(peerupd), - (unsigned long *)arg); - else - put_fs_long(0,(unsigned long *)arg); - break; - default: - return(-EINVAL); - } - return(0); +/* + * ioctl() calls sent to an AF_UNIX socket + */ + +static int unix_proto_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct unix_proto_data *upd, *peerupd; + int er; + + upd = UN_DATA(sock); + peerupd = (sock->state == SS_CONNECTED) ? UN_DATA(sock->conn) : NULL; + + switch(cmd) + { + case TIOCINQ: + if (sock->flags & SO_ACCEPTCON) + return(-EINVAL); + er=verify_area(VERIFY_WRITE,(void *)arg, sizeof(unsigned long)); + if(er) + return er; + if (UN_BUF_AVAIL(upd) || peerupd) + put_fs_long(UN_BUF_AVAIL(upd),(unsigned long *)arg); + else + put_fs_long(0,(unsigned long *)arg); + break; + case TIOCOUTQ: + if (sock->flags & SO_ACCEPTCON) + return(-EINVAL); + er=verify_area(VERIFY_WRITE,(void *)arg, sizeof(unsigned long)); + if(er) + return er; + if (peerupd) + put_fs_long(UN_BUF_SPACE(peerupd),(unsigned long *)arg); + else + put_fs_long(0,(unsigned long *)arg); + break; + default: + return(-EINVAL); + } + return(0); } static struct proto_ops unix_proto_ops = { - AF_UNIX, - unix_proto_create, - unix_proto_dup, - unix_proto_release, - unix_proto_bind, - unix_proto_connect, - unix_proto_socketpair, - unix_proto_accept, - unix_proto_getname, - unix_proto_read, - unix_proto_write, - unix_proto_select, - unix_proto_ioctl, - unix_proto_listen, - unix_proto_send, - unix_proto_recv, 
- unix_proto_sendto, - unix_proto_recvfrom, - unix_proto_shutdown, - unix_proto_setsockopt, - unix_proto_getsockopt, - NULL /* unix_proto_fcntl */ + AF_UNIX, + unix_proto_create, + unix_proto_dup, + unix_proto_release, + unix_proto_bind, + unix_proto_connect, + unix_proto_socketpair, + unix_proto_accept, + unix_proto_getname, + unix_proto_read, + unix_proto_write, + unix_proto_select, + unix_proto_ioctl, + unix_proto_listen, + unix_proto_send, + unix_proto_recv, + unix_proto_sendto, + unix_proto_recvfrom, + unix_proto_shutdown, + unix_proto_setsockopt, + unix_proto_getsockopt, + NULL /* unix_proto_fcntl */ }; +/* + * Initialise the Unix domain protocol. + */ -void -unix_proto_init(struct net_proto *pro) +void unix_proto_init(struct net_proto *pro) { - struct unix_proto_data *upd; + struct unix_proto_data *upd; + + /* + * Tell SOCKET that we are alive... + */ - /* Tell SOCKET that we are alive... */ - (void) sock_register(unix_proto_ops.family, &unix_proto_ops); + (void) sock_register(unix_proto_ops.family, &unix_proto_ops); - for(upd = unix_datas; upd <= last_unix_data; ++upd) { - upd->refcnt = 0; - } + for(upd = unix_datas; upd <= last_unix_data; ++upd) + { + upd->refcnt = 0; + } }