diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/Documentation/filesystems/vfs.txt linux.ac/Documentation/filesystems/vfs.txt --- linux.t2/Documentation/filesystems/vfs.txt Sat Jun 24 13:37:19 2000 +++ linux.ac/Documentation/filesystems/vfs.txt Sun Jun 11 15:28:50 2000 @@ -176,7 +176,7 @@ struct super_operations { void (*read_inode) (struct inode *); - void (*write_inode) (struct inode *); + void (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); void (*delete_inode) (struct inode *); int (*notify_change) (struct dentry *, struct iattr *); @@ -198,7 +198,8 @@ read. Other members are filled in by this method write_inode: this method is called when the VFS needs to write an - inode to disc + inode to disc. The second parameter indicates whether the write + should be synchronous or not, not all filesystems check this flag. put_inode: called when the VFS inode is removed from the inode cache. This method is optional diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/arch/i386/kernel/setup.c linux.ac/arch/i386/kernel/setup.c --- linux.t2/arch/i386/kernel/setup.c Sat Jun 24 13:49:55 2000 +++ linux.ac/arch/i386/kernel/setup.c Sat Jun 17 18:35:54 2000 @@ -39,10 +39,15 @@ * Detection for Celeron coppermine, identify_cpu() overhauled, * and a few other clean ups. 
* Dave Jones , April 2000 - * + * Pentium-III code by Ingo Molnar and modifications by Goutham Rao + * Updated to: * Pentium III FXSR, SSE support - * General FPU state handling cleanups * Gareth Hughes , May 2000 + * + * Added proper Cascades CPU and L2 cache detection for Cascades + * and 8-way type cache happy bunch from Intel:^) + * Dragan Stancevic , May 2000 + * */ /* diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/arch/sparc/config.in linux.ac/arch/sparc/config.in --- linux.t2/arch/sparc/config.in Sat Jun 24 13:49:56 2000 +++ linux.ac/arch/sparc/config.in Thu Jun 22 16:29:02 2000 @@ -1,6 +1,6 @@ -# $Id: config.in,v 1.96 2000/06/20 01:10:00 anton Exp $ +# $Id: config.in,v 1.94 2000/06/04 22:23:10 anton Exp $ # For a description of the syntax of this configuration file, -# see the Configure script. +# see Documentation/kbuild/config-language.txt. # mainmenu_name "Linux/SPARC Kernel Configuration" diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/arch/sparc64/kernel/sys_sparc.c linux.ac/arch/sparc64/kernel/sys_sparc.c --- linux.t2/arch/sparc64/kernel/sys_sparc.c Sat Jun 24 13:49:56 2000 +++ linux.ac/arch/sparc64/kernel/sys_sparc.c Mon Jun 19 16:17:49 2000 @@ -1,4 +1,4 @@ -/* $Id: sys_sparc.c,v 1.41 2000/06/22 11:42:25 davem Exp $ +/* $Id: sys_sparc.c,v 1.40 2000/06/19 06:24:37 davem Exp $ * linux/arch/sparc64/kernel/sys_sparc.c * * This file contains various random system calls that @@ -227,7 +227,6 @@ len = PAGE_ALIGN(len); retval = -EINVAL; - down(&current->mm->mmap_sem); lock_kernel(); if (current->thread.flags & SPARC_FLAG_32BIT) { @@ -241,11 +240,12 @@ goto out_putf; } + down(&current->mm->mmap_sem); retval = do_mmap(file, addr, len, prot, flags, off); + up(&current->mm->mmap_sem); out_putf: unlock_kernel(); - up(&current->mm->mmap_sem); if (file) fput(file); out: diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/arch/sparc64/kernel/sys_sparc32.c linux.ac/arch/sparc64/kernel/sys_sparc32.c --- 
linux.t2/arch/sparc64/kernel/sys_sparc32.c Sat Jun 24 13:49:56 2000 +++ linux.ac/arch/sparc64/kernel/sys_sparc32.c Mon Jun 19 16:17:49 2000 @@ -1,4 +1,4 @@ -/* $Id: sys_sparc32.c,v 1.152 2000/06/22 17:44:47 davem Exp $ +/* $Id: sys_sparc32.c,v 1.149 2000/06/19 06:24:37 davem Exp $ * sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. * * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -2624,8 +2624,7 @@ * the cmsg_len for MSG_TRUNC cases, we need not check that case either. */ ucmsg = (struct cmsghdr *) orig_cmsg_uptr; - while(((unsigned long)ucmsg) <= - (((unsigned long)kmsg->msg_control) - sizeof(struct cmsghdr))) { + while(((unsigned long)ucmsg) < ((unsigned long)kmsg->msg_control)) { struct cmsghdr32 *kcmsg32 = (struct cmsghdr32 *) wp; int clen64, clen32; @@ -3029,9 +3028,7 @@ bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0])); - lock_kernel(); file = open_exec(filename); - unlock_kernel(); retval = PTR_ERR(file); if (IS_ERR(file)) @@ -3043,10 +3040,12 @@ bprm.loader = 0; bprm.exec = 0; if ((bprm.argc = count32(argv)) < 0) { + allow_write_access(file); fput(file); return bprm.argc; } if ((bprm.envc = count32(envp)) < 0) { + allow_write_access(file); fput(file); return bprm.envc; } @@ -3075,6 +3074,7 @@ out: /* Something went wrong, return the inode and free the argument pages*/ + allow_write_access(bprm.file); if (bprm.file) fput(bprm.file); @@ -3777,6 +3777,8 @@ { return copy_to_user(res32, kres, sizeof(*res32)); } + +extern asmlinkage int sys_nfsservctl(int cmd, void *arg, void *resp); int asmlinkage sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) { diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/arch/sparc64/kernel/sys_sunos32.c linux.ac/arch/sparc64/kernel/sys_sunos32.c --- linux.t2/arch/sparc64/kernel/sys_sunos32.c Sat Jun 24 13:49:56 2000 +++ linux.ac/arch/sparc64/kernel/sys_sunos32.c Mon Jun 19 16:17:49 2000 
@@ -1,4 +1,4 @@ -/* $Id: sys_sunos32.c,v 1.49 2000/06/22 11:42:25 davem Exp $ +/* $Id: sys_sunos32.c,v 1.48 2000/06/19 06:24:37 davem Exp $ * sys_sunos32.c: SunOS binary compatability layer on sparc64. * * Copyright (C) 1995, 1996, 1997 David S. Miller (davem@caip.rutgers.edu) @@ -68,7 +68,6 @@ struct file *file = NULL; unsigned long retval, ret_type; - down(&current->mm->mmap_sem); lock_kernel(); if(flags & MAP_NORESERVE) { static int cnt; @@ -102,10 +101,12 @@ flags &= ~_MAP_NEW; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + down(&current->mm->mmap_sem); retval = do_mmap(file, (unsigned long) addr, (unsigned long) len, (unsigned long) prot, (unsigned long) flags, (unsigned long) off); + up(&current->mm->mmap_sem); if(!ret_type) retval = ((retval < 0xf0000000) ? 0 : retval); out_putf: @@ -113,7 +114,6 @@ fput(file); out: unlock_kernel(); - up(&current->mm->mmap_sem); return (u32) retval; } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/block/rd.c linux.ac/drivers/block/rd.c --- linux.t2/drivers/block/rd.c Sat Jun 24 13:49:56 2000 +++ linux.ac/drivers/block/rd.c Thu Jun 22 16:32:02 2000 @@ -107,7 +107,9 @@ * architecture-specific setup routine (from the stored boot sector * information). */ -int rd_size = 4096; /* Size of the RAM disks */ + +int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ + /* * It would be very desiderable to have a soft-blocksize (that in the case * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/char/applicom.c linux.ac/drivers/char/applicom.c --- linux.t2/drivers/char/applicom.c Sat Jun 24 13:36:42 2000 +++ linux.ac/drivers/char/applicom.c Sat Jun 17 18:52:50 2000 @@ -226,6 +226,7 @@ continue; } + /* &ac_open as dev_id? David, could you pass me this joint? 
*/ if (request_irq(dev->irq, &ac_interrupt, SA_SHIRQ, "Applicom PCI", &ac_open)) { printk(KERN_INFO "Could not allocate IRQ %d for PCI Applicom device.\n", dev->irq); iounmap(RamIO); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/char/i2c-old.c linux.ac/drivers/char/i2c-old.c --- linux.t2/drivers/char/i2c-old.c Sat Jun 24 13:36:43 2000 +++ linux.ac/drivers/char/i2c-old.c Tue Jun 13 16:25:53 2000 @@ -37,8 +37,8 @@ static int bus_count = 0, driver_count = 0; #ifdef CONFIG_VIDEO_BT848 -extern int i2c_tuner_init(void); -extern int msp3400c_init(void); +extern int tuner_init_module(void); +extern int msp3400_init_module(void); #endif #ifdef CONFIG_VIDEO_BUZ extern int saa7111_init(void); @@ -55,8 +55,8 @@ scan ? " (i2c bus scan enabled)" : ""); /* anything to do here ? */ #ifdef CONFIG_VIDEO_BT848 - i2c_tuner_init(); - msp3400c_init(); + tuner_init_module(); + msp3400_init_module(); #endif #ifdef CONFIG_VIDEO_BUZ saa7111_init(); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/char/videodev.c linux.ac/drivers/char/videodev.c --- linux.t2/drivers/char/videodev.c Sat Jun 24 13:49:57 2000 +++ linux.ac/drivers/char/videodev.c Thu Jun 22 17:20:11 2000 @@ -62,7 +62,7 @@ #ifdef CONFIG_VIDEO_BT848 -extern int i2c_tuner_init(struct video_init *); +extern int tuner_init_module(struct video_init *); #endif #ifdef CONFIG_VIDEO_BWQCAM extern int init_bw_qcams(struct video_init *); @@ -79,7 +79,7 @@ static struct video_init video_init_list[]={ #ifdef CONFIG_VIDEO_BT848 - {"i2c-tuner", i2c_tuner_init}, + {"i2c-tuner", tuner_init_module}, #endif #ifdef CONFIG_VIDEO_BWQCAM {"bw-qcam", init_bw_qcams}, @@ -286,14 +286,6 @@ PRINT_VID_TYPE(VID_TYPE_MJPEG_ENCODER); out += sprintf (out, "\n"); out += sprintf (out, "hardware : 0x%x\n", vfd->hardware); -#if 0 - out += sprintf (out, "channels : %d\n", d->vcap.channels); - out += sprintf (out, "audios : %d\n", d->vcap.audios); - out += sprintf (out, "maxwidth : %d\n", 
d->vcap.maxwidth); - out += sprintf (out, "maxheight : %d\n", d->vcap.maxheight); - out += sprintf (out, "minwidth : %d\n", d->vcap.minwidth); - out += sprintf (out, "minheight : %d\n", d->vcap.minheight); -#endif skip: len = out - page; @@ -358,8 +350,6 @@ d->proc_entry = p; d->vdev = vfd; strcpy (d->name, name); - - /* How can I get capability information ? */ list_add (&d->proc_list, &videodev_proc_list); } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/net/Config.in linux.ac/drivers/net/Config.in --- linux.t2/drivers/net/Config.in Sat Jun 24 13:49:58 2000 +++ linux.ac/drivers/net/Config.in Mon Jun 5 19:39:44 2000 @@ -197,7 +197,7 @@ bool 'FDDI driver support' CONFIG_FDDI if [ "$CONFIG_FDDI" = "y" ]; then - tristate ' Digital DEFEA and DEFPA adapter support' CONFIG_DEFXX + dep_tristate ' Digital DEFEA and DEFPA adapter support' CONFIG_DEFXX $CONFIG_FDDI tristate ' SysKonnect FDDI PCI support' CONFIG_SKFP fi diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/net/irda/irport.c linux.ac/drivers/net/irda/irport.c --- linux.t2/drivers/net/irda/irport.c Sat Jun 24 13:36:35 2000 +++ linux.ac/drivers/net/irda/irport.c Tue Jun 13 17:22:32 2000 @@ -941,7 +941,12 @@ switch (cmd) { case SIOCSBANDWIDTH: /* Set bandwidth */ - if (!capable(CAP_NET_ADMIN)) + /* + * This function will also be used by IrLAP to change the + * speed, so we still must allow for speed change within + * interrupt context. + */ + if (!in_interrupt() && !capable(CAP_NET_ADMIN)) return -EPERM; irda_task_execute(self, __irport_change_speed, NULL, NULL, (void *) irq->ifr_baudrate); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/net/irda/irtty.c linux.ac/drivers/net/irda/irtty.c --- linux.t2/drivers/net/irda/irtty.c Sat Jun 24 13:36:35 2000 +++ linux.ac/drivers/net/irda/irtty.c Tue Jun 13 17:23:16 2000 @@ -6,7 +6,7 @@ * Status: Experimental. 
* Author: Dag Brattli * Created at: Tue Dec 9 21:18:38 1997 - * Modified at: Sat Mar 11 07:43:30 2000 + * Modified at: Tue Apr 25 21:17:49 2000 * Modified by: Dag Brattli * Sources: slip.c by Laurence Culhane, * Fred N. van Kempen, @@ -962,7 +962,12 @@ switch (cmd) { case SIOCSBANDWIDTH: /* Set bandwidth */ - if (!capable(CAP_NET_ADMIN)) + /* + * This function will also be used by IrLAP to change the + * speed, so we still must allow for speed change within + * interrupt context. + */ + if (!in_interrupt() && !capable(CAP_NET_ADMIN)) return -EPERM; irda_task_execute(self, irtty_change_speed, NULL, NULL, (void *) irq->ifr_baudrate); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/net/irda/nsc-ircc.c linux.ac/drivers/net/irda/nsc-ircc.c --- linux.t2/drivers/net/irda/nsc-ircc.c Sat Jun 24 13:36:35 2000 +++ linux.ac/drivers/net/irda/nsc-ircc.c Tue Jun 13 17:23:24 2000 @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -1947,8 +1948,15 @@ switch (cmd) { case SIOCSBANDWIDTH: /* Set bandwidth */ - if (!capable(CAP_NET_ADMIN)) + /* + * This function will also be used by IrLAP to change the + * speed, so we still must allow for speed change within + * interrupt context. + */ + if (!in_interrupt() && !capable(CAP_NET_ADMIN)) { + IRDA_DEBUG(0, __FUNCTION__ "(), not capable sysadm\n"); return -EPERM; + } nsc_ircc_change_speed(self, irq->ifr_baudrate); break; case SIOCSMEDIABUSY: /* Set media busy */ diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/net/irda/toshoboe.c linux.ac/drivers/net/irda/toshoboe.c --- linux.t2/drivers/net/irda/toshoboe.c Sat Jun 24 13:36:35 2000 +++ linux.ac/drivers/net/irda/toshoboe.c Tue Jun 13 17:23:48 2000 @@ -603,7 +603,12 @@ switch (cmd) { case SIOCSBANDWIDTH: /* Set bandwidth */ - if (!capable(CAP_NET_ADMIN)) + /* + * This function will also be used by IrLAP to change the + * speed, so we still must allow for speed change within + * interrupt context. 
+ */ + if (!in_interrupt() && !capable(CAP_NET_ADMIN)) return -EPERM; /* toshoboe_setbaud(self, irq->ifr_baudrate); */ /* Just change speed once - inserted by Paul Bristow */ @@ -717,7 +722,7 @@ self->open = 0; self->stopped = 0; self->pdev = pci_dev; - self->base = pci_dev->resource[0].start; + self->base = pci_resource_start (pci_dev, 0); self->io.sir_base = self->base; self->io.irq = pci_dev->irq; @@ -900,7 +905,6 @@ static void toshoboe_wakeup (struct toshoboe_cb *self) { - struct net_device *dev = self->netdev; unsigned long flags; if (!self->stopped) @@ -952,36 +956,26 @@ struct pci_dev *pci_dev = NULL; int found = 0; - do - { - pci_dev = pci_find_device (PCI_VENDOR_ID_TOSHIBA, - PCI_DEVICE_ID_FIR701, pci_dev); - if (pci_dev) - { + while ((pci_dev = pci_find_device (PCI_VENDOR_ID_TOSHIBA, + PCI_DEVICE_ID_FIR701, pci_dev)) != NULL) { + if (pci_enable_device(pci_dev)) + continue; printk (KERN_WARNING "ToshOboe: Found 701 chip at 0x%0lx irq %d\n", - pci_dev->resource[0].start, + pci_resource_start (pci_dev, 0), pci_dev->irq); if (!toshoboe_open (pci_dev)) found++; - } - - } - while (pci_dev); - + } if (found) - { return 0; - } return -ENODEV; } -#ifdef MODULE -static void -toshoboe_cleanup (void) +static void __exit toshoboe_cleanup (void) { int i; @@ -997,19 +991,8 @@ } - -int -init_module (void) -{ - return toshoboe_init (); -} - - -void -cleanup_module (void) -{ - toshoboe_cleanup (); -} - - +#ifdef MODULE +module_init(toshoboe_init); #endif +module_exit(toshoboe_cleanup); + diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/drivers/net/irda/w83977af_ir.c linux.ac/drivers/net/irda/w83977af_ir.c --- linux.t2/drivers/net/irda/w83977af_ir.c Sat Jun 24 13:36:35 2000 +++ linux.ac/drivers/net/irda/w83977af_ir.c Tue Jun 13 17:23:53 2000 @@ -255,13 +255,12 @@ dev->get_stats = w83977af_net_get_stats; rtnl_lock(); - err = register_netdev(dev); + err = register_netdevice(dev); rtnl_unlock(); if (err) { - ERROR(__FUNCTION__ "(), register_netdev() 
failed!\n"); + ERROR(__FUNCTION__ "(), register_netdevice() failed!\n"); return -1; } - MESSAGE("IrDA: Registered device %s\n", dev->name); return 0; @@ -302,7 +301,7 @@ /* Release the PORT that this driver is using */ IRDA_DEBUG(0 , __FUNCTION__ "(), Releasing Region %03x\n", - self->io.fir_base); + self->io.fir_base); release_region(self->io.fir_base, self->io.fir_ext); if (self->tx_buff.head) @@ -1332,7 +1331,12 @@ switch (cmd) { case SIOCSBANDWIDTH: /* Set bandwidth */ - if (!capable(CAP_NET_ADMIN)) + /* + * This function will also be used by IrLAP to change the + * speed, so we still must allow for speed change within + * interrupt context. + */ + if (!in_interrupt() && !capable(CAP_NET_ADMIN)) return -EPERM; w83977af_change_speed(self, irq->ifr_baudrate); break; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/Config.in linux.ac/fs/Config.in --- linux.t2/fs/Config.in Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/Config.in Fri Jun 9 15:51:51 2000 @@ -78,6 +78,7 @@ dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD + dep_mbool ' Provide NFS server over TCP support (DEVELOPER-ONLY)' CONFIG_NFSD_TCP $CONFIG_NFSD $CONFIG_EXPERIMENTAL if [ "$CONFIG_NFS_FS" = "y" -o "$CONFIG_NFSD" = "y" ]; then define_tristate CONFIG_SUNRPC y diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/adfs/adfs.h linux.ac/fs/adfs/adfs.h --- linux.t2/fs/adfs/adfs.h Sat Jun 24 13:36:12 2000 +++ linux.ac/fs/adfs/adfs.h Sun Jun 11 15:28:11 2000 @@ -73,7 +73,7 @@ #endif struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); void adfs_read_inode(struct inode *inode); -void adfs_write_inode(struct inode *inode); +void adfs_write_inode(struct inode *inode, int unused); int adfs_notify_change(struct dentry *dentry, struct iattr *attr); /* map.c */ diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/adfs/inode.c linux.ac/fs/adfs/inode.c 
--- linux.t2/fs/adfs/inode.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/adfs/inode.c Sat Jun 17 20:17:04 2000 @@ -349,7 +349,7 @@ * The adfs-specific inode data has already been updated by * adfs_notify_change() */ -void adfs_write_inode(struct inode *inode) +void adfs_write_inode(struct inode *inode, int unused) { struct super_block *sb = inode->i_sb; struct object_info obj; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/affs/inode.c linux.ac/fs/affs/inode.c --- linux.t2/fs/affs/inode.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/affs/inode.c Sun Jun 11 15:28:11 2000 @@ -191,7 +191,7 @@ } void -affs_write_inode(struct inode *inode) +affs_write_inode(struct inode *inode, int unused) { struct buffer_head *bh; struct file_end *file_end; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/bfs/inode.c linux.ac/fs/bfs/inode.c --- linux.t2/fs/bfs/inode.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/bfs/inode.c Sun Jun 11 15:27:57 2000 @@ -84,7 +84,7 @@ brelse(bh); } -static void bfs_write_inode(struct inode * inode) +static void bfs_write_inode(struct inode * inode, int unused) { unsigned long ino = inode->i_ino; kdev_t dev = inode->i_dev; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/binfmt_elf.c linux.ac/fs/binfmt_elf.c --- linux.t2/fs/binfmt_elf.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/binfmt_elf.c Wed Jun 21 17:29:10 2000 @@ -666,9 +666,8 @@ interpreter, &interp_load_addr); - lock_kernel(); + allow_write_access(interpreter); fput(interpreter); - unlock_kernel(); kfree(elf_interpreter); if (elf_entry == ~0UL) { @@ -755,9 +754,8 @@ /* error cleanup */ out_free_dentry: - lock_kernel(); + allow_write_access(interpreter); fput(interpreter); - unlock_kernel(); out_free_interp: if (elf_interpreter) kfree(elf_interpreter); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/binfmt_em86.c linux.ac/fs/binfmt_em86.c --- linux.t2/fs/binfmt_em86.c Sat Jun 24 13:36:11 2000 +++ 
linux.ac/fs/binfmt_em86.c Mon Jun 12 12:33:49 2000 @@ -43,6 +43,7 @@ } bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/binfmt_misc.c linux.ac/fs/binfmt_misc.c --- linux.t2/fs/binfmt_misc.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/binfmt_misc.c Mon Jun 12 12:33:49 2000 @@ -201,6 +201,7 @@ if (!fmt) goto _ret; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/binfmt_script.c linux.ac/fs/binfmt_script.c --- linux.t2/fs/binfmt_script.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/binfmt_script.c Mon Jun 12 12:33:49 2000 @@ -29,6 +29,7 @@ */ bprm->sh_bang++; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/block_dev.c linux.ac/fs/block_dev.c --- linux.t2/fs/block_dev.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/block_dev.c Sat Jun 10 21:51:07 2000 @@ -313,7 +313,7 @@ * since the vma has no handle. 
*/ -static int block_fsync(struct file *filp, struct dentry *dentry) +static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) { return fsync_dev(dentry->d_inode->i_rdev); } @@ -597,6 +597,8 @@ ret = bdev->bd_op->open(fake_inode, &fake_file); if (!ret) atomic_inc(&bdev->bd_openers); + else if (!atomic_read(&bdev->bd_openers)) + bdev->bd_op = NULL; iput(fake_inode); } } @@ -617,6 +619,8 @@ ret = bdev->bd_op->open(inode,filp); if (!ret) atomic_inc(&bdev->bd_openers); + else if (!atomic_read(&bdev->bd_openers)) + bdev->bd_op = NULL; } up(&bdev->bd_sem); return ret; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/buffer.c linux.ac/fs/buffer.c --- linux.t2/fs/buffer.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/buffer.c Mon Jun 19 19:54:57 2000 @@ -68,6 +68,8 @@ * lru_list_lock > hash_table_lock > free_list_lock > unused_list_lock */ +#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers) + /* * Hash table gook.. */ @@ -323,7 +325,7 @@ * filp may be NULL if called via the msync of a vma. */ -int file_fsync(struct file *filp, struct dentry *dentry) +int file_fsync(struct file *filp, struct dentry *dentry, int datasync) { struct inode * inode = dentry->d_inode; struct super_block * sb; @@ -332,7 +334,7 @@ lock_kernel(); /* sync the inode to buffers */ - write_inode_now(inode); + write_inode_now(inode, 0); /* sync the superblock to buffers */ sb = inode->i_sb; @@ -360,12 +362,7 @@ goto out; dentry = file->f_dentry; - if (!dentry) - goto out_putf; - inode = dentry->d_inode; - if (!inode) - goto out_putf; err = -EINVAL; if (!file->f_op || !file->f_op->fsync) @@ -373,7 +370,7 @@ /* We need to protect against concurrent writers.. 
*/ down(&inode->i_sem); - err = file->f_op->fsync(file, dentry); + err = file->f_op->fsync(file, dentry, 0); up(&inode->i_sem); out_putf: @@ -395,20 +392,14 @@ goto out; dentry = file->f_dentry; - if (!dentry) - goto out_putf; - inode = dentry->d_inode; - if (!inode) - goto out_putf; err = -EINVAL; if (!file->f_op || !file->f_op->fsync) goto out_putf; - /* this needs further work, at the moment it is identical to fsync() */ down(&inode->i_sem); - err = file->f_op->fsync(file, dentry); + err = file->f_op->fsync(file, dentry, 1); up(&inode->i_sem); out_putf: @@ -535,8 +526,7 @@ * As we don't lock buffers (unless we are reading them, that is), * something might happen to it while we sleep (ie a read-error * will force it bad). This shouldn't really happen currently, but - * the code is ready. - */ + * the code is ready. */ struct buffer_head * get_hash_table(kdev_t dev, int block, int size) { struct buffer_head **head = &hash(dev, block); @@ -574,6 +564,42 @@ return 0; } +void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode) +{ + spin_lock(&lru_list_lock); + if (bh->b_inode) + list_del(&bh->b_inode_buffers); + bh->b_inode = inode; + list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers); + spin_unlock(&lru_list_lock); +} + +/* The caller must have the lru_list lock before calling the + remove_inode_queue functions. */ +static void __remove_inode_queue(struct buffer_head *bh) +{ + bh->b_inode = NULL; + list_del(&bh->b_inode_buffers); +} + +static inline void remove_inode_queue(struct buffer_head *bh) +{ + if (bh->b_inode) + __remove_inode_queue(bh); +} + +int inode_has_buffers(struct inode *inode) +{ + int ret; + + spin_lock(&lru_list_lock); + ret = !list_empty(&inode->i_dirty_buffers); + spin_unlock(&lru_list_lock); + + return ret; +} + + /* If invalidate_buffers() will trash dirty buffers, it means some kind of fs corruption is going on. 
Trashing dirty data always imply losing information that was supposed to be just stored on the physical layer @@ -801,6 +827,137 @@ return; } + +/* + * Synchronise all the inode's dirty buffers to the disk. + * + * We have conflicting pressures: we want to make sure that all + * initially dirty buffers get waited on, but that any subsequently + * dirtied buffers don't. After all, we don't want fsync to last + * forever if somebody is actively writing to the file. + * + * Do this in two main stages: first we copy dirty buffers to a + * temporary inode list, queueing the writes as we go. Then we clean + * up, waiting for those writes to complete. + * + * During this second stage, any subsequent updates to the file may end + * up refiling the buffer on the original inode's dirty list again, so + * there is a chance we will end up with a buffer queued for write but + * not yet completed on that list. So, as a final cleanup we go through + * the osync code to catch these locked, dirty buffers without requeuing + * any newly dirty buffers for write. 
+ */ + +int fsync_inode_buffers(struct inode *inode) +{ + struct buffer_head *bh; + struct inode tmp; + int err = 0, err2; + + INIT_LIST_HEAD(&tmp.i_dirty_buffers); + + spin_lock(&lru_list_lock); + + while (!list_empty(&inode->i_dirty_buffers)) { + bh = BH_ENTRY(inode->i_dirty_buffers.next); + list_del(&bh->b_inode_buffers); + if (!buffer_dirty(bh) && !buffer_locked(bh)) + bh->b_inode = NULL; + else { + bh->b_inode = &tmp; + list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers); + atomic_inc(&bh->b_count); + if (buffer_dirty(bh)) { + spin_unlock(&lru_list_lock); + ll_rw_block(WRITE, 1, &bh); + spin_lock(&lru_list_lock); + } + } + } + + while (!list_empty(&tmp.i_dirty_buffers)) { + bh = BH_ENTRY(tmp.i_dirty_buffers.prev); + remove_inode_queue(bh); + spin_unlock(&lru_list_lock); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + err = -EIO; + brelse(bh); + spin_lock(&lru_list_lock); + } + + spin_unlock(&lru_list_lock); + err2 = osync_inode_buffers(inode); + + if (err) + return err; + else + return err2; +} + + +/* + * osync is designed to support O_SYNC io. It waits synchronously for + * all already-submitted IO to complete, but does not queue any new + * writes to the disk. + * + * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as + * you dirty the buffers, and then use osync_inode_buffers to wait for + * completion. Any other dirty buffers which are not yet queued for + * write will not be flushed to disk by the osync. 
+ */ + +int osync_inode_buffers(struct inode *inode) +{ + struct buffer_head *bh; + struct list_head *list; + int err = 0; + + spin_lock(&lru_list_lock); + + repeat: + + for (list = inode->i_dirty_buffers.prev; + bh = BH_ENTRY(list), list != &inode->i_dirty_buffers; + list = bh->b_inode_buffers.prev) { + if (buffer_locked(bh)) { + atomic_inc(&bh->b_count); + spin_unlock(&lru_list_lock); + wait_on_buffer(bh); + brelse(bh); + if (!buffer_uptodate(bh)) + err = -EIO; + spin_lock(&lru_list_lock); + goto repeat; + } + } + + spin_unlock(&lru_list_lock); + return err; +} + + +/* + * Invalidate any and all dirty buffers on a given inode. We are + * probably unmounting the fs, but that doesn't mean we have already + * done a sync(). Just drop the buffers from the inode list. + */ + +void invalidate_inode_buffers(struct inode *inode) +{ + struct list_head *list, *next; + + spin_lock(&lru_list_lock); + list = inode->i_dirty_buffers.next; + while (list != &inode->i_dirty_buffers) { + next = list->next; + remove_inode_queue(BH_ENTRY(list)); + list = next; + } + spin_unlock(&lru_list_lock); +} + + /* * Ok, this is getblk, and it isn't very clear, again to hinder * race-conditions. 
Most of the code is seldom used, (ie repeating), @@ -932,6 +1089,8 @@ __remove_from_lru_list(bh, bh->b_list); bh->b_list = dispose; __insert_into_lru_list(bh, dispose); + if (dispose == BUF_CLEAN) + remove_inode_queue(bh); } } @@ -968,6 +1127,7 @@ if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf)) goto in_use; __hash_unlink(buf); + remove_inode_queue(buf); write_unlock(&hash_table_lock); __remove_from_lru_list(buf, buf->b_list); spin_unlock(&lru_list_lock); @@ -1068,6 +1228,8 @@ */ static __inline__ void __put_unused_buffer_head(struct buffer_head * bh) { + if (bh->b_inode) + BUG(); if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) { kmem_cache_free(bh_cachep, bh); } else { @@ -1281,6 +1443,46 @@ } } +/** + * discard_buffer - discard that buffer without doing any IO + * @bh: buffer to discard + * + * This function removes a buffer from all the queues, without doing + * any IO, we are not interested in the contents of the buffer. This + * function can block if the buffer is locked. + */ +static inline struct buffer_head *discard_buffer(struct buffer_head * bh) +{ + struct buffer_head *next; + + if (bh->b_dev == B_FREE) + BUG(); + + next = bh->b_this_page; + + unmap_buffer(bh); + + spin_lock(&lru_list_lock); + write_lock(&hash_table_lock); + spin_lock(&unused_list_lock); + + if (atomic_read(&bh->b_count)) + BUG(); + + __hash_unlink(bh); + write_unlock(&hash_table_lock); + + remove_inode_queue(bh); + __remove_from_lru_list(bh, bh->b_list); + spin_unlock(&lru_list_lock); + + __put_unused_buffer_head(bh); + spin_unlock(&unused_list_lock); + + return next; +} + + /* * We don't have to release all buffers here, but * we have to be sure that no dirty buffer is left @@ -1313,26 +1515,45 @@ bh = next; } while (bh != head); - /* - * subtle. We release buffer-heads only if this is - * the 'final' flushpage. We have invalidated the get_block - * cached value unconditionally, so real IO is not - * possible anymore. 
- * - * If the free doesn't work out, the buffers can be - * left around - they just turn into anonymous buffers - * instead. - */ - if (!offset) { - if (!try_to_free_buffers(page, 0)) { - atomic_inc(&buffermem_pages); - return 0; - } - } - return 1; } +/** + * block_destroy_buffers - Will destroy the contents of all the + * buffers in this page + * @page: page to examine the buffers + * + * This function destroy all the buffers in one page without making + * any IO. The function can block due to the fact that discad_bufferr + * can block. + */ +void block_destroy_buffers(struct page *page) +{ + struct buffer_head *bh, *head; + + if (!PageLocked(page)) + BUG(); + if (!page->buffers) + return; + + head = page->buffers; + bh = head; + do { + /* We need to get the next buffer from discard buffer + * because discard buffer can block and anybody else + * can change the buffer list under our feet. + */ + bh = discard_buffer(bh); + }while (bh != head); + + /* Wake up anyone waiting for buffer heads */ + wake_up(&buffer_wait); + + /* And free the page */ + page->buffers = NULL; + page_cache_release(page); +} + static void create_empty_buffers(struct page *page, struct inode *inode, unsigned long blocksize) { struct buffer_head *bh, *head, *tail; @@ -1433,7 +1654,7 @@ unsigned long block; int err = 0; unsigned blocksize, bbits; - struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; + struct buffer_head *bh, *head, *wait[MAX_BUF_PER_PAGE], **wait_bh=wait; char *kaddr = (char *)kmap(page); blocksize = inode->i_sb->s_blocksize; @@ -1507,6 +1728,7 @@ } else { set_bit(BH_Uptodate, &bh->b_state); if (!atomic_set_buffer_dirty(bh)) { + buffer_insert_inode_queue(bh, inode); __mark_dirty(bh, 0); need_balance_dirty = 1; } @@ -1799,6 +2021,7 @@ } spin_unlock(&unused_list_lock); + wake_up(&buffer_wait); return iosize; } @@ -1935,6 +2158,8 @@ __put_unused_buffer_head(bh[bhind]); } spin_unlock(&unused_list_lock); + wake_up(&buffer_wait); + goto finished; } @@ -2112,6 +2337,12 @@ } /* 
+ * Can the buffer be thrown out? + */ +#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected)) +#define buffer_busy(bh) (atomic_read(&(bh)->b_count) | ((bh)->b_state & BUFFER_BUSY_BITS)) + +/* * Sync all the buffers on one page.. * * If we have old buffers that are locked, we'll @@ -2121,7 +2352,7 @@ * This all is required so that we can free up memory * later. */ -static void sync_page_buffers(struct buffer_head *bh, int wait) +static int sync_page_buffers(struct buffer_head *bh, int wait) { struct buffer_head * tmp = bh; @@ -2134,13 +2365,17 @@ } else if (buffer_dirty(p)) ll_rw_block(WRITE, 1, &p); } while (tmp != bh); -} -/* - * Can the buffer be thrown out? - */ -#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected)) -#define buffer_busy(bh) (atomic_read(&(bh)->b_count) | ((bh)->b_state & BUFFER_BUSY_BITS)) + do { + struct buffer_head *p = tmp; + tmp = tmp->b_this_page; + if (buffer_busy(p)) + return 0; + } while (tmp != bh); + + /* Success. Now try_to_free_buffers can free the page. */ + return 1; +} /* * try_to_free_buffers() checks if all the buffers on this particular page @@ -2158,6 +2393,7 @@ struct buffer_head * tmp, * bh = page->buffers; int index = BUFSIZE_INDEX(bh->b_size); +again: spin_lock(&lru_list_lock); write_lock(&hash_table_lock); spin_lock(&free_list[index].lock); @@ -2179,8 +2415,10 @@ /* The buffer can be either on the regular * queues or on the free list.. */ - if (p->b_dev != B_FREE) + if (p->b_dev != B_FREE) { + remove_inode_queue(p); __remove_from_queues(p); + } else __remove_from_free_list(p, index); __put_unused_buffer_head(p); @@ -2203,7 +2441,8 @@ spin_unlock(&free_list[index].lock); write_unlock(&hash_table_lock); spin_unlock(&lru_list_lock); - sync_page_buffers(bh, wait); + if (sync_page_buffers(bh, wait)) + goto again; return 0; } @@ -2499,7 +2738,7 @@ * the syscall above, but now we launch it ourselves internally with * kernel_thread(...) 
directly after the first thread in init/main.c */ -int bdflush(void * unused) +int bdflush(void *sem) { struct task_struct *tsk = current; int flushed; @@ -2521,6 +2760,8 @@ recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); + up((struct semaphore *)sem); + for (;;) { CHECK_EMERGENCY_SYNC @@ -2555,7 +2796,7 @@ * You don't need to change your userspace configuration since * the userspace `update` will do_exit(0) at the first sys_bdflush(). */ -int kupdate(void * unused) +int kupdate(void *sem) { struct task_struct * tsk = current; int interval; @@ -2571,6 +2812,8 @@ recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); + up((struct semaphore *)sem); + for (;;) { /* update interval */ interval = bdf_prm.b_un.interval; @@ -2604,8 +2847,11 @@ static int __init bdflush_init(void) { - kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - kernel_thread(kupdate, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + DECLARE_MUTEX_LOCKED(sem); + kernel_thread(bdflush, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&sem); + kernel_thread(kupdate, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&sem); return 0; } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/coda/dir.c linux.ac/fs/coda/dir.c --- linux.t2/fs/coda/dir.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/coda/dir.c Tue Jun 13 16:35:38 2000 @@ -55,7 +55,7 @@ struct dentry *open_dentry); static int coda_venus_readdir(struct file *filp, void *dirent, filldir_t filldir); -int coda_fsync(struct file *, struct dentry *dentry); +int coda_fsync(struct file *, struct dentry *dentry, int); int coda_hasmknod = 0; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/coda/file.c linux.ac/fs/coda/file.c --- linux.t2/fs/coda/file.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/coda/file.c Tue Jun 13 16:35:38 2000 @@ -40,7 +40,7 @@ } /* exported from this file (used for dirs) */ -int coda_fsync(struct file *coda_file, struct dentry *coda_dentry) +int 
coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) { struct inode *inode = coda_dentry->d_inode; struct dentry cont_dentry; @@ -60,10 +60,10 @@ cont_dentry.d_inode = (struct inode *)inode->i_mapping->host; down(&cont_dentry.d_inode->i_sem); - result = file_fsync(NULL, &cont_dentry); + result = file_fsync(NULL, &cont_dentry, datasync); up(&cont_dentry.d_inode->i_sem); - if ( result == 0 ) { + if ( !datasync && result == 0 ) { lock_kernel(); result = venus_fsync(inode->i_sb, coda_i2f(inode)); unlock_kernel(); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/devfs/base.c linux.ac/fs/devfs/base.c --- linux.t2/fs/devfs/base.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/devfs/base.c Sat Jun 17 18:52:52 2000 @@ -451,31 +451,6 @@ Added CONFIG_DEVFS_MOUNT. Work sponsored by SGI. v0.96 - 20000608 Richard Gooch - Disabled multi-mount capability (use VFS bindings instead). - Work sponsored by SGI. - v0.97 - 20000610 Richard Gooch - Switched to FS_SINGLE to disable multi-mounts. - 20000612 Richard Gooch - Removed module support. - Removed multi-mount code. - Removed compatibility macros: VFS has changed too much. - Work sponsored by SGI. - v0.98 - 20000614 Richard Gooch - Merged devfs inode into devfs entry. - Work sponsored by SGI. - v0.99 - 20000619 Richard Gooch - Removed dead code in which used to call - . - Work sponsored by SGI. - v0.100 - 20000621 Richard Gooch - Changed interface to . - Work sponsored by SGI. 
- v0.101 */ #include #include @@ -510,9 +485,29 @@ #include #include -#define DEVFS_VERSION "0.101 (20000621)" +#define DEVFS_VERSION "0.96 (20000430)" -#define DEVFS_NAME "devfs" +#ifndef DEVFS_NAME +# define DEVFS_NAME "devfs" +#endif + +/* Compatibility for 2.2.x kernel series */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,1)) +# define init_waitqueue_head(p) init_waitqueue(p) +# define DECLARE_WAITQUEUE(wait, p) struct wait_queue wait = {p, NULL} +typedef struct wait_queue *wait_queue_head_t; +#endif +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,6)) +# define D_ALLOC_ROOT(inode) d_alloc_root (inode, NULL) +#else +# define D_ALLOC_ROOT(inode) d_alloc_root (inode) +#endif +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,13)) +# define SETUP_STATIC +# define __setup(a,b) +#else +# define SETUP_STATIC static +#endif #define INODE_TABLE_INC 250 #define FIRST_INODE 1 @@ -609,19 +604,6 @@ gid_t gid; }; -struct devfs_inode /* This structure is for "persistent" inode storage */ -{ - time_t atime; - time_t mtime; - time_t ctime; - unsigned int ino; /* Inode number as seen in the VFS */ - struct dentry *dentry; - umode_t mode; - uid_t uid; - gid_t gid; - nlink_t nlink; -}; - struct devfs_entry { void *info; @@ -637,7 +619,8 @@ struct devfs_entry *next; /* Next entry in the parent directory */ struct devfs_entry *parent; /* The parent directory */ struct devfs_entry *slave; /* Another entry to unregister */ - struct devfs_inode inode; + struct devfs_inode *first_inode; + struct devfs_inode *last_inode; umode_t mode; unsigned short namelen; /* I think 64k+ filenames are a way off... 
*/ unsigned char registered:1; @@ -651,6 +634,26 @@ /* The root of the device tree */ static struct devfs_entry *root_entry = NULL; +struct devfs_inode /* This structure is for "persistent" inode storage */ +{ + time_t atime; + time_t mtime; + time_t ctime; + unsigned int ino; /* Inode number as seen in the VFS */ + struct devfs_entry *de; + struct fs_info *fs_info; + struct devfs_inode *prev; /* This pair are used to associate a list of */ + struct devfs_inode *next; /* inodes (one per FS) for a devfs entry */ + struct dentry *dentry; +#ifdef CONFIG_DEVFS_TUNNEL + struct dentry *covered; +#endif + umode_t mode; + uid_t uid; + gid_t gid; + nlink_t nlink; +}; + struct devfsd_buf_entry { void *data; @@ -664,7 +667,7 @@ { unsigned int num_inodes; /* Number of inodes created */ unsigned int table_size; /* Size of the inode pointer table */ - struct devfs_entry **table; + struct devfs_inode **table; struct super_block *sb; volatile struct devfsd_buf_entry *devfsd_buffer; volatile unsigned int devfsd_buf_in; @@ -677,15 +680,23 @@ atomic_t devfsd_overrun_count; wait_queue_head_t devfsd_wait_queue; wait_queue_head_t revalidate_wait_queue; + struct fs_info *prev; + struct fs_info *next; + unsigned char require_explicit:1; }; -static struct fs_info fs_info; +static struct fs_info *first_fs = NULL; +static struct fs_info *last_fs = NULL; static unsigned int next_devnum_char = MIN_DEVNUM; static unsigned int next_devnum_block = MIN_DEVNUM; static const int devfsd_buf_size = PAGE_SIZE / sizeof(struct devfsd_buf_entry); #ifdef CONFIG_DEVFS_DEBUG +# ifdef MODULE +unsigned int devfs_debug = DEBUG_NONE; +# else static unsigned int devfs_debug_init __initdata = DEBUG_NONE; static unsigned int devfs_debug = DEBUG_NONE; +# endif #endif #ifdef CONFIG_DEVFS_MOUNT @@ -759,40 +770,15 @@ static struct devfs_entry *create_entry (struct devfs_entry *parent, const char *name,unsigned int namelen) { - struct devfs_entry *new, **table; + struct devfs_entry *new; - /* First ensure table size is 
enough */ - if (fs_info.num_inodes >= fs_info.table_size) - { - if ( ( table = kmalloc (sizeof *table * - (fs_info.table_size + INODE_TABLE_INC), - GFP_KERNEL) ) == NULL ) return NULL; - fs_info.table_size += INODE_TABLE_INC; -#ifdef CONFIG_DEVFS_DEBUG - if (devfs_debug & DEBUG_I_CREATE) - printk ("%s: create_entry(): grew inode table to: %u entries\n", - DEVFS_NAME, fs_info.table_size); -#endif - if (fs_info.table) - { - memcpy (table, fs_info.table, sizeof *table *fs_info.num_inodes); - kfree (fs_info.table); - } - fs_info.table = table; - } if ( name && (namelen < 1) ) namelen = strlen (name); if ( ( new = kmalloc (sizeof *new + namelen, GFP_KERNEL) ) == NULL ) return NULL; - /* Magic: this will set the ctime to zero, thus subsequent lookups will - trigger the call to */ memset (new, 0, sizeof *new + namelen); new->parent = parent; if (name) memcpy (new->name, name, namelen); new->namelen = namelen; - new->inode.ino = fs_info.num_inodes + FIRST_INODE; - new->inode.nlink = 1; - fs_info.table[fs_info.num_inodes] = new; - ++fs_info.num_inodes; if (parent == NULL) return new; new->prev = parent->u.dir.last; /* Insert into the parent directory's list of children */ @@ -802,36 +788,6 @@ return new; } /* End Function create_entry */ -static void update_devfs_inode_from_entry (struct devfs_entry *de) -{ - if (de == NULL) return; - if ( S_ISDIR (de->mode) ) - { - de->inode.mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - de->inode.uid = 0; - de->inode.gid = 0; - } - else if ( S_ISLNK (de->mode) ) - { - de->inode.mode = S_IFLNK | S_IRUGO | S_IXUGO; - de->inode.uid = 0; - de->inode.gid = 0; - } - else if ( S_ISFIFO (de->mode) ) - { - de->inode.mode = de->mode; - de->inode.uid = de->u.fifo.uid; - de->inode.gid = de->u.fifo.gid; - } - else - { - if (de->u.fcb.auto_owner) - de->inode.mode = (de->mode & ~S_IALLUGO) | S_IRUGO | S_IWUGO; - else de->inode.mode = de->mode; - de->inode.uid = de->u.fcb.default_uid; - de->inode.gid = de->u.fcb.default_gid; - } -} /* End Function 
update_devfs_inode_from_entry */ /** * get_root_entry - Get the root devfs entry. @@ -848,8 +804,6 @@ if ( ( root_entry = create_entry (NULL, NULL, 0) ) == NULL ) return NULL; root_entry->registered = TRUE; root_entry->mode = S_IFDIR; - /* Force an inode update, because lookup() is never done for the root */ - update_devfs_inode_from_entry (root_entry); /* And create the entry for ".devfsd" */ if ( ( new = create_entry (root_entry, ".devfsd", 0) ) == NULL ) return NULL; @@ -1048,7 +1002,7 @@ return find_by_dev (root_entry, major, minor, type); } /* End Function find_entry */ -static struct devfs_entry *get_devfs_entry_from_vfs_inode (struct inode *inode) +static struct devfs_inode *get_devfs_inode_from_vfs_inode (struct inode *inode) { struct fs_info *fs_info; @@ -1058,7 +1012,7 @@ if (fs_info == NULL) return NULL; if (inode->i_ino - FIRST_INODE >= fs_info->num_inodes) return NULL; return fs_info->table[inode->i_ino - FIRST_INODE]; -} /* End Function get_devfs_entry_from_vfs_inode */ +} /* End Function get_devfs_inode_from_vfs_inode */ /** @@ -1068,17 +1022,21 @@ static void free_dentries (struct devfs_entry *de) { + struct devfs_inode *di; struct dentry *dentry; - dentry = de->inode.dentry; - if (dentry != NULL) + for (di = de->first_inode; di != NULL; di = di->next) { - dget (dentry); - de->inode.dentry = NULL; - /* Forcefully remove the inode */ - if (dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; - d_drop (dentry); - dput (dentry); + dentry = di->dentry; + if (dentry != NULL) + { + dget (dentry); + di->dentry = NULL; + /* Forcefully remove the inode */ + if (dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; + d_drop (dentry); + dput (dentry); + } } } /* End Function free_dentries */ @@ -1197,9 +1155,14 @@ static void devfsd_notify (struct devfs_entry *de, unsigned int type, int wait) { - if (devfsd_notify_one (de, type, de->mode, current->euid, - current->egid, &fs_info) && wait) - wait_for_devfsd_finished (&fs_info); + struct fs_info *fs_info; + + 
for (fs_info = first_fs; fs_info != NULL; fs_info = fs_info->next) + { + if (devfsd_notify_one (de, type, de->mode, current->euid, + current->egid, fs_info) && wait) + wait_for_devfsd_finished (fs_info); + } } /* End Function devfsd_notify */ @@ -1208,10 +1171,15 @@ * @dir: The handle to the parent devfs directory entry. If this is %NULL the * new name is relative to the root of the devfs. * @name: The name of the entry. + * @namelen: The number of characters in @name, not including a %NULL + * terminator. If this is 0, then @name must be %NULL-terminated and the + * length is computed internally. * @flags: A set of bitwise-ORed flags (DEVFS_FL_*). * @major: The major number. Not needed for regular files. * @minor: The minor number. Not needed for regular files. * @mode: The default file mode. + * @uid: The default UID of the file. + * @gid: The default GID of the file. * @ops: The &file_operations or &block_device_operations structure. * This must not be externally deallocated. * @info: An arbitrary pointer which will be written to the @private_data @@ -1223,10 +1191,12 @@ * On failure %NULL is returned.
*/ -devfs_handle_t devfs_register (devfs_handle_t dir, const char *name, +devfs_handle_t devfs_register (devfs_handle_t dir, + const char *name, unsigned int namelen, unsigned int flags, unsigned int major, unsigned int minor, - umode_t mode, void *ops, void *info) + umode_t mode, uid_t uid, gid_t gid, + void *ops, void *info) { int is_new; struct devfs_entry *de; @@ -1261,6 +1231,7 @@ DEVFS_NAME, name); return NULL; } + if (namelen < 1) namelen = strlen (name); if ( S_ISCHR (mode) && (flags & DEVFS_FL_AUTO_DEVNUM) ) { if (next_devnum_char >= MAX_DEVNUM) @@ -1285,8 +1256,7 @@ minor = next_devnum_block & 0xff; ++next_devnum_block; } - de = search_for_entry (dir, name, strlen (name), TRUE, TRUE, &is_new, - FALSE); + de = search_for_entry (dir, name, namelen, TRUE, TRUE, &is_new, FALSE); if (de == NULL) { printk ("%s: devfs_register(): could not create entry: \"%s\"\n", @@ -1314,6 +1284,8 @@ DEVFS_NAME, name); return NULL; } + /* If entry already exists free any dentries associated with it */ + if (de->registered) free_dentries (de); } de->registered = TRUE; if ( S_ISCHR (mode) || S_ISBLK (mode) ) @@ -1330,16 +1302,8 @@ } de->info = info; de->mode = mode; - if (flags & DEVFS_FL_CURRENT_OWNER) - { - de->u.fcb.default_uid = current->uid; - de->u.fcb.default_gid = current->gid; - } - else - { - de->u.fcb.default_uid = 0; - de->u.fcb.default_gid = 0; - } + de->u.fcb.default_uid = uid; + de->u.fcb.default_gid = gid; de->registered = TRUE; de->u.fcb.ops = ops; de->u.fcb.auto_owner = (flags & DEVFS_FL_AUTO_OWNER) ? 
TRUE : FALSE; @@ -1723,9 +1687,13 @@ devfs_handle_t devfs_get_handle_from_inode (struct inode *inode) { + struct devfs_inode *di; + if (!inode || !inode->i_sb) return NULL; if (inode->i_sb->s_magic != DEVFS_SUPER_MAGIC) return NULL; - return get_devfs_entry_from_vfs_inode (inode); + di = get_devfs_inode_from_vfs_inode (inode); + if (!di) return NULL; + return di->de; } /* End Function devfs_get_handle_from_inode */ @@ -1788,14 +1756,19 @@ int devfs_set_file_size (devfs_handle_t de, unsigned long size) { + struct devfs_inode *di; + if (de == NULL) return -EINVAL; if (!de->registered) return -EINVAL; if ( !S_ISREG (de->mode) ) return -EINVAL; if (de->u.fcb.u.file.size == size) return 0; de->u.fcb.u.file.size = size; - if (de->inode.dentry == NULL) return 0; - if (de->inode.dentry->d_inode == NULL) return 0; - de->inode.dentry->d_inode->i_size = size; + for (di = de->first_inode; di != NULL; di = di->next) + { + if (di->dentry == NULL) continue; + if (di->dentry->d_inode == NULL) continue; + di->dentry->d_inode->i_size = size; + } return 0; } /* End Function devfs_set_file_size */ @@ -2001,16 +1974,18 @@ return unregister_blkdev (major, name); } /* End Function devfs_unregister_blkdev */ +#ifndef MODULE + /** * devfs_setup - Process kernel boot options. * @str: The boot options after the "devfs=". 
*/ -static int __init devfs_setup (char *str) +SETUP_STATIC int __init devfs_setup (char *str) { while ( (*str != '\0') && !isspace (*str) ) { -#ifdef CONFIG_DEVFS_DEBUG +# ifdef CONFIG_DEVFS_DEBUG if (strncmp (str, "dall", 4) == 0) { devfs_debug_init |= DEBUG_ALL; @@ -2062,7 +2037,7 @@ str += 8; } else -#endif /* CONFIG_DEVFS_DEBUG */ +# endif /* CONFIG_DEVFS_DEBUG */ if (strncmp (str, "show", 4) == 0) { boot_options |= OPTION_SHOW; @@ -2093,6 +2068,8 @@ __setup("devfs=", devfs_setup); +#endif /* !MODULE */ + EXPORT_SYMBOL(devfs_register); EXPORT_SYMBOL(devfs_unregister); EXPORT_SYMBOL(devfs_mk_symlink); @@ -2117,6 +2094,101 @@ EXPORT_SYMBOL(devfs_unregister_chrdev); EXPORT_SYMBOL(devfs_unregister_blkdev); +#ifdef CONFIG_DEVFS_DEBUG +MODULE_PARM(devfs_debug, "i"); +#endif + +static void update_devfs_inode_from_entry (struct devfs_inode *di) +{ + if (di == NULL) return; + if (di->de == NULL) + { + printk ("%s: update_devfs_inode_from_entry(): NULL entry\n", + DEVFS_NAME); + return; + } + if ( S_ISDIR (di->de->mode) ) + { + di->mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + di->uid = 0; + di->gid = 0; + } + else if ( S_ISLNK (di->de->mode) ) + { + di->mode = S_IFLNK | S_IRUGO | S_IXUGO; + di->uid = 0; + di->gid = 0; + } + else if ( S_ISFIFO (di->de->mode) ) + { + di->mode = di->de->mode; + di->uid = di->de->u.fifo.uid; + di->gid = di->de->u.fifo.gid; + } + else + { + if (di->de->u.fcb.auto_owner) + di->mode = (di->de->mode & ~S_IALLUGO) | S_IRUGO | S_IWUGO; + else di->mode = di->de->mode; + di->uid = di->de->u.fcb.default_uid; + di->gid = di->de->u.fcb.default_gid; + } +} /* End Function update_devfs_inode_from_entry */ + + +/** + * create_devfs_inode - Create a devfs inode entry. + * @de: The devfs entry to associate the new inode with. + * @fs_info: The FS info. + * + * Returns a pointer to the devfs inode on success, else %NULL. 
+ */ + +static struct devfs_inode *create_devfs_inode (struct devfs_entry *de, + struct fs_info *fs_info) +{ + struct devfs_inode *di, **table; + + /* First ensure table size is enough */ + if (fs_info->num_inodes >= fs_info->table_size) + { + if ( ( table = kmalloc (sizeof *table * + (fs_info->table_size + INODE_TABLE_INC), + GFP_KERNEL) ) == NULL ) return NULL; + fs_info->table_size += INODE_TABLE_INC; +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_I_CREATE) + printk ("%s: create_devfs_inode(): grew inode table to: %u entries\n", + DEVFS_NAME, fs_info->table_size); +#endif + if (fs_info->table) + { + memcpy (table, fs_info->table, sizeof *table *fs_info->num_inodes); + kfree (fs_info->table); + } + fs_info->table = table; + } + if ( ( di = kmalloc (sizeof *di, GFP_KERNEL) ) == NULL ) return NULL; + memset (di, 0, sizeof *di); + di->ino = fs_info->num_inodes + FIRST_INODE; + di->nlink = 1; + fs_info->table[fs_info->num_inodes] = di; + ++fs_info->num_inodes; + di->de = de; + di->fs_info = fs_info; + di->prev = de->last_inode; + if (de->first_inode == NULL) de->first_inode = di; + else de->last_inode->next = di; + de->last_inode = di; + update_devfs_inode_from_entry (di); +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_I_CREATE) + printk ("%s: create_devfs_inode(): new di(%u): %p\n", + DEVFS_NAME, di->ino, di); +#endif + return di; +} /* End Function create_devfs_inode */ + /** * try_modload - Notify devfsd of an inode lookup. 
@@ -2152,6 +2224,34 @@ return 0; } /* End Function try_modload */ +static void delete_fs (struct fs_info *fs_info) +{ + unsigned int count; + struct devfs_inode *di; + struct devfs_entry *de; + + if (fs_info == NULL) return; + for (count = 0; count < fs_info->num_inodes; ++count) + { + /* Unhook this inode from the devfs tree */ + di = fs_info->table[count]; + de = di->de; + if (di->prev == NULL) de->first_inode = di->next; + else di->prev->next = di->next; + if (di->next == NULL) de->last_inode = di->prev; + else di->next->prev = di->prev; + memset (di, 0, sizeof *di); + kfree (di); + } + if (fs_info->table) kfree (fs_info->table); + if (fs_info->prev == NULL) first_fs = fs_info->next; + else fs_info->prev->next = fs_info->next; + if (fs_info->next == NULL) last_fs = fs_info->prev; + else fs_info->next->prev = fs_info->prev; + memset (fs_info, 0, sizeof *fs_info); + kfree (fs_info); +} /* End Function delete_fs */ + /** * check_disc_changed - Check if a removable disc was changed. @@ -2245,19 +2345,19 @@ static void devfs_read_inode (struct inode *inode) { - struct devfs_entry *de; + struct devfs_inode *di; - de = get_devfs_entry_from_vfs_inode (inode); - if (de == NULL) + di = get_devfs_inode_from_vfs_inode (inode); + if (di == NULL) { - printk ("%s: read_inode(%d): VFS inode: %p NO devfs_entry\n", + printk ("%s: read_inode(%d): VFS inode: %p NO devfs_inode\n", DEVFS_NAME, (int) inode->i_ino, inode); return; } #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_READ) - printk ("%s: read_inode(%d): VFS inode: %p devfs_entry: %p\n", - DEVFS_NAME, (int) inode->i_ino, inode, de); + printk ("%s: read_inode(%d): VFS inode: %p devfs_inode: %p\n", + DEVFS_NAME, (int) inode->i_ino, inode, di); #endif inode->i_size = 0; inode->i_blocks = 0; @@ -2265,39 +2365,39 @@ inode->i_op = &devfs_iops; inode->i_fop = &devfs_fops; inode->i_rdev = NODEV; - if ( S_ISCHR (de->inode.mode) ) + if ( S_ISCHR (di->mode) ) { - inode->i_rdev = MKDEV (de->u.fcb.u.device.major, - 
de->u.fcb.u.device.minor); + inode->i_rdev = MKDEV (di->de->u.fcb.u.device.major, + di->de->u.fcb.u.device.minor); } - else if ( S_ISBLK (de->inode.mode) ) + else if ( S_ISBLK (di->mode) ) { - inode->i_rdev = MKDEV (de->u.fcb.u.device.major, - de->u.fcb.u.device.minor); + inode->i_rdev = MKDEV (di->de->u.fcb.u.device.major, + di->de->u.fcb.u.device.minor); inode->i_bdev = bdget (inode->i_rdev); if (inode->i_bdev) { - if (!inode->i_bdev->bd_op && de->u.fcb.ops) - inode->i_bdev->bd_op = de->u.fcb.ops; + if (!inode->i_bdev->bd_op && di->de->u.fcb.ops) + inode->i_bdev->bd_op = di->de->u.fcb.ops; } else printk ("%s: read_inode(%d): no block device from bdget()\n", DEVFS_NAME, (int) inode->i_ino); } - else if ( S_ISFIFO (de->inode.mode) ) inode->i_fop = &def_fifo_fops; - else if ( S_ISREG (de->inode.mode) ) inode->i_size = de->u.fcb.u.file.size; - else if ( S_ISDIR (de->inode.mode) ) inode->i_op = &devfs_dir_iops; - else if ( S_ISLNK (de->inode.mode) ) + else if ( S_ISFIFO (di->mode) ) inode->i_fop = &def_fifo_fops; + else if ( S_ISREG (di->mode) ) inode->i_size = di->de->u.fcb.u.file.size; + else if ( S_ISDIR (di->mode) ) inode->i_op = &devfs_dir_iops; + else if ( S_ISLNK (di->mode) ) { inode->i_op = &devfs_symlink_iops; - inode->i_size = de->u.symlink.length; + inode->i_size = di->de->u.symlink.length; } - inode->i_mode = de->inode.mode; - inode->i_uid = de->inode.uid; - inode->i_gid = de->inode.gid; - inode->i_atime = de->inode.atime; - inode->i_mtime = de->inode.mtime; - inode->i_ctime = de->inode.ctime; - inode->i_nlink = de->inode.nlink; + inode->i_mode = di->mode; + inode->i_uid = di->uid; + inode->i_gid = di->gid; + inode->i_atime = di->atime; + inode->i_mtime = di->mtime; + inode->i_ctime = di->ctime; + inode->i_nlink = di->nlink; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_READ) printk ("%s: mode: 0%o uid: %d gid: %d\n", @@ -2309,7 +2409,7 @@ static void devfs_write_inode (struct inode *inode, int unused) { int index; - struct devfs_entry *de; + struct 
devfs_inode *di; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; if (inode->i_ino < FIRST_INODE) return; @@ -2320,43 +2420,57 @@ DEVFS_NAME, inode->i_ino); return; } - de = fs_info->table[index]; + di = fs_info->table[index]; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_WRITE) { - printk ("%s: write_inode(%d): VFS inode: %p devfs_entry: %p\n", - DEVFS_NAME, (int) inode->i_ino, inode, de); + printk ("%s: write_inode(%d): VFS inode: %p devfs_inode: %p\n", + DEVFS_NAME, (int) inode->i_ino, inode, di); printk ("%s: mode: 0%o uid: %d gid: %d\n", DEVFS_NAME, (int) inode->i_mode, (int) inode->i_uid, (int) inode->i_gid); } #endif - de->inode.mode = inode->i_mode; - de->inode.uid = inode->i_uid; - de->inode.gid = inode->i_gid; - de->inode.atime = inode->i_atime; - de->inode.mtime = inode->i_mtime; - de->inode.ctime = inode->i_ctime; + di->mode = inode->i_mode; + di->uid = inode->i_uid; + di->gid = inode->i_gid; + di->atime = inode->i_atime; + di->mtime = inode->i_mtime; + di->ctime = inode->i_ctime; } /* End Function devfs_write_inode */ static int devfs_notify_change (struct dentry *dentry, struct iattr *iattr) { int retval; - struct devfs_entry *de; + struct devfs_inode *di; struct inode *inode = dentry->d_inode; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode); - if (de == NULL) return -ENODEV; + di = get_devfs_inode_from_vfs_inode (inode); + if (di == NULL) return -ENODEV; retval = inode_change_ok (inode, iattr); if (retval != 0) return retval; inode_setattr (inode, iattr); if ( iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID) ) - devfsd_notify_one (de, DEVFSD_NOTIFY_CHANGE, inode->i_mode, + devfsd_notify_one (di->de, DEVFSD_NOTIFY_CHANGE, inode->i_mode, inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_notify_change */ +static void devfs_put_super (struct super_block *sb) +{ + struct fs_info *fs_info = sb->u.generic_sbp; + +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_S_PUT) 
+ printk ("%s: put_super(): devfs ptr: %p\n", DEVFS_NAME, fs_info); +#endif +#ifdef CONFIG_DEVFS_TUNNEL + dput (fs_info->table[0]->covered); +#endif + delete_fs (fs_info); +} /* End Function devfs_put_super */ + static int devfs_statfs (struct super_block *sb, struct statfs *buf) { buf->f_type = DEVFS_SUPER_MAGIC; @@ -2372,6 +2486,7 @@ { read_inode: devfs_read_inode, write_inode: devfs_write_inode, + put_super: devfs_put_super, statfs: devfs_statfs, }; @@ -2386,22 +2501,21 @@ */ static struct inode *get_vfs_inode (struct super_block *sb, - struct devfs_entry *de, + struct devfs_inode *di, struct dentry *dentry) { struct inode *inode; - if (de->inode.dentry != NULL) + if (di->dentry != NULL) { - printk ("%s: get_vfs_inode(%u): old de->inode.dentry: %p \"%s\" new dentry: %p \"%s\"\n", - DEVFS_NAME, de->inode.ino, - de->inode.dentry, de->inode.dentry->d_name.name, + printk ("%s: get_vfs_inode(%u): old di->dentry: %p \"%s\" new dentry: %p \"%s\"\n", + DEVFS_NAME, di->ino, di->dentry, di->dentry->d_name.name, dentry, dentry->d_name.name); - printk (" old inode: %p\n", de->inode.dentry->d_inode); + printk (" old inode: %p\n", di->dentry->d_inode); return NULL; } - if ( ( inode = iget (sb, de->inode.ino) ) == NULL ) return NULL; - de->inode.dentry = dentry; + if ( ( inode = iget (sb, di->ino) ) == NULL ) return NULL; + di->dentry = dentry; return inode; } /* End Function get_vfs_inode */ @@ -2419,6 +2533,7 @@ int err, count; int stored = 0; struct fs_info *fs_info; + struct devfs_inode *di; struct devfs_entry *parent, *de; struct inode *inode = file->f_dentry->d_inode; @@ -2433,7 +2548,8 @@ return -ENOTDIR; } fs_info = inode->i_sb->u.generic_sbp; - parent = get_devfs_entry_from_vfs_inode (file->f_dentry->d_inode); + di = get_devfs_inode_from_vfs_inode (file->f_dentry->d_inode); + parent = di->de; if ( (long) file->f_pos < 0 ) return -EINVAL; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_F_READDIR) @@ -2463,13 +2579,38 @@ count = file->f_pos - 2; for (de = 
parent->u.dir.first; (de != NULL) && (count > 0); de = de->next) - if ( !IS_HIDDEN (de) ) --count; + { + if ( IS_HIDDEN (de) ) continue; + if (!fs_info->require_explicit) + { + --count; + continue; + } + /* Must search for an inode for this FS */ + for (di = de->first_inode; di != NULL; di = di->next) + { + if (fs_info == di->fs_info) break; + } + if (di != NULL) --count; + } /* Now add all remaining entries */ for (; de != NULL; de = de->next) { if ( IS_HIDDEN (de) ) continue; + /* Must search for an inode for this FS */ + for (di = de->first_inode; di != NULL; di = di->next) + { + if (fs_info == di->fs_info) break; + } + if (di == NULL) + { + if (fs_info->require_explicit) continue; + /* Have to create the inode right now to get the inum */ + di = create_devfs_inode (de, fs_info); + if (di == NULL) return -ENOMEM; + } err = (*filldir) (dirent, de->name, de->namelen, - file->f_pos, de->inode.ino); + file->f_pos, di->ino); if (err == -EINVAL) break; if (err < 0) return err; file->f_pos++; @@ -2484,15 +2625,16 @@ { int err; struct fcb_type *df; - struct devfs_entry *de; + struct devfs_inode *di; + struct dentry *dentry = file->f_dentry; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode); - if (de == NULL) return -ENODEV; - if ( S_ISDIR (de->mode) ) return 0; - df = &de->u.fcb; - if (!de->registered) return -ENODEV; - file->private_data = de->info; + di = get_devfs_inode_from_vfs_inode (inode); + if (di == NULL) return -ENODEV; + if ( S_ISDIR (di->de->mode) ) return 0; + df = &di->de->u.fcb; + if (!di->de->registered) return -ENODEV; + file->private_data = di->de->info; if ( S_ISBLK (inode->i_mode) ) { file->f_op = &def_blk_fops; @@ -2509,20 +2651,21 @@ } if (err < 0) return err; /* Open was successful */ - if (df->open) return 0; - df->open = TRUE; /* This is the first open */ + df->open = TRUE; + if (dentry->d_count != 1) return 0; /* No fancy operations */ + /* This is the first open */ if (df->auto_owner) { /* 
Change the ownership/protection */ - de->inode.mode = (de->inode.mode & ~S_IALLUGO) |(de->mode & S_IRWXUGO); - de->inode.uid = current->euid; - de->inode.gid = current->egid; - inode->i_mode = de->inode.mode; - inode->i_uid = de->inode.uid; - inode->i_gid = de->inode.gid; + di->mode = (di->mode & ~S_IALLUGO) | (di->de->mode & S_IRWXUGO); + di->uid = current->euid; + di->gid = current->egid; + inode->i_mode = di->mode; + inode->i_uid = di->uid; + inode->i_gid = di->gid; } if (df->aopen_notify) - devfsd_notify_one (de, DEVFSD_NOTIFY_ASYNC_OPEN, inode->i_mode, + devfsd_notify_one (di->de, DEVFSD_NOTIFY_ASYNC_OPEN, inode->i_mode, current->euid, current->egid, fs_info); return 0; } /* End Function devfs_open */ @@ -2562,17 +2705,21 @@ static void devfs_d_iput (struct dentry *dentry, struct inode *inode) { - struct devfs_entry *de; + struct devfs_inode *di; - de = get_devfs_entry_from_vfs_inode (inode); + di = get_devfs_inode_from_vfs_inode (inode); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_D_IPUT) - printk ("%s: d_iput(): dentry: %p inode: %p de: %p de->dentry: %p\n", - DEVFS_NAME, dentry, inode, de, de->inode.dentry); + printk ("%s: d_iput(): dentry: %p inode: %p di: %p di->dentry: %p\n", + DEVFS_NAME, dentry, inode, di, di->dentry); #endif - if (de->inode.dentry == dentry) + if (di->dentry == dentry) { - de->inode.dentry = NULL; + di->dentry = NULL; +#ifdef CONFIG_DEVFS_TUNNEL + dput (di->covered); + di->covered = NULL; +#endif } iput (inode); } /* End Function devfs_d_iput */ @@ -2604,7 +2751,7 @@ static int devfs_d_delete (struct dentry *dentry) { struct inode *inode = dentry->d_inode; - struct devfs_entry *de; + struct devfs_inode *di; struct fs_info *fs_info; if (dentry->d_op == &devfs_wait_dops) dentry->d_op = &devfs_dops; @@ -2619,28 +2766,29 @@ return 1; } fs_info = inode->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode); + di = get_devfs_inode_from_vfs_inode (inode); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_D_DELETE) - printk 
("%s: d_delete(): dentry: %p inode: %p devfs_entry: %p\n", - DEVFS_NAME, dentry, inode, de); + printk ("%s: d_delete(): dentry: %p inode: %p devfs_inode: %p\n", + DEVFS_NAME, dentry, inode, di); #endif - if (de == NULL) return 0; - if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) && !S_ISREG (de->mode) ) + if (di == NULL) return 0; + if (di->de == NULL) return 0; + if ( !S_ISCHR (di->mode) && !S_ISBLK (di->mode) && !S_ISREG (di->mode) ) return 0; - if (!de->u.fcb.open) return 0; - de->u.fcb.open = FALSE; - if (de->u.fcb.aopen_notify) - devfsd_notify_one (de, DEVFSD_NOTIFY_CLOSE, inode->i_mode, + if (!di->de->u.fcb.open) return 0; + di->de->u.fcb.open = FALSE; + if (di->de->u.fcb.aopen_notify) + devfsd_notify_one (di->de, DEVFSD_NOTIFY_CLOSE, inode->i_mode, current->euid, current->egid, fs_info); - if (!de->u.fcb.auto_owner) return 0; + if (!di->de->u.fcb.auto_owner) return 0; /* Change the ownership/protection back */ - de->inode.mode = (de->inode.mode & ~S_IALLUGO) | S_IRUGO | S_IWUGO; - de->inode.uid = de->u.fcb.default_uid; - de->inode.gid = de->u.fcb.default_gid; - inode->i_mode = de->inode.mode; - inode->i_uid = de->inode.uid; - inode->i_gid = de->inode.gid; + di->mode = (di->mode & ~S_IALLUGO) | S_IRUGO | S_IWUGO; + di->uid = di->de->u.fcb.default_uid; + di->gid = di->de->u.fcb.default_gid; + inode->i_mode = di->mode; + inode->i_uid = di->uid; + inode->i_gid = di->gid; return 0; } /* End Function devfs_d_delete */ @@ -2654,6 +2802,7 @@ { if ( !dentry->d_inode && is_devfsd_or_child (fs_info) ) { + struct devfs_inode *di = NULL; struct inode *inode; #ifdef CONFIG_DEVFS_DEBUG @@ -2667,27 +2816,36 @@ printk ("%s: d_revalidate(): dentry: %p name: \"%s\" by: \"%s\"\n", DEVFS_NAME, dentry, txt, current->comm); #endif + if (de) + { + /* Search for an inode for this FS */ + for (di = de->first_inode; di != NULL; di = di->next) + if (di->fs_info == fs_info) break; + } if (de == NULL) { devfs_handle_t parent; + struct devfs_inode *pi; - parent = 
get_devfs_entry_from_vfs_inode (dir); + pi = get_devfs_inode_from_vfs_inode (dir); + parent = pi->de; de = search_for_entry_in_dir (parent, dentry->d_name.name, dentry->d_name.len, FALSE); } if (de == NULL) return 1; /* Create an inode, now that the driver information is available */ - if (de->no_persistence) update_devfs_inode_from_entry (de); - else if (de->inode.ctime == 0) update_devfs_inode_from_entry (de); - else de->inode.mode = - (de->mode & ~S_IALLUGO) | (de->inode.mode & S_IALLUGO); - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) + if (di == NULL) di = create_devfs_inode (de, fs_info); + else if (de->no_persistence) update_devfs_inode_from_entry (di); + else if (di->ctime == 0) update_devfs_inode_from_entry (di); + else di->mode = (de->mode & ~S_IALLUGO) | (di->mode & S_IALLUGO); + if (di == NULL) return 1; + if ( ( inode = get_vfs_inode (dir->i_sb, di, dentry) ) == NULL ) return 1; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_LOOKUP) - printk ("%s: d_revalidate(): new VFS inode(%u): %p devfs_entry: %p\n", - DEVFS_NAME, de->inode.ino, inode, de); + printk ("%s: d_revalidate(): new VFS inode(%u): %p devfs_inode: %p\n", + DEVFS_NAME, di->ino, inode, di); #endif d_instantiate (dentry, inode); return 1; @@ -2703,6 +2861,8 @@ static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry) { struct fs_info *fs_info; + struct devfs_inode *di = NULL; + struct devfs_inode *pi; struct devfs_entry *parent, *de; struct inode *inode; char txt[STRING_LENGTH]; @@ -2727,13 +2887,32 @@ #endif fs_info = dir->i_sb->u.generic_sbp; /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir); - if (parent == NULL) return ERR_PTR (-EINVAL); + pi = get_devfs_inode_from_vfs_inode (dir); + if (pi == NULL) return ERR_PTR (-EINVAL); + parent = pi->de; if (!parent->registered) return ERR_PTR (-ENOENT); /* Try to reclaim an existing devfs entry */ de = search_for_entry_in_dir (parent, 
dentry->d_name.name, dentry->d_name.len, FALSE); + if (de) + { + /* Search for an inode for this FS */ + for (di = de->first_inode; di != NULL; di = di->next) + if (di->fs_info == fs_info) break; + } + if (fs_info->require_explicit) + { + if (di == NULL) + { + /* Make the dentry negative so a subsequent operation can deal + with it (for the benefit of mknod()). Leaving the dentry + unhashed will cause to fail which in turns causes + to fail */ + d_add (dentry, NULL); + return NULL; + } + } if ( ( (de == NULL) || !de->registered ) && (parent->u.dir.num_removable > 0) && get_removable_partition (parent, dentry->d_name.name, @@ -2782,16 +2961,17 @@ d_add (dentry, NULL); /* Open the floodgates */ } /* Create an inode, now that the driver information is available */ - if (de->no_persistence) update_devfs_inode_from_entry (de); - else if (de->inode.ctime == 0) update_devfs_inode_from_entry (de); - else de->inode.mode = - (de->mode & ~S_IALLUGO) | (de->inode.mode & S_IALLUGO); - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) + if (di == NULL) di = create_devfs_inode (de, fs_info); + else if (de->no_persistence) update_devfs_inode_from_entry (di); + else if (di->ctime == 0) update_devfs_inode_from_entry (di); + else di->mode = (de->mode & ~S_IALLUGO) | (di->mode & S_IALLUGO); + if (di == NULL) return ERR_PTR (-ENOMEM); + if ( ( inode = get_vfs_inode (dir->i_sb, di, dentry) ) == NULL ) return ERR_PTR (-ENOMEM); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_LOOKUP) - printk ("%s: lookup(): new VFS inode(%u): %p devfs_entry: %p\n", - DEVFS_NAME, de->inode.ino, inode, de); + printk ("%s: lookup(): new VFS inode(%u): %p devfs_inode: %p\n", + DEVFS_NAME, di->ino, inode, di); #endif d_instantiate (dentry, inode); /* Unlock directory semaphore, which will release any waiters. 
They will @@ -2818,7 +2998,7 @@ static int devfs_unlink (struct inode *dir, struct dentry *dentry) { - struct devfs_entry *de; + struct devfs_inode *di; #ifdef CONFIG_DEVFS_DEBUG char txt[STRING_LENGTH]; @@ -2834,12 +3014,12 @@ if ( !dir || !S_ISDIR (dir->i_mode) ) return -ENOTDIR; if (!dentry || !dentry->d_inode) return -ENOENT; - de = get_devfs_entry_from_vfs_inode (dentry->d_inode); - if (de == NULL) return -ENOENT; - if (!de->registered) return -ENOENT; - de->registered = FALSE; - de->hide = TRUE; - free_dentries (de); + di = get_devfs_inode_from_vfs_inode (dentry->d_inode); + if (di == NULL) return -ENOENT; + if (!di->de->registered) return -ENOENT; + di->de->registered = FALSE; + di->de->hide = TRUE; + free_dentries (di->de); return 0; } /* End Function devfs_unlink */ @@ -2848,14 +3028,17 @@ { int err; struct fs_info *fs_info; + struct devfs_inode *pi; + struct devfs_inode *di = NULL; struct devfs_entry *parent, *de; struct inode *inode; if ( !dir || !S_ISDIR (dir->i_mode) ) return -ENOTDIR; fs_info = dir->i_sb->u.generic_sbp; /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir); - if (parent == NULL) return -EINVAL; + pi = get_devfs_inode_from_vfs_inode (dir); + if (pi == NULL) return -EINVAL; + parent = pi->de; if (!parent->registered) return -ENOENT; err = devfs_mk_symlink (parent, dentry->d_name.name, dentry->d_name.len, DEVFS_FL_NONE, symname, 0, &de, NULL); @@ -2865,20 +3048,27 @@ DEVFS_NAME, err); #endif if (err < 0) return err; - de->inode.mode = de->mode; - de->inode.atime = CURRENT_TIME; - de->inode.mtime = CURRENT_TIME; - de->inode.ctime = CURRENT_TIME; - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) + /* Search for an inode for this FS */ + for (di = de->first_inode; di != NULL; di = di->next) + { + if (di->fs_info == fs_info) break; + } + if (di == NULL) di = create_devfs_inode (de, fs_info); + if (di == NULL) return -ENOMEM; + di->mode = de->mode; + di->atime = 
CURRENT_TIME; + di->mtime = CURRENT_TIME; + di->ctime = CURRENT_TIME; + if ( ( inode = get_vfs_inode (dir->i_sb, di, dentry) ) == NULL ) return -ENOMEM; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_DISABLED) printk ("%s: symlink(): new VFS inode(%u): %p dentry: %p\n", - DEVFS_NAME, de->inode.ino, inode, dentry); + DEVFS_NAME, di->ino, inode, dentry); #endif de->hide = FALSE; d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + devfsd_notify_one (di->de, DEVFSD_NOTIFY_CREATE, inode->i_mode, inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_symlink */ @@ -2887,6 +3077,8 @@ { int is_new; struct fs_info *fs_info; + struct devfs_inode *di = NULL; + struct devfs_inode *pi; struct devfs_entry *parent, *de; struct inode *inode; @@ -2895,8 +3087,9 @@ fs_info = dir->i_sb->u.generic_sbp; /* We are allowed to create the directory */ /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir); - if (parent == NULL) return -EINVAL; + pi = get_devfs_inode_from_vfs_inode (dir); + if (pi == NULL) return -EINVAL; + parent = pi->de; if (!parent->registered) return -ENOENT; /* Try to reclaim an existing devfs entry, create if there isn't one */ de = search_for_entry (parent, dentry->d_name.name, dentry->d_name.len, @@ -2917,21 +3110,28 @@ } de->mode = mode; de->u.dir.num_removable = 0; - de->inode.mode = mode; - de->inode.uid = current->euid; - de->inode.gid = current->egid; - de->inode.atime = CURRENT_TIME; - de->inode.mtime = CURRENT_TIME; - de->inode.ctime = CURRENT_TIME; - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) + /* Search for an inode for this FS */ + for (di = de->first_inode; di != NULL; di = di->next) + { + if (di->fs_info == fs_info) break; + } + if (di == NULL) di = create_devfs_inode (de, fs_info); + if (di == NULL) return -ENOMEM; + di->mode = mode; + di->uid = current->euid; + di->gid = current->egid; + di->atime = CURRENT_TIME; + 
di->mtime = CURRENT_TIME; + di->ctime = CURRENT_TIME; + if ( ( inode = get_vfs_inode (dir->i_sb, di, dentry) ) == NULL ) return -ENOMEM; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_DISABLED) printk ("%s: mkdir(): new VFS inode(%u): %p dentry: %p\n", - DEVFS_NAME, de->inode.ino, inode, dentry); + DEVFS_NAME, di->ino, inode, dentry); #endif d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + devfsd_notify_one (di->de, DEVFSD_NOTIFY_CREATE, inode->i_mode, inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_mkdir */ @@ -2940,6 +3140,7 @@ { int has_children = FALSE; struct fs_info *fs_info; + struct devfs_inode *di = NULL; struct devfs_entry *de, *child; struct inode *inode = dentry->d_inode; @@ -2947,8 +3148,9 @@ if (dir->i_sb->u.generic_sbp != inode->i_sb->u.generic_sbp) return -EINVAL; if (inode == dir) return -EPERM; fs_info = dir->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode); - if (de == NULL) return -ENOENT; + di = get_devfs_inode_from_vfs_inode (inode); + if (di == NULL) return -ENOENT; + de = di->de; if (!de->registered) return -ENOENT; if ( !S_ISDIR (de->mode) ) return -ENOTDIR; for (child = de->u.dir.first; child != NULL; child = child->next) @@ -2971,6 +3173,8 @@ { int is_new; struct fs_info *fs_info; + struct devfs_inode *di = NULL; + struct devfs_inode *pi; struct devfs_entry *parent, *de; struct inode *inode; @@ -2993,8 +3197,9 @@ !S_ISSOCK (mode) ) return -EPERM; /* We are allowed to create the node */ /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir); - if (parent == NULL) return -EINVAL; + pi = get_devfs_inode_from_vfs_inode (dir); + if (pi == NULL) return -EINVAL; + parent = pi->de; if (!parent->registered) return -ENOENT; /* Try to reclaim an existing devfs entry, create if there isn't one */ de = search_for_entry (parent, dentry->d_name.name, dentry->d_name.len, @@ -3025,37 +3230,44 @@ de->registered = 
TRUE; de->show_unreg = FALSE; de->hide = FALSE; - de->inode.mode = mode; - de->inode.uid = current->euid; - de->inode.gid = current->egid; - de->inode.atime = CURRENT_TIME; - de->inode.mtime = CURRENT_TIME; - de->inode.ctime = CURRENT_TIME; - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) + /* Search for an inode for this FS */ + for (di = de->first_inode; di != NULL; di = di->next) + { + if (di->fs_info == fs_info) break; + } + if (di == NULL) di = create_devfs_inode (de, fs_info); + if (di == NULL) return -ENOMEM; + di->mode = mode; + di->uid = current->euid; + di->gid = current->egid; + di->atime = CURRENT_TIME; + di->mtime = CURRENT_TIME; + di->ctime = CURRENT_TIME; + if ( ( inode = get_vfs_inode (dir->i_sb, di, dentry) ) == NULL ) return -ENOMEM; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_MKNOD) printk ("%s: new VFS inode(%u): %p dentry: %p\n", - DEVFS_NAME, de->inode.ino, inode, dentry); + DEVFS_NAME, di->ino, inode, dentry); #endif d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + devfsd_notify_one (di->de, DEVFSD_NOTIFY_CREATE, inode->i_mode, inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_mknod */ static int devfs_readlink (struct dentry *dentry, char *buffer, int buflen) { - struct devfs_entry *de = get_devfs_entry_from_vfs_inode (dentry->d_inode); + struct devfs_inode *di = get_devfs_inode_from_vfs_inode (dentry->d_inode); - return vfs_readlink (dentry, buffer, buflen, de->u.symlink.linkname); + return vfs_readlink (dentry, buffer, buflen, di->de->u.symlink.linkname); } /* End Function devfs_readlink */ static int devfs_follow_link (struct dentry *dentry, struct nameidata *nd) { - struct devfs_entry *de = get_devfs_entry_from_vfs_inode (dentry->d_inode); + struct devfs_inode *di = get_devfs_inode_from_vfs_inode (dentry->d_inode); - return vfs_follow_link (nd, de->u.symlink.linkname); + return vfs_follow_link (nd, di->de->u.symlink.linkname); } /* End Function 
devfs_follow_link */ static struct inode_operations devfs_iops = @@ -3091,22 +3303,47 @@ static struct super_block *devfs_read_super (struct super_block *sb, void *data, int silent) { + char *aopt = data; + struct fs_info *fs_info = NULL; + struct devfs_inode *di; struct inode *root_inode = NULL; if (get_root_entry () == NULL) goto out_no_root; - atomic_set (&fs_info.devfsd_overrun_count, 0); - init_waitqueue_head (&fs_info.devfsd_wait_queue); - init_waitqueue_head (&fs_info.revalidate_wait_queue); - fs_info.sb = sb; - sb->u.generic_sbp = &fs_info; + if ( ( fs_info = kmalloc (sizeof *fs_info, GFP_KERNEL) ) == NULL ) + return NULL; + memset (fs_info, 0, sizeof *fs_info); + atomic_set (&fs_info->devfsd_overrun_count, 0); + init_waitqueue_head (&fs_info->devfsd_wait_queue); + init_waitqueue_head (&fs_info->revalidate_wait_queue); + fs_info->prev = last_fs; + if (first_fs == NULL) first_fs = fs_info; + else last_fs->next = fs_info; + last_fs = fs_info; + fs_info->sb = sb; + if (aopt) + { + if (strcmp (aopt, "explicit") == 0) fs_info->require_explicit = TRUE; + } + sb->u.generic_sbp = fs_info; sb->s_blocksize = 1024; sb->s_blocksize_bits = 10; sb->s_magic = DEVFS_SUPER_MAGIC; sb->s_op = &devfs_sops; - if ( ( root_inode = get_vfs_inode (sb, root_entry, NULL) ) == NULL ) + di = create_devfs_inode (root_entry, fs_info); + if (di == NULL) goto out_no_root; + if (di->ino != 1) + { + printk ("%s: read_super: root inode number is: %d!\n", + DEVFS_NAME, di->ino); + goto out_no_root; + } + if ( ( root_inode = get_vfs_inode (sb, di, NULL) ) == NULL ) goto out_no_root; - sb->s_root = d_alloc_root (root_inode); + sb->s_root = D_ALLOC_ROOT (root_inode); if (!sb->s_root) goto out_no_root; +#ifdef CONFIG_DEVFS_TUNNEL + di->covered = dget (sb->s_root->d_covered); +#endif #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_DISABLED) printk ("%s: read super, made devfs ptr: %p\n", @@ -3116,12 +3353,13 @@ out_no_root: printk ("devfs_read_super: get root inode failed\n"); + delete_fs 
(fs_info); if (root_inode) iput (root_inode); return NULL; } /* End Function devfs_read_super */ -static DECLARE_FSTYPE (devfs_fs_type, DEVFS_NAME, devfs_read_super, FS_SINGLE); +static DECLARE_FSTYPE (devfs_fs_type, DEVFS_NAME, devfs_read_super, 0); /* File operations for devfsd follow */ @@ -3311,27 +3549,25 @@ } /* End Function devfsd_close */ +#ifdef MODULE +int init_module (void) +#else int __init init_devfs_fs (void) +#endif { - int err; - printk ("%s: v%s Richard Gooch (rgooch@atnf.csiro.au)\n", DEVFS_NAME, DEVFS_VERSION); -#ifdef CONFIG_DEVFS_DEBUG +#if defined(CONFIG_DEVFS_DEBUG) && !defined(MODULE) devfs_debug = devfs_debug_init; printk ("%s: devfs_debug: 0x%0x\n", DEVFS_NAME, devfs_debug); #endif +#if !defined(MODULE) printk ("%s: boot_options: 0x%0x\n", DEVFS_NAME, boot_options); - err = register_filesystem (&devfs_fs_type); - if (!err) - { - struct vfsmount *devfs_mnt = kern_mount (&devfs_fs_type); - err = PTR_ERR (devfs_mnt); - if ( !IS_ERR (devfs_mnt) ) err = 0; - } - return err; -} /* End Function init_devfs_fs */ +#endif + return register_filesystem (&devfs_fs_type); +} +#ifndef MODULE void __init mount_devfs_fs (void) { int err; @@ -3341,3 +3577,39 @@ if (err == 0) printk ("Mounted devfs on /dev\n"); else printk ("Warning: unable to mount devfs, err: %d\n", err); } /* End Function mount_devfs_fs */ +#endif + +#ifdef MODULE +static void free_entry (struct devfs_entry *parent) +{ + struct devfs_entry *de, *next; + + if (parent == NULL) return; + for (de = parent->u.dir.first; de != NULL; de = next) + { + next = de->next; + if (de->first_inode != NULL) + { + printk ("%s: free_entry(): unfreed inodes!\n", DEVFS_NAME); + } + if ( S_ISDIR (de->mode) ) + { + /* Recursively free the subdirectories: this is a stack chomper */ + free_entry (de); + } + else kfree (de); + } + kfree (parent); +} /* End Function free_entry */ + +void cleanup_module (void) +{ + unregister_filesystem (&devfs_fs_type); + if (first_fs != NULL) + { + printk ("%s: cleanup_module(): 
still mounted mounted filesystems!\n", + DEVFS_NAME); + } + free_entry (root_entry); +} +#endif /* MODULE */ diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/devfs/util.c linux.ac/fs/devfs/util.c --- linux.t2/fs/devfs/util.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/devfs/util.c Tue May 16 23:06:44 2000 @@ -28,8 +28,6 @@ Created <_devfs_convert_name> and supported SCSI and IDE CD-ROMs 20000203 Richard Gooch Changed operations pointer type to void *. - 20000621 Richard Gooch - Changed interface to . */ #include #include @@ -136,6 +134,8 @@ * @major: The major number. Not needed for regular files. * @minor_start: The starting minor number. Not needed for regular files. * @mode: The default file mode. + * @uid: The default UID of the file. + * @gid: The default GID of the file. * @ops: The &file_operations or &block_device_operations structure. * This must not be externally deallocated. * @info: An arbitrary pointer which will be written to the private_data @@ -147,7 +147,8 @@ void devfs_register_series (devfs_handle_t dir, const char *format, unsigned int num_entries, unsigned int flags, unsigned int major, unsigned int minor_start, - umode_t mode, void *ops, void *info) + umode_t mode, uid_t uid, gid_t gid, + void *ops, void *info) { unsigned int count; char devname[128]; @@ -155,8 +156,8 @@ for (count = 0; count < num_entries; ++count) { sprintf (devname, format, count); - devfs_register (dir, devname, flags, major, minor_start + count, - mode, ops, info); + devfs_register (dir, devname, 0, flags, major, minor_start + count, + mode, uid, gid, ops, info); } } /* End Function devfs_register_series */ EXPORT_SYMBOL(devfs_register_series); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/exec.c linux.ac/fs/exec.c --- linux.t2/fs/exec.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/exec.c Thu Jun 22 17:07:35 2000 @@ -101,37 +101,54 @@ */ asmlinkage long sys_uselib(const char * library) { - int fd, retval; struct file *
file; + struct nameidata nd; + int error; - fd = sys_open(library, 0, 0); - if (fd < 0) - return fd; - file = fget(fd); - retval = -ENOEXEC; - if (file) { - if(file->f_op && file->f_op->read) { - struct linux_binfmt * fmt; + error = user_path_walk(library, &nd); + if (error) + goto out; + + error = -EINVAL; + if (!S_ISREG(nd.dentry->d_inode->i_mode)) + goto exit; + + error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC); + if (error) + goto exit; - read_lock(&binfmt_lock); - for (fmt = formats ; fmt ; fmt = fmt->next) { - if (!fmt->load_shlib) - continue; - if (!try_inc_mod_count(fmt->module)) - continue; - read_unlock(&binfmt_lock); - retval = fmt->load_shlib(file); - read_lock(&binfmt_lock); - put_binfmt(fmt); - if (retval != -ENOEXEC) - break; - } + lock_kernel(); + file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + unlock_kernel(); + error = PTR_ERR(file); + if (IS_ERR(file)) + goto out; + + error = -ENOEXEC; + if(file->f_op && file->f_op->read) { + struct linux_binfmt * fmt; + + read_lock(&binfmt_lock); + for (fmt = formats ; fmt ; fmt = fmt->next) { + if (!fmt->load_shlib) + continue; + if (!try_inc_mod_count(fmt->module)) + continue; read_unlock(&binfmt_lock); + error = fmt->load_shlib(file); + read_lock(&binfmt_lock); + put_binfmt(fmt); + if (error != -ENOEXEC) + break; } - fput(file); + read_unlock(&binfmt_lock); } - sys_close(fd); - return retval; + fput(file); +out: + return error; +exit: + path_release(&nd); + goto out; } /* @@ -319,6 +336,7 @@ struct file *open_exec(const char *name) { struct nameidata nd; + struct inode *inode; struct file *file; int err = 0; @@ -328,14 +346,22 @@ unlock_kernel(); file = ERR_PTR(err); if (!err) { + inode = nd.dentry->d_inode; file = ERR_PTR(-EACCES); - if (S_ISREG(nd.dentry->d_inode->i_mode)) { - int err = permission(nd.dentry->d_inode, MAY_EXEC); + if (!IS_NOEXEC(inode) && S_ISREG(inode->i_mode)) { + int err = permission(inode, MAY_EXEC); file = ERR_PTR(err); if (!err) { lock_kernel(); file = 
dentry_open(nd.dentry, nd.mnt, O_RDONLY); unlock_kernel(); + if (!IS_ERR(file)) { + err = deny_write_access(file); + if (err) { + fput(file); + file = ERR_PTR(err); + } + } out: return file; } @@ -540,23 +566,13 @@ int prepare_binprm(struct linux_binprm *bprm) { int mode; - int retval,id_change,cap_raised; + int id_change,cap_raised; struct inode * inode = bprm->file->f_dentry->d_inode; mode = inode->i_mode; - if (!S_ISREG(mode)) /* must be regular file */ - return -EACCES; - if (!(mode & 0111)) /* with at least _one_ execute bit set */ + /* Huh? We had already checked for MAY_EXEC, WTF do we check this? */ + if (!(mode & 0111)) /* with at least _one_ execute bit set */ return -EACCES; - if (IS_NOEXEC(inode)) /* FS mustn't be mounted noexec */ - return -EACCES; - if (!inode->i_sb) - return -EACCES; - if ((retval = permission(inode, MAY_EXEC)) != 0) - return retval; - /* better not execute files which are being written to */ - if (atomic_read(&inode->i_writecount) > 0) - return -ETXTBSY; bprm->e_uid = current->euid; bprm->e_gid = current->egid; @@ -728,6 +744,7 @@ char * dynloader[] = { "/sbin/loader" }; struct file * file; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; @@ -761,6 +778,7 @@ retval = fn(bprm, regs); if (retval >= 0) { put_binfmt(fmt); + allow_write_access(bprm->file); if (bprm->file) fput(bprm->file); bprm->file = NULL; @@ -822,11 +840,13 @@ bprm.loader = 0; bprm.exec = 0; if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) { + allow_write_access(file); fput(file); return bprm.argc; } if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) { + allow_write_access(file); fput(file); return bprm.envc; } @@ -855,6 +875,7 @@ out: /* Something went wrong, return the inode and free the argument pages*/ + allow_write_access(bprm.file); if (bprm.file) fput(bprm.file); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/balloc.c linux.ac/fs/ext2/balloc.c --- linux.t2/fs/ext2/balloc.c Sat Jun 24 
13:36:11 2000 +++ linux.ac/fs/ext2/balloc.c Tue May 30 14:38:13 2000 @@ -473,11 +473,8 @@ if (i >= sb->u.ext2_sb.s_groups_count) i = 0; gdp = ext2_get_group_desc (sb, i, &bh2); - if (!gdp) { - *err = -EIO; - unlock_super (sb); - return 0; - } + if (!gdp) + goto io_error; if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) break; } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/dir.c linux.ac/fs/ext2/dir.c --- linux.t2/fs/ext2/dir.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ext2/dir.c Sat Jun 10 21:51:07 2000 @@ -26,7 +26,7 @@ read: generic_read_dir, readdir: ext2_readdir, ioctl: ext2_ioctl, - fsync: ext2_sync_file, + fsync: ext2_fsync_file, }; int ext2_check_dir_entry (const char * function, struct inode * dir, diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/file.c linux.ac/fs/ext2/file.c --- linux.t2/fs/ext2/file.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ext2/file.c Sat Jun 10 21:51:07 2000 @@ -91,6 +91,7 @@ return 0; } + /* * We have mostly NULL's here: the current defaults are ok for * the ext2 filesystem. 
@@ -103,7 +104,7 @@ mmap: generic_file_mmap, open: ext2_open_file, release: ext2_release_file, - fsync: ext2_sync_file, + fsync: ext2_fsync_file, }; struct inode_operations ext2_file_inode_operations = { diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/fsync.c linux.ac/fs/ext2/fsync.c --- linux.t2/fs/ext2/fsync.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ext2/fsync.c Sat Jun 10 21:51:07 2000 @@ -27,131 +27,28 @@ #include -#define blocksize (EXT2_BLOCK_SIZE(inode->i_sb)) -#define addr_per_block (EXT2_ADDR_PER_BLOCK(inode->i_sb)) - -static int sync_indirect(struct inode * inode, u32 * block, int wait) -{ - struct buffer_head * bh; - - if (!*block) - return 0; - bh = get_hash_table(inode->i_dev, le32_to_cpu(*block), blocksize); - if (!bh) - return 0; - if (wait && buffer_req(bh) && !buffer_uptodate(bh)) { - /* There can be a parallell read(2) that started read-I/O - on the buffer so we can't assume that there's been - an I/O error without first waiting I/O completation. 
*/ - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - { - brelse (bh); - return -1; - } - } - if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) { - if (wait) - /* when we return from fsync all the blocks - must be _just_ stored on disk */ - wait_on_buffer(bh); - brelse(bh); - return 0; - } - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - return 0; -} - -static int sync_iblock(struct inode * inode, u32 * iblock, - struct buffer_head ** bh, int wait) -{ - int rc, tmp; - - *bh = NULL; - tmp = le32_to_cpu(*iblock); - if (!tmp) - return 0; - rc = sync_indirect(inode, iblock, wait); - if (rc) - return rc; - *bh = bread(inode->i_dev, tmp, blocksize); - if (!*bh) - return -1; - return 0; -} - -static int sync_dindirect(struct inode * inode, u32 * diblock, int wait) -{ - int i; - struct buffer_head * dind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, diblock, &dind_bh, wait); - if (rc || !dind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_indirect(inode, ((u32 *) dind_bh->b_data) + i, wait); - if (rc) - err = rc; - } - brelse(dind_bh); - return err; -} - -static int sync_tindirect(struct inode * inode, u32 * tiblock, int wait) -{ - int i; - struct buffer_head * tind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, tiblock, &tind_bh, wait); - if (rc || !tind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_dindirect(inode, ((u32 *) tind_bh->b_data) + i, wait); - if (rc) - err = rc; - } - brelse(tind_bh); - return err; -} - /* * File may be NULL when we are called. Perhaps we shouldn't * even pass file to fsync ? */ -int ext2_sync_file(struct file * file, struct dentry *dentry) +int ext2_fsync_file(struct file * file, struct dentry *dentry, int datasync) { - int wait, err = 0; struct inode *inode = dentry->d_inode; + return ext2_fsync_inode(inode, datasync); +} - lock_kernel(); - if (S_ISLNK(inode->i_mode) && !(inode->i_blocks)) - /* - * Don't sync fast links! 
- */ - goto skip; - - err = generic_buffer_fdatasync(inode, 0, ~0UL); - - for (wait=0; wait<=1; wait++) - { - err |= sync_indirect(inode, - inode->u.ext2_i.i_data+EXT2_IND_BLOCK, - wait); - err |= sync_dindirect(inode, - inode->u.ext2_i.i_data+EXT2_DIND_BLOCK, - wait); - err |= sync_tindirect(inode, - inode->u.ext2_i.i_data+EXT2_TIND_BLOCK, - wait); - } -skip: - err |= ext2_sync_inode (inode); - unlock_kernel(); +int ext2_fsync_inode(struct inode *inode, int datasync) +{ + int err; + + err = fsync_inode_buffers(inode); + if (!(inode->i_state & I_DIRTY)) + return err; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return err; + + err |= ext2_sync_inode(inode); return err ? -EIO : 0; } + diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/ialloc.c linux.ac/fs/ext2/ialloc.c --- linux.t2/fs/ext2/ialloc.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/ext2/ialloc.c Sat Jun 10 22:18:33 2000 @@ -287,7 +287,6 @@ repeat: gdp = NULL; i=0; - *err = -ENOSPC; if (S_ISDIR(mode)) { avefreei = le32_to_cpu(es->s_free_inodes_count) / sb->u.ext2_sb.s_groups_count; @@ -369,6 +368,7 @@ if (!gdp) { unlock_super (sb); iput(inode); + *err = -ENOSPC; return NULL; } bitmap_nr = load_inode_bitmap (sb, i); @@ -398,9 +398,8 @@ ext2_error (sb, "ext2_new_inode", "Free inodes count corrupted in group %d", i); - unlock_super (sb); - iput (inode); - return NULL; + /* If we continue recover from this case */ + gdp->bg_free_inodes_count = 0; } goto repeat; } @@ -411,6 +410,7 @@ "block_group = %d,inode=%d", i, j); unlock_super (sb); iput (inode); + *err = EIO; /* Should never happen */ return NULL; } gdp->bg_free_inodes_count = diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/inode.c linux.ac/fs/ext2/inode.c --- linux.t2/fs/ext2/inode.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ext2/inode.c Sat Jun 10 21:51:07 2000 @@ -117,7 +117,7 @@ inode->u.ext2_i.i_prealloc_count--; ext2_debug ("preallocation hit (%lu/%lu).\n", ++alloc_hits, 
++alloc_attempts); - + *err = 0; } else { ext2_discard_prealloc (inode); ext2_debug ("preallocation miss (%lu/%lu).\n", @@ -200,6 +200,7 @@ return ret; } +/* returns NULL and sets *err on error */ static struct buffer_head * inode_getblk (struct inode * inode, int nr, int new_block, int * err, int metadata, long *phys, int *new) { @@ -223,7 +224,6 @@ return NULL; } } - *err = -EFBIG; /* Check file limits.. */ { @@ -311,7 +311,7 @@ * can fail due to: - not present * - out of space * - * NULL return in the data case is mandatory. + * NULL return in the data case, or an error, is mandatory. */ static struct buffer_head * block_getblk (struct inode * inode, struct buffer_head * bh, int nr, @@ -341,6 +341,7 @@ if (tmp == le32_to_cpu(*p)) goto out; brelse (result); + result = NULL; goto repeat; } else { *phys = tmp; @@ -402,11 +403,9 @@ *new = 1; } *p = le32_to_cpu(tmp); - mark_buffer_dirty(bh, 1); - if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { + mark_buffer_dirty_inode(bh, 1, inode); + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } inode->i_ctime = CURRENT_TIME; inode->i_blocks += blocksize/512; mark_inode_dirty(inode); @@ -487,9 +486,9 @@ #define GET_INODE_PTR(x) \ inode_getblk(inode, x, iblock, &err, 1, NULL, NULL) #define GET_INDIRECT_DATABLOCK(x) \ - block_getblk (inode, bh, x, iblock, &err, 0, &phys, &new); + block_getblk (inode, bh, x, iblock, &err, 0, &phys, &new) #define GET_INDIRECT_PTR(x) \ - block_getblk (inode, bh, x, iblock, &err, 1, NULL, NULL); + block_getblk (inode, bh, x, iblock, &err, 1, NULL, NULL) if (ptr < direct_blocks) { bh = GET_INODE_DATABLOCK(ptr); @@ -547,13 +546,11 @@ struct buffer_head * ext2_getblk(struct inode * inode, long block, int create, int * err) { struct buffer_head dummy; - int error; dummy.b_state = 0; dummy.b_blocknr = -1000; - error = ext2_get_block(inode, block, &dummy, create); - *err = error; - if (!error && buffer_mapped(&dummy)) { + *err = ext2_get_block(inode, 
block, &dummy, create); + if (!*err && buffer_mapped(&dummy)) { struct buffer_head *bh; bh = getblk(dummy.b_dev, dummy.b_blocknr, inode->i_sb->s_blocksize); if (buffer_new(&dummy)) { @@ -881,8 +878,23 @@ raw_inode->i_file_acl = cpu_to_le32(inode->u.ext2_i.i_file_acl); if (S_ISDIR(inode->i_mode)) raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext2_i.i_dir_acl); - else + else { raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32); + if (inode->i_size >> 31) { + struct super_block *sb = inode->i_sb; + struct ext2_super_block *es = sb->u.ext2_sb.s_es; + if (!(es->s_feature_ro_compat & cpu_to_le32(EXT2_FEATURE_RO_COMPAT_LARGE_FILE))) { + /* If this is the first large file + * created, add a flag to the superblock + * SMP Note: we're currently protected by the + * big kernel lock here, so this will need + * to be changed if that's no longer true. + */ + es->s_feature_ro_compat |= cpu_to_le32(EXT2_FEATURE_RO_COMPAT_LARGE_FILE); + ext2_write_super(sb); + } + } + } raw_inode->i_generation = cpu_to_le32(inode->i_generation); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) @@ -904,10 +916,10 @@ return err; } -void ext2_write_inode (struct inode * inode) +void ext2_write_inode (struct inode * inode, int wait) { lock_kernel(); - ext2_update_inode (inode, 0); + ext2_update_inode (inode, wait); unlock_kernel(); } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/namei.c linux.ac/fs/ext2/namei.c --- linux.t2/fs/ext2/namei.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/ext2/namei.c Sat Jun 10 22:18:33 2000 @@ -366,12 +366,9 @@ struct inode * inode; int err; - /* - * N.B. Several error exits in ext2_new_inode don't set err. 
- */ inode = ext2_new_inode (dir, mode, &err); if (!inode) - return -EIO; + return err; inode->i_op = &ext2_file_inode_operations; inode->i_fop = &ext2_file_operations; @@ -397,7 +394,7 @@ inode = ext2_new_inode (dir, mode, &err); if (!inode) - return -EIO; + return err; inode->i_uid = current->fsuid; init_special_inode(inode, mode, rdev); @@ -428,7 +425,7 @@ inode = ext2_new_inode (dir, S_IFDIR, &err); if (!inode) - return -EIO; + return err; inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; @@ -454,7 +451,7 @@ strcpy (de->name, ".."); ext2_set_de_type(dir->i_sb, de, S_IFDIR); inode->i_nlink = 2; - mark_buffer_dirty(dir_block, 1); + mark_buffer_dirty_inode(dir_block, 1, dir); brelse (dir_block); inode->i_mode = S_IFDIR | mode; if (dir->i_mode & S_ISGID) @@ -634,7 +631,7 @@ return -ENAMETOOLONG; if (!(inode = ext2_new_inode (dir, S_IFLNK, &err))) - return -EIO; + return err; inode->i_mode = S_IFLNK | S_IRWXUGO; @@ -791,7 +788,7 @@ mark_inode_dirty(old_dir); if (dir_bh) { PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); - mark_buffer_dirty(dir_bh, 1); + mark_buffer_dirty_inode(dir_bh, 1, old_inode); old_dir->i_nlink--; mark_inode_dirty(old_dir); if (new_inode) { diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/super.c linux.ac/fs/ext2/super.c --- linux.t2/fs/ext2/super.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ext2/super.c Mon Jun 12 00:08:08 2000 @@ -593,7 +593,6 @@ /* * set up enough so that it can read an inode */ - sb->s_dev = dev; sb->s_op = &ext2_sops; sb->s_root = d_alloc_root(iget(sb, EXT2_ROOT_INO)); if (!sb->s_root) { diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ext2/truncate.c linux.ac/fs/ext2/truncate.c --- linux.t2/fs/ext2/truncate.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ext2/truncate.c Sat Jun 10 21:51:07 2000 @@ -211,7 +211,7 @@ inode->i_ino, tmp); *p = 0; if (dind_bh) - mark_buffer_dirty(dind_bh, 1); + mark_buffer_dirty_inode(dind_bh, 1, inode); 
else mark_inode_dirty(inode); return 0; @@ -279,7 +279,7 @@ inode->i_ino, tmp); *p = 0; if (tind_bh) - mark_buffer_dirty(tind_bh, 1); + mark_buffer_dirty_inode(tind_bh, 1, inode); else mark_inode_dirty(inode); return 0; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/fat/inode.c linux.ac/fs/fat/inode.c --- linux.t2/fs/fat/inode.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/fat/inode.c Sun Jun 11 15:28:21 2000 @@ -837,7 +837,7 @@ MSDOS_I(inode)->i_ctime_ms = de->ctime_ms; } -void fat_write_inode(struct inode *inode) +void fat_write_inode(struct inode *inode, int unused) { struct super_block *sb = inode->i_sb; struct buffer_head *bh; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/fcntl.c linux.ac/fs/fcntl.c --- linux.t2/fs/fcntl.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/fcntl.c Mon Jun 12 00:08:08 2000 @@ -252,8 +252,8 @@ err = sock_fcntl (filp, cmd, arg); break; } - fput(filp); unlock_kernel(); + fput(filp); out: return err; } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/file_table.c linux.ac/fs/file_table.c --- linux.t2/fs/file_table.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/file_table.c Thu Jun 22 17:24:05 2000 @@ -100,7 +100,8 @@ /* * Clear and initialize a (private) struct file for the given dentry, * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. + * inode->i_fop is not NULL. The only user is nfsfh.c and this function + * will eventually go away. 
*/ int init_private_file(struct file *filp, struct dentry *dentry, int mode) { diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/filesystems.c linux.ac/fs/filesystems.c --- linux.t2/fs/filesystems.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/filesystems.c Tue May 16 22:56:45 2000 @@ -52,7 +52,7 @@ #ifdef CONFIG_NFSD_MODULE int (*do_nfsservctl)(int, void *, void *); #endif -long +int asmlinkage sys_nfsservctl(int cmd, void *argp, void *resp) { #ifndef CONFIG_NFSD_MODULE diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/hpfs/file.c linux.ac/fs/hpfs/file.c --- linux.t2/fs/hpfs/file.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/hpfs/file.c Sun Jun 11 15:28:50 2000 @@ -56,7 +56,7 @@ i->i_blocks = 1 + ((i->i_size + 511) >> 9); i->u.hpfs_i.mmu_private = i->i_size; hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); - hpfs_write_inode(i); + hpfs_write_inode(i, 0); } int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/hpfs/hpfs_fn.h linux.ac/fs/hpfs/hpfs_fn.h --- linux.t2/fs/hpfs/hpfs_fn.h Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/hpfs/hpfs_fn.h Sat Jun 24 14:22:42 2000 @@ -266,7 +266,7 @@ void hpfs_read_inode(struct inode *); void hpfs_write_inode_ea(struct inode *, struct fnode *); -void hpfs_write_inode(struct inode *); +void hpfs_write_inode(struct inode *, int); void hpfs_write_inode_nolock(struct inode *); int hpfs_notify_change(struct dentry *, struct iattr *); void hpfs_write_if_changed(struct inode *); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/hpfs/inode.c linux.ac/fs/hpfs/inode.c --- linux.t2/fs/hpfs/inode.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/hpfs/inode.c Sun Jun 11 15:28:50 2000 @@ -228,7 +228,7 @@ } } -void hpfs_write_inode(struct inode *i) +void hpfs_write_inode(struct inode *i, int unused) { struct inode *parent; if (!i->i_nlink) return; @@ 
-300,14 +300,14 @@ if (inode->i_sb->s_hpfs_root == inode->i_ino) return -EINVAL; if ((error = inode_change_ok(inode, attr))) return error; inode_setattr(inode, attr); - hpfs_write_inode(inode); + hpfs_write_inode(inode, 0); return 0; } void hpfs_write_if_changed(struct inode *inode) { if (inode->i_hpfs_dirty) { - hpfs_write_inode(inode); + hpfs_write_inode(inode, 0); } } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/hpfs/namei.c linux.ac/fs/hpfs/namei.c --- linux.t2/fs/hpfs/namei.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/hpfs/namei.c Mon Jun 12 00:59:50 2000 @@ -330,7 +330,15 @@ struct iattr newattrs; int err; hpfs_unlock_2inodes(dir, inode); - if (rep || dentry->d_count > 1 || permission(inode, MAY_WRITE) || get_write_access(inode)) goto ret; + if (rep) + goto ret; + d_drop(dentry); + if (dentry->d_count > 1 || + permission(inode, MAY_WRITE) || + get_write_access(inode)) { + d_rehash(dentry); + goto ret; + } /*printk("HPFS: truncating file before delete.\n");*/ down(&inode->i_sem); newattrs.ia_size = 0; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/inode.c linux.ac/fs/inode.c --- linux.t2/fs/inode.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/inode.c Mon Jun 19 19:54:42 2000 @@ -96,6 +96,7 @@ INIT_LIST_HEAD(&inode->i_hash); INIT_LIST_HEAD(&inode->i_data.pages); INIT_LIST_HEAD(&inode->i_dentry); + INIT_LIST_HEAD(&inode->i_dirty_buffers); sema_init(&inode->i_sem, 1); sema_init(&inode->i_zombie, 1); spin_lock_init(&inode->i_data.i_shared_lock); @@ -122,14 +123,14 @@ * Mark an inode as dirty. Callers should use mark_inode_dirty. 
*/ -void __mark_inode_dirty(struct inode *inode) +void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block * sb = inode->i_sb; if (sb) { spin_lock(&inode_lock); - if (!(inode->i_state & I_DIRTY)) { - inode->i_state |= I_DIRTY; + if ((inode->i_state & flags) != flags) { + inode->i_state |= flags; /* Only add valid (ie hashed) inodes to the dirty list */ if (!list_empty(&inode->i_hash)) { list_del(&inode->i_list); @@ -162,10 +163,10 @@ } -static inline void write_inode(struct inode *inode) +static inline void write_inode(struct inode *inode, int wait) { if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode) - inode->i_sb->s_op->write_inode(inode); + inode->i_sb->s_op->write_inode(inode, wait); } static inline void __iget(struct inode * inode) @@ -182,7 +183,7 @@ inodes_stat.nr_unused--; } -static inline void sync_one(struct inode *inode) +static inline void sync_one(struct inode *inode, int wait) { if (inode->i_state & I_LOCK) { __iget(inode); @@ -196,10 +197,11 @@ ? &inode_in_use : &inode_unused); /* Set I_LOCK, reset I_DIRTY */ - inode->i_state ^= I_DIRTY | I_LOCK; + inode->i_state |= I_LOCK; + inode->i_state &= ~I_DIRTY; spin_unlock(&inode_lock); - write_inode(inode); + write_inode(inode, wait); spin_lock(&inode_lock); inode->i_state &= ~I_LOCK; @@ -212,7 +214,7 @@ struct list_head * tmp; while ((tmp = head->prev) != head) - sync_one(list_entry(tmp, struct inode, i_list)); + sync_one(list_entry(tmp, struct inode, i_list), 0); } /** @@ -245,6 +247,7 @@ spin_unlock(&inode_lock); } + /* * Called with the spinlock already held.. */ @@ -261,19 +264,20 @@ /** * write_inode_now - write an inode to disk * @inode: inode to write to disk + * @wait: if set, we wait for the write to complete on disk * * This function commits an inode to disk immediately if it is * dirty. This is primarily needed by knfsd. 
*/ -void write_inode_now(struct inode *inode) +void write_inode_now(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; if (sb) { spin_lock(&inode_lock); while (inode->i_state & I_DIRTY) - sync_one(inode); + sync_one(inode, wait); spin_unlock(&inode_lock); } else @@ -281,6 +285,60 @@ } /** + * generic_osync_inode - flush all dirty data for a given inode to disk + * @inode: inode to write + * @datasync: if set, don't bother flushing timestamps + * + * This is called by generic_file_write for files which have the O_SYNC + * flag set, to flush dirty writes to disk. + */ + +int generic_osync_inode(struct inode *inode, int datasync) +{ + int err; + + /* + * WARNING + * + * Currently, the filesystem write path does not pass the + * filp down to the low-level write functions. Therefore it + * is impossible for (say) __block_commit_write to know if + * the operation is O_SYNC or not. + * + * Ideally, O_SYNC writes would have the filesystem call + * ll_rw_block as it went to kick-start the writes, and we + * could call osync_inode_buffers() here to wait only for + * those IOs which have already been submitted to the device + * driver layer. As it stands, if we did this we'd not write + * anything to disk since our writes have not been queued by + * this point: they are still on the dirty LRU. + * + * So, currently we will call fsync_inode_buffers() instead, + * to flush _all_ dirty buffers for this inode to disk on + * every O_SYNC write, not just the synchronous I/Os. 
--sct + */ + +#ifdef WRITERS_QUEUE_IO + err = osync_inode_buffers(inode); +#else + err = fsync_inode_buffers(inode); +#endif + + spin_lock(&inode_lock); + if (!(inode->i_state & I_DIRTY)) + goto out; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto out; + spin_unlock(&inode_lock); + write_inode_now(inode, 1); + return err; + + out: + spin_unlock(&inode_lock); + return err; +} + +/** * clear_inode - clear an inode * @inode: inode to clear * @@ -324,7 +382,7 @@ inode = list_entry(inode_entry, struct inode, i_list); if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + truncate_all_inode_pages(&inode->i_data); clear_inode(inode); destroy_inode(inode); } @@ -349,6 +407,7 @@ inode = list_entry(tmp, struct inode, i_list); if (inode->i_sb != sb) continue; + invalidate_inode_buffers(inode); if (!atomic_read(&inode->i_count)) { list_del(&inode->i_hash); INIT_LIST_HEAD(&inode->i_hash); @@ -410,7 +469,8 @@ * dispose_list. */ #define CAN_UNUSE(inode) \ - (((inode)->i_state | (inode)->i_data.nrpages) == 0) + ((((inode)->i_state | (inode)->i_data.nrpages) == 0) && \ + !inode_has_buffers(inode)) #define INODE(entry) (list_entry(entry, struct inode, i_list)) void prune_icache(int goal) @@ -770,7 +830,7 @@ spin_unlock(&inode_lock); if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + truncate_all_inode_pages(&inode->i_data); destroy = 1; if (op && op->delete_inode) { @@ -925,7 +985,7 @@ if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return; if ( IS_RDONLY (inode) ) return; inode->i_atime = CURRENT_TIME; - mark_inode_dirty (inode); + mark_inode_dirty_sync (inode); } /* End Function update_atime */ diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ioctl.c linux.ac/fs/ioctl.c --- linux.t2/fs/ioctl.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/ioctl.c Mon Jun 12 00:08:08 2000 @@ -107,8 +107,8 @@ else if (filp->f_op && filp->f_op->ioctl) error = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg); } 
- fput(filp); unlock_kernel(); + fput(filp); out: return error; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/lockd/clntlock.c linux.ac/fs/lockd/clntlock.c --- linux.t2/fs/lockd/clntlock.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/lockd/clntlock.c Sun Jun 11 15:19:33 2000 @@ -162,8 +162,7 @@ { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; - struct file_lock *fl; - struct inode *inode; + struct list_head *tmp; /* This one ensures that our parent doesn't terminate while the * reclaim is in progress */ @@ -171,19 +170,21 @@ lockd_up(); /* First, reclaim all locks that have been granted previously. */ - do { - for (fl = file_lock_table; fl; fl = fl->fl_nextlink) { - inode = fl->fl_file->f_dentry->d_inode; - if (inode->i_sb->s_magic == NFS_SUPER_MAGIC - && nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) - && fl->fl_u.nfs_fl.state != host->h_state - && (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) { - fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED; - nlmclnt_reclaim(host, fl); - break; - } +restart: + tmp = file_lock_list.next; + while (tmp != &file_lock_list) { + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + struct inode *inode = fl->fl_file->f_dentry->d_inode; + if (inode->i_sb->s_magic == NFS_SUPER_MAGIC && + nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) && + fl->fl_u.nfs_fl.state != host->h_state && + (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) { + fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED; + nlmclnt_reclaim(host, fl); + goto restart; } - } while (fl); + tmp = tmp->next; + } host->h_reclaiming = 0; wake_up(&host->h_gracewait); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/lockd/svclock.c linux.ac/fs/lockd/svclock.c --- linux.t2/fs/lockd/svclock.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/lockd/svclock.c Sun Jun 11 15:19:33 2000 @@ -347,7 +347,7 @@ /* Append to list of blocked */ nlmsvc_insert_block(block, NLM_NEVER); - if (!block->b_call.a_args.lock.fl.fl_prevblock) { + if 
(list_empty(&block->b_call.a_args.lock.fl.fl_block)) { /* Now add block to block list of the conflicting lock if we haven't done so. */ dprintk("lockd: blocking on this lock.\n"); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/locks.c linux.ac/fs/locks.c --- linux.t2/fs/locks.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/locks.c Wed Jun 14 17:45:23 2000 @@ -108,530 +108,98 @@ #include #include #include +#include #include -static int flock_make_lock(struct file *filp, struct file_lock *fl, - unsigned int cmd); -static int posix_make_lock(struct file *filp, struct file_lock *fl, - struct flock *l); -static int flock_locks_conflict(struct file_lock *caller_fl, - struct file_lock *sys_fl); -static int posix_locks_conflict(struct file_lock *caller_fl, - struct file_lock *sys_fl); -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl); -static int flock_lock_file(struct file *filp, struct file_lock *caller, - unsigned int wait); -static int posix_locks_deadlock(struct file_lock *caller, - struct file_lock *blocker); - -static struct file_lock *locks_empty_lock(void); -static struct file_lock *locks_init_lock(struct file_lock *, - struct file_lock *); -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl); -static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait); -static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx); - -static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter); -static void locks_delete_block(struct file_lock *blocker, struct file_lock *waiter); -static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait); - -struct file_lock *file_lock_table = NULL; - -/* Allocate a new lock, and initialize its fields from fl. - * The lock is not inserted into any lists until locks_insert_lock() or - * locks_insert_block() are called. 
- */ -static inline struct file_lock *locks_alloc_lock(struct file_lock *fl) -{ - return locks_init_lock(locks_empty_lock(), fl); -} +LIST_HEAD(file_lock_list); +static LIST_HEAD(blocked_list); -/* Free lock not inserted in any queue. - */ -static inline void locks_free_lock(struct file_lock *fl) -{ - if (waitqueue_active(&fl->fl_wait)) - panic("Attempting to free lock with active wait queue"); - - if (fl->fl_nextblock != NULL || fl->fl_prevblock != NULL) - panic("Attempting to free lock with active block list"); - - kfree(fl); - return; -} - -/* Check if two locks overlap each other. - */ -static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) -{ - return ((fl1->fl_end >= fl2->fl_start) && - (fl2->fl_end >= fl1->fl_start)); -} - -/* - * Check whether two locks have the same owner - * N.B. Do we need the test on PID as well as owner? - * (Clone tasks should be considered as one "owner".) - */ -static inline int -locks_same_owner(struct file_lock *fl1, struct file_lock *fl2) -{ - return (fl1->fl_owner == fl2->fl_owner) && - (fl1->fl_pid == fl2->fl_pid); -} +static kmem_cache_t *filelock_cache; -/* Insert waiter into blocker's block list. - * We use a circular list so that processes can be easily woken up in - * the order they blocked. The documentation doesn't require this but - * it seems like the reasonable thing to do. - */ -static void locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) +/* Allocate an empty lock structure. 
*/ +static struct file_lock *locks_alloc_lock(void) { - struct file_lock *prevblock; - - if (waiter->fl_prevblock) { - printk(KERN_ERR "locks_insert_block: remove duplicated lock " - "(pid=%d %Ld-%Ld type=%d)\n", - waiter->fl_pid, (long long)waiter->fl_start, - (long long)waiter->fl_end, waiter->fl_type); - locks_delete_block(waiter->fl_prevblock, waiter); - } - - if (blocker->fl_prevblock == NULL) - /* No previous waiters - list is empty */ - prevblock = blocker; - else - /* Previous waiters exist - add to end of list */ - prevblock = blocker->fl_prevblock; - - prevblock->fl_nextblock = waiter; - blocker->fl_prevblock = waiter; - waiter->fl_nextblock = blocker; - waiter->fl_prevblock = prevblock; - - return; + struct file_lock *fl; + fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL); + return fl; } -/* Remove waiter from blocker's block list. - * When blocker ends up pointing to itself then the list is empty. - */ -static void locks_delete_block(struct file_lock *blocker, - struct file_lock *waiter) +/* Free a lock which is not in use. */ +static inline void locks_free_lock(struct file_lock *fl) { - struct file_lock *nextblock; - struct file_lock *prevblock; - - nextblock = waiter->fl_nextblock; - prevblock = waiter->fl_prevblock; - - if (nextblock == NULL) + if (fl == NULL) { + BUG(); return; - - nextblock->fl_prevblock = prevblock; - prevblock->fl_nextblock = nextblock; - - waiter->fl_prevblock = waiter->fl_nextblock = NULL; - if (blocker->fl_nextblock == blocker) - /* No more locks on blocker's blocked list */ - blocker->fl_prevblock = blocker->fl_nextblock = NULL; - return; -} - -/* The following two are for the benefit of lockd. 
- */ -void -posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) -{ - locks_insert_block(blocker, waiter); - return; -} - -void -posix_unblock_lock(struct file_lock *waiter) -{ - if (waiter->fl_prevblock) - locks_delete_block(waiter->fl_prevblock, waiter); - return; -} - -/* Wake up processes blocked waiting for blocker. - * If told to wait then schedule the processes until the block list - * is empty, otherwise empty the block list ourselves. - */ -static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait) -{ - struct file_lock *waiter; - - while ((waiter = blocker->fl_nextblock) != NULL) { - /* N.B. Is it possible for the notify function to block?? */ - if (waiter->fl_notify) - waiter->fl_notify(waiter); - wake_up(&waiter->fl_wait); - if (wait) { - /* Let the blocked process remove waiter from the - * block list when it gets scheduled. - */ - current->policy |= SCHED_YIELD; - schedule(); - } else { - /* Remove waiter from the block list, because by the - * time it wakes up blocker won't exist any more. - */ - locks_delete_block(blocker, waiter); - } } - return; -} -/* flock() system call entry point. Apply a FL_FLOCK style lock to - * an open file descriptor. - */ -asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) -{ - struct file_lock file_lock; - struct file *filp; - int error; - - lock_kernel(); - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; - error = -EINVAL; - if (!flock_make_lock(filp, &file_lock, cmd)) - goto out_putf; - error = -EBADF; - if ((file_lock.fl_type != F_UNLCK) && !(filp->f_mode & 3)) - goto out_putf; - error = flock_lock_file(filp, &file_lock, - (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1); -out_putf: - fput(filp); -out: - unlock_kernel(); - return (error); -} - -/* Report the first existing lock that would conflict with l. - * This implements the F_GETLK command of fcntl(). 
- */ -int fcntl_getlk(unsigned int fd, struct flock *l) -{ - struct file *filp; - struct file_lock *fl,file_lock; - struct flock flock; - int error; - - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; - error = -EINVAL; - if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) - goto out; - - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; - - if (!posix_make_lock(filp, &file_lock, &flock)) - goto out_putf; - - if (filp->f_op->lock) { - error = filp->f_op->lock(filp, F_GETLK, &file_lock); - if (error < 0) - goto out_putf; - else if (error == LOCK_USE_CLNT) - /* Bypass for NFS with no locking - 2.0.36 compat */ - fl = posix_test_lock(filp, &file_lock); - else - fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); - } else { - fl = posix_test_lock(filp, &file_lock); - } - - flock.l_type = F_UNLCK; - if (fl != NULL) { - flock.l_pid = fl->fl_pid; - flock.l_start = fl->fl_start; - flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : - fl->fl_end - fl->fl_start + 1; - flock.l_whence = 0; - flock.l_type = fl->fl_type; - } - error = -EFAULT; - if (!copy_to_user(l, &flock, sizeof(flock))) - error = 0; - -out_putf: - fput(filp); -out: - return error; -} - -/* Apply the lock described by l to an open file descriptor. - * This implements both the F_SETLK and F_SETLKW commands of fcntl(). - */ -int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) -{ - struct file *filp; - struct file_lock file_lock; - struct flock flock; - struct inode *inode; - int error; - - /* - * This might block, so we do it before checking the inode. - */ - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; - - /* Get arguments and validate them ... - */ - - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; - - error = -EINVAL; - inode = filp->f_dentry->d_inode; - - /* Don't allow mandatory locks on files that may be memory mapped - * and shared. 
- */ - if (IS_MANDLOCK(inode) && - (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) { - struct vm_area_struct *vma; - struct address_space *mapping = inode->i_mapping; - spin_lock(&mapping->i_shared_lock); - for(vma = mapping->i_mmap;vma;vma = vma->vm_next_share) { - if (!(vma->vm_flags & VM_MAYSHARE)) - continue; - spin_unlock(&mapping->i_shared_lock); - error = -EAGAIN; - goto out_putf; - } - spin_unlock(&mapping->i_shared_lock); - } - - error = -EINVAL; - if (!posix_make_lock(filp, &file_lock, &flock)) - goto out_putf; - - error = -EBADF; - switch (flock.l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out_putf; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - goto out_putf; - break; - case F_UNLCK: - break; - case F_SHLCK: - case F_EXLCK: -#ifdef __sparc__ -/* warn a bit for now, but don't overdo it */ -{ - static int count = 0; - if (!count) { - count=1; - printk(KERN_WARNING - "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n", - current->pid, current->comm); - } -} - if (!(filp->f_mode & 3)) - goto out_putf; - break; -#endif - default: - error = -EINVAL; - goto out_putf; - } + if (waitqueue_active(&fl->fl_wait)) + panic("Attempting to free lock with active wait queue"); - if (filp->f_op->lock != NULL) { - error = filp->f_op->lock(filp, cmd, &file_lock); - if (error < 0) - goto out_putf; - } - error = posix_lock_file(filp, &file_lock, cmd == F_SETLKW); + if (!list_empty(&fl->fl_block)) + panic("Attempting to free lock with active block list"); -out_putf: - fput(filp); -out: - return error; -} + if (!list_empty(&fl->fl_link)) + panic("Attempting to free lock on active lock list"); -/* - * This function is called when the file is being removed - * from the task's fd array. 
- */ -void locks_remove_posix(struct file *filp, fl_owner_t owner) -{ - struct inode * inode = filp->f_dentry->d_inode; - struct file_lock file_lock, *fl; - struct file_lock **before; - - /* - * For POSIX locks we free all locks on this file for the given task. - */ -repeat: - before = &inode->i_flock; - while ((fl = *before) != NULL) { - if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) { - int (*lock)(struct file *, int, struct file_lock *); - lock = filp->f_op->lock; - if (lock) { - file_lock = *fl; - file_lock.fl_type = F_UNLCK; - } - locks_delete_lock(before, 0); - if (lock) { - lock(filp, F_SETLK, &file_lock); - /* List may have changed: */ - goto repeat; - } - continue; - } - before = &fl->fl_next; - } + kmem_cache_free(filelock_cache, fl); } /* - * This function is called on the last close of an open file. + * Initialises the fields of the file lock which are invariant for + * free file_locks. */ -void locks_remove_flock(struct file *filp) +static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags) { - struct inode * inode = filp->f_dentry->d_inode; - struct file_lock file_lock, *fl; - struct file_lock **before; + struct file_lock *lock = (struct file_lock *) foo; -repeat: - before = &inode->i_flock; - while ((fl = *before) != NULL) { - if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) { - int (*lock)(struct file *, int, struct file_lock *); - lock = NULL; - if (filp->f_op) - lock = filp->f_op->lock; - if (lock) { - file_lock = *fl; - file_lock.fl_type = F_UNLCK; - } - locks_delete_lock(before, 0); - if (lock) { - lock(filp, F_SETLK, &file_lock); - /* List may have changed: */ - goto repeat; - } - continue; - } - before = &fl->fl_next; - } -} - -struct file_lock * -posix_test_lock(struct file *filp, struct file_lock *fl) -{ - struct file_lock *cfl; - - for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { - if (!(cfl->fl_flags & FL_POSIX)) - continue; - if (posix_locks_conflict(cfl, fl)) - break; - } - - return 
(cfl); -} - -int locks_mandatory_locked(struct inode *inode) -{ - fl_owner_t owner = current->files; - struct file_lock *fl; + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) != + SLAB_CTOR_CONSTRUCTOR) + return; - /* - * Search the lock list for this inode for any POSIX locks. - */ - lock_kernel(); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) - continue; - if (fl->fl_owner != owner) - break; - } - unlock_kernel(); - return fl ? -EAGAIN : 0; + lock->fl_next = NULL; + INIT_LIST_HEAD(&lock->fl_link); + INIT_LIST_HEAD(&lock->fl_block); + init_waitqueue_head(&lock->fl_wait); } -int locks_mandatory_area(int read_write, struct inode *inode, - struct file *filp, loff_t offset, - size_t count) -{ - struct file_lock *fl; - struct file_lock tfl; - int error; - - memset(&tfl, 0, sizeof(tfl)); - - tfl.fl_file = filp; - tfl.fl_flags = FL_POSIX | FL_ACCESS; - tfl.fl_owner = current->files; - tfl.fl_pid = current->pid; - init_waitqueue_head(&tfl.fl_wait); - tfl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; - tfl.fl_start = offset; - tfl.fl_end = offset + count - 1; - - error = 0; - lock_kernel(); - -repeat: - /* Search the lock list for this inode for locks that conflict with - * the proposed read/write. - */ - for (fl = inode->i_flock; ; fl = fl->fl_next) { - error = 0; - if (!fl) - break; - if (!(fl->fl_flags & FL_POSIX)) - continue; - /* Block for writes against a "read" lock, - * and both reads and writes against a "write" lock. - */ - if (posix_locks_conflict(&tfl, fl)) { - error = -EAGAIN; - if (filp && (filp->f_flags & O_NONBLOCK)) - break; - error = -ERESTARTSYS; - if (signal_pending(current)) - break; - error = -EDEADLK; - if (posix_locks_deadlock(&tfl, fl)) - break; - - locks_insert_block(fl, &tfl); - interruptible_sleep_on(&tfl.fl_wait); - locks_delete_block(fl, &tfl); +/* + * Initialize a new lock from an existing file_lock structure. 
+ */ +static void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + new->fl_owner = fl->fl_owner; + new->fl_pid = fl->fl_pid; + new->fl_file = fl->fl_file; + new->fl_flags = fl->fl_flags; + new->fl_type = fl->fl_type; + new->fl_start = fl->fl_start; + new->fl_end = fl->fl_end; + new->fl_notify = fl->fl_notify; + new->fl_insert = fl->fl_insert; + new->fl_remove = fl->fl_remove; + new->fl_u = fl->fl_u; +} + +/* Fill in a file_lock structure with an appropriate FLOCK lock. */ +static struct file_lock *flock_make_lock(struct file *filp, unsigned int type) +{ + struct file_lock *fl = locks_alloc_lock(); + if (fl == NULL) + return NULL; - /* - * If we've been sleeping someone might have - * changed the permissions behind our back. - */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID) - break; - goto repeat; - } - } - unlock_kernel(); - return error; + fl->fl_owner = NULL; + fl->fl_file = filp; + fl->fl_pid = current->pid; + fl->fl_flags = FL_FLOCK; + fl->fl_type = type; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + fl->fl_notify = NULL; + fl->fl_insert = NULL; + fl->fl_remove = NULL; + + return fl; } /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX @@ -642,21 +210,6 @@ { loff_t start; - memset(fl, 0, sizeof(*fl)); - - init_waitqueue_head(&fl->fl_wait); - fl->fl_flags = FL_POSIX; - - switch (l->l_type) { - case F_RDLCK: - case F_WRLCK: - case F_UNLCK: - fl->fl_type = l->l_type; - break; - default: - return (0); - } - switch (l->l_whence) { case 0: /*SEEK_SET*/ start = 0; @@ -680,44 +233,168 @@ if (l->l_len == 0) fl->fl_end = OFFSET_MAX; - fl->fl_file = filp; fl->fl_owner = current->files; fl->fl_pid = current->pid; + fl->fl_file = filp; + fl->fl_flags = FL_POSIX; + fl->fl_notify = NULL; + fl->fl_insert = NULL; + fl->fl_remove = NULL; + + switch (l->l_type) { + case F_RDLCK: + case F_WRLCK: + case F_UNLCK: + fl->fl_type = l->l_type; + break; + default: + return (0); + } return (1); } -/* Verify a call to flock() and fill 
in a file_lock structure with - * an appropriate FLOCK lock. +/* Check if two locks overlap each other. */ -static int flock_make_lock(struct file *filp, struct file_lock *fl, - unsigned int cmd) +static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) { - memset(fl, 0, sizeof(*fl)); + return ((fl1->fl_end >= fl2->fl_start) && + (fl2->fl_end >= fl1->fl_start)); +} - init_waitqueue_head(&fl->fl_wait); +/* + * Check whether two locks have the same owner + * N.B. Do we need the test on PID as well as owner? + * (Clone tasks should be considered as one "owner".) + */ +static inline int +locks_same_owner(struct file_lock *fl1, struct file_lock *fl2) +{ + return (fl1->fl_owner == fl2->fl_owner) && + (fl1->fl_pid == fl2->fl_pid); +} - switch (cmd & ~LOCK_NB) { - case LOCK_SH: - fl->fl_type = F_RDLCK; - break; - case LOCK_EX: - fl->fl_type = F_WRLCK; - break; - case LOCK_UN: +/* Remove waiter from blocker's block list. + * When blocker ends up pointing to itself then the list is empty. + */ +static void locks_delete_block(struct file_lock *waiter) +{ + list_del(&waiter->fl_block); + INIT_LIST_HEAD(&waiter->fl_block); + list_del(&waiter->fl_link); + INIT_LIST_HEAD(&waiter->fl_link); +} + +/* Insert waiter into blocker's block list. + * We use a circular list so that processes can be easily woken up in + * the order they blocked. The documentation doesn't require this but + * it seems like the reasonable thing to do. + */ +static void locks_insert_block(struct file_lock *blocker, + struct file_lock *waiter) +{ + if (!list_empty(&waiter->fl_block)) { + printk(KERN_ERR "locks_insert_block: removing duplicated lock " + "(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid, + waiter->fl_start, waiter->fl_end, waiter->fl_type); + locks_delete_block(waiter); + } + list_add_tail(&waiter->fl_block, &blocker->fl_block); +// list_add(&waiter->fl_link, &blocked_list); +// waiter->fl_next = blocker; +} + +/* Wake up processes blocked waiting for blocker. 
+ * If told to wait then schedule the processes until the block list + * is empty, otherwise empty the block list ourselves. + */ +static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait) +{ + while (!list_empty(&blocker->fl_block)) { + struct file_lock *waiter = list_entry(blocker->fl_block.next, struct file_lock, fl_block); + /* N.B. Is it possible for the notify function to block?? */ + if (waiter->fl_notify) + waiter->fl_notify(waiter); + wake_up(&waiter->fl_wait); + if (wait) { + /* Let the blocked process remove waiter from the + * block list when it gets scheduled. + */ + current->policy |= SCHED_YIELD; + schedule(); + } else { + /* Remove waiter from the block list, because by the + * time it wakes up blocker won't exist any more. + */ + locks_delete_block(waiter); + } + } +} + +/* Insert file lock fl into an inode's lock list at the position indicated + * by pos. At the same time add the lock to the global file lock list. + */ +static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) +{ + list_add(&fl->fl_link, &file_lock_list); + + /* insert into file's list */ + fl->fl_next = *pos; + *pos = fl; + + if (fl->fl_insert) + fl->fl_insert(fl); +} + +/* Delete a lock and free it. + * First remove our lock from the active lock lists. Then call + * locks_wake_up_blocks() to wake up processes that are blocked + * waiting for this lock. Finally free the lock structure. 
+ */ +static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait) +{ + int (*lock)(struct file *, int, struct file_lock *); + struct file_lock *fl = *thisfl_p; + + *thisfl_p = fl->fl_next; + fl->fl_next = NULL; + + list_del(&fl->fl_link); + INIT_LIST_HEAD(&fl->fl_link); + + if (fl->fl_remove) + fl->fl_remove(fl); + + locks_wake_up_blocks(fl, wait); + lock = fl->fl_file->f_op->lock; + if (lock) { fl->fl_type = F_UNLCK; - break; - default: - return (0); + lock(fl->fl_file, F_SETLK, fl); } + locks_free_lock(fl); +} - fl->fl_flags = FL_FLOCK; - fl->fl_start = 0; - fl->fl_end = OFFSET_MAX; - fl->fl_file = filp; - fl->fl_owner = NULL; - - return (1); +/* Determine if lock sys_fl blocks lock caller_fl. Common functionality + * checks for overlapping locks and shared/exclusive status. + */ +static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +{ + if (!locks_overlap(caller_fl, sys_fl)) + return (0); + + switch (caller_fl->fl_type) { + case F_RDLCK: + return (sys_fl->fl_type == F_WRLCK); + + case F_WRLCK: + return (1); + + default: + printk("locks_conflict(): impossible lock type - %d\n", + caller_fl->fl_type); + break; + } + return (0); /* This should never happen */ } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific @@ -750,27 +427,19 @@ return (locks_conflict(caller_fl, sys_fl)); } -/* Determine if lock sys_fl blocks lock caller_fl. Common functionality - * checks for overlapping locks and shared/exclusive status. 
- */ -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +struct file_lock * +posix_test_lock(struct file *filp, struct file_lock *fl) { - if (!locks_overlap(caller_fl, sys_fl)) - return (0); - - switch (caller_fl->fl_type) { - case F_RDLCK: - return (sys_fl->fl_type == F_WRLCK); - - case F_WRLCK: - return (1); + struct file_lock *cfl; - default: - printk("locks_conflict(): impossible lock type - %d\n", - caller_fl->fl_type); - break; + for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { + if (!(cfl->fl_flags & FL_POSIX)) + continue; + if (posix_locks_conflict(cfl, fl)) + break; } - return (0); /* This should never happen */ + + return (cfl); } /* This function tests for deadlock condition before putting a process to @@ -790,8 +459,7 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { - struct file_lock *fl; - struct file_lock *bfl; + struct list_head *tmp; void *caller_owner, *blocked_owner; unsigned int caller_pid, blocked_pid; @@ -802,11 +470,14 @@ next_task: if (caller_owner == blocked_owner && caller_pid == blocked_pid) - return (1); - for (fl = file_lock_table; fl != NULL; fl = fl->fl_nextlink) { - if (fl->fl_owner == NULL || fl->fl_nextblock == NULL) + return 1; + list_for_each(tmp, &file_lock_list) { + struct list_head *btmp; + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + if (fl->fl_owner == NULL || list_empty(&fl->fl_block)) continue; - for (bfl = fl->fl_nextblock; bfl != fl; bfl = bfl->fl_nextblock) { + list_for_each(btmp, &fl->fl_block) { + struct file_lock *bfl = list_entry(btmp, struct file_lock, fl_block); /* waiter node, not outer tmp */ if (bfl->fl_owner == blocked_owner && bfl->fl_pid == blocked_pid) { if (fl->fl_owner == caller_owner && @@ -819,14 +490,94 @@ } } } - return (0); + return 0; +} + +int locks_mandatory_locked(struct inode *inode) +{ + fl_owner_t owner = current->files; + struct file_lock *fl; + + /* + * Search the lock list for this inode for any POSIX
locks. + */ + lock_kernel(); + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (fl->fl_owner != owner) + break; + } + unlock_kernel(); + return fl ? -EAGAIN : 0; +} + +int locks_mandatory_area(int read_write, struct inode *inode, + struct file *filp, loff_t offset, + size_t count) +{ + struct file_lock *fl; + struct file_lock *new_fl = locks_alloc_lock(); + int error; + + new_fl->fl_owner = current->files; + new_fl->fl_pid = current->pid; + new_fl->fl_file = filp; + new_fl->fl_flags = FL_POSIX | FL_ACCESS; + new_fl->fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; + new_fl->fl_start = offset; + new_fl->fl_end = offset + count - 1; + + error = 0; + lock_kernel(); + +repeat: + /* Search the lock list for this inode for locks that conflict with + * the proposed read/write. + */ + for (fl = inode->i_flock; ; fl = fl->fl_next) { + error = 0; + if (!fl) + break; + if (!(fl->fl_flags & FL_POSIX)) + continue; + /* Block for writes against a "read" lock, + * and both reads and writes against a "write" lock. + */ + if (posix_locks_conflict(new_fl, fl)) { + error = -EAGAIN; + if (filp && (filp->f_flags & O_NONBLOCK)) + break; + error = -ERESTARTSYS; + if (signal_pending(current)) + break; + error = -EDEADLK; + if (posix_locks_deadlock(new_fl, fl)) + break; + + locks_insert_block(fl, new_fl); + interruptible_sleep_on(&new_fl->fl_wait); + locks_delete_block(new_fl); + + /* + * If we've been sleeping someone might have + * changed the permissions behind our back. + */ + if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID) + break; + goto repeat; + } + } + unlock_kernel(); + locks_free_lock(new_fl); + return error; } /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks at * the head of the list, but that's secret knowledge known only to the next * two functions. 
*/ -static int flock_lock_file(struct file *filp, struct file_lock *caller, +static int flock_lock_file(struct file *filp, unsigned int lock_type, unsigned int wait) { struct file_lock *fl; @@ -834,14 +585,14 @@ struct file_lock **before; struct inode * inode = filp->f_dentry->d_inode; int error, change; - int unlock = (caller->fl_type == F_UNLCK); + int unlock = (lock_type == F_UNLCK); /* * If we need a new lock, get it in advance to avoid races. */ if (!unlock) { error = -ENOLCK; - new_fl = locks_alloc_lock(caller); + new_fl = flock_make_lock(filp, lock_type); if (!new_fl) goto out; } @@ -851,8 +602,8 @@ change = 0; before = &inode->i_flock; while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) { - if (caller->fl_file == fl->fl_file) { - if (caller->fl_type == fl->fl_type) + if (filp == fl->fl_file) { + if (lock_type == fl->fl_type) goto out; change = 1; break; @@ -888,7 +639,7 @@ goto out; locks_insert_block(fl, new_fl); interruptible_sleep_on(&new_fl->fl_wait); - locks_delete_block(fl, new_fl); + locks_delete_block(new_fl); goto repeat; } locks_insert_lock(&inode->i_flock, new_fl); @@ -928,8 +679,8 @@ * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. */ - new_fl = locks_empty_lock(); - new_fl2 = locks_empty_lock(); + new_fl = locks_alloc_lock(); + new_fl2 = locks_alloc_lock(); error = -ENOLCK; /* "no luck" */ if (!(new_fl && new_fl2)) goto out; @@ -952,7 +703,7 @@ goto out; locks_insert_block(fl, caller); interruptible_sleep_on(&caller->fl_wait); - locks_delete_block(fl, caller); + locks_delete_block(caller); goto repeat; } } @@ -1058,7 +809,7 @@ if (!added) { if (caller->fl_type == F_UNLCK) goto out; - locks_init_lock(new_fl, caller); + locks_copy_lock(new_fl, caller); locks_insert_lock(before, new_fl); new_fl = NULL; } @@ -1068,8 +819,9 @@ * so we have to use the second new lock (in this * case, even F_UNLCK may fail!). 
*/ - left = locks_init_lock(new_fl2, right); + locks_copy_lock(new_fl2, right); locks_insert_lock(before, left); + left = new_fl2; new_fl2 = NULL; } right->fl_start = caller->fl_end + 1; @@ -1081,101 +833,288 @@ } out: /* - * Free any unused locks. (They haven't - * ever been used, so we use kfree().) + * Free any unused locks. */ if (new_fl) - kfree(new_fl); + locks_free_lock(new_fl); if (new_fl2) - kfree(new_fl2); + locks_free_lock(new_fl2); return error; } -/* - * Allocate an empty lock structure. We can use GFP_KERNEL now that - * all allocations are done in advance. +static inline int flock_translate_cmd(int cmd) { + switch (cmd &~ LOCK_NB) { + case LOCK_SH: + return F_RDLCK; + case LOCK_EX: + return F_WRLCK; + case LOCK_UN: + return F_UNLCK; + } + return -EINVAL; +} + +/* flock() system call entry point. Apply a FL_FLOCK style lock to + * an open file descriptor. */ -static struct file_lock *locks_empty_lock(void) +asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) { - /* Okay, let's make a new file_lock structure... */ - return ((struct file_lock *) kmalloc(sizeof(struct file_lock), - GFP_KERNEL)); + struct file *filp; + int error, type; + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + error = flock_translate_cmd(cmd); + if (error < 0) + goto out_putf; + type = error; + + error = -EBADF; + if ((type != F_UNLCK) && !(filp->f_mode & 3)) + goto out_putf; + + lock_kernel(); + error = flock_lock_file(filp, type, + (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1); + unlock_kernel(); + +out_putf: + fput(filp); +out: + return error; } -/* - * Initialize a new lock from an existing file_lock structure. +/* Report the first existing lock that would conflict with l. + * This implements the F_GETLK command of fcntl(). 
*/ -static struct file_lock *locks_init_lock(struct file_lock *new, - struct file_lock *fl) +int fcntl_getlk(unsigned int fd, struct flock *l) { - if (new) { - memset(new, 0, sizeof(*new)); - new->fl_owner = fl->fl_owner; - new->fl_pid = fl->fl_pid; - init_waitqueue_head(&new->fl_wait); - new->fl_file = fl->fl_file; - new->fl_flags = fl->fl_flags; - new->fl_type = fl->fl_type; - new->fl_start = fl->fl_start; - new->fl_end = fl->fl_end; - new->fl_notify = fl->fl_notify; - new->fl_insert = fl->fl_insert; - new->fl_remove = fl->fl_remove; - new->fl_u = fl->fl_u; + struct file *filp; + struct file_lock *fl, *file_lock = locks_alloc_lock(); + struct flock flock; + int error; + + error = -EFAULT; + if (copy_from_user(&flock, l, sizeof(flock))) + goto out; + error = -EINVAL; + if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) + goto out; + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + if (!posix_make_lock(filp, file_lock, &flock)) + goto out_putf; + + if (filp->f_op->lock) { + error = filp->f_op->lock(filp, F_GETLK, file_lock); + if (error < 0) + goto out_putf; + else if (error == LOCK_USE_CLNT) + /* Bypass for NFS with no locking - 2.0.36 compat */ + fl = posix_test_lock(filp, file_lock); + else + fl = (file_lock->fl_type == F_UNLCK ? NULL : file_lock); + } else { + fl = posix_test_lock(filp, file_lock); } - return new; + + flock.l_type = F_UNLCK; + if (fl != NULL) { + flock.l_pid = fl->fl_pid; + flock.l_start = fl->fl_start; + flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : + fl->fl_end - fl->fl_start + 1; + flock.l_whence = 0; + flock.l_type = fl->fl_type; + } + error = -EFAULT; + if (!copy_to_user(l, &flock, sizeof(flock))) + error = 0; + +out_putf: + fput(filp); +out: + locks_free_lock(file_lock); + return error; } -/* Insert file lock fl into an inode's lock list at the position indicated - * by pos. At the same time add the lock to the global file lock list. +/* Apply the lock described by l to an open file descriptor. 
+ * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) +int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) { - fl->fl_nextlink = file_lock_table; - fl->fl_prevlink = NULL; - if (file_lock_table != NULL) - file_lock_table->fl_prevlink = fl; - file_lock_table = fl; - fl->fl_next = *pos; /* insert into file's list */ - *pos = fl; + struct file *filp; + struct file_lock *file_lock = locks_alloc_lock(); + struct flock flock; + struct inode *inode; + int error; - if (fl->fl_insert) - fl->fl_insert(fl); + /* + * This might block, so we do it before checking the inode. + */ + error = -EFAULT; + if (copy_from_user(&flock, l, sizeof(flock))) + goto out; - return; + /* Get arguments and validate them ... + */ + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + error = -EINVAL; + inode = filp->f_dentry->d_inode; + + /* Don't allow mandatory locks on files that may be memory mapped + * and shared. 
+ */ + if (IS_MANDLOCK(inode) && + (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) { + struct vm_area_struct *vma; + struct address_space *mapping = inode->i_mapping; + spin_lock(&mapping->i_shared_lock); + for(vma = mapping->i_mmap;vma;vma = vma->vm_next_share) { + if (!(vma->vm_flags & VM_MAYSHARE)) + continue; + spin_unlock(&mapping->i_shared_lock); + error = -EAGAIN; + goto out_putf; + } + spin_unlock(&mapping->i_shared_lock); + } + + error = -EINVAL; + if (!posix_make_lock(filp, file_lock, &flock)) + goto out_putf; + + error = -EBADF; + switch (flock.l_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + goto out_putf; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + goto out_putf; + break; + case F_UNLCK: + break; + case F_SHLCK: + case F_EXLCK: +#ifdef __sparc__ +/* warn a bit for now, but don't overdo it */ +{ + static int count = 0; + if (!count) { + count=1; + printk(KERN_WARNING + "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n", + current->pid, current->comm); + } +} + if (!(filp->f_mode & 3)) + goto out_putf; + break; +#endif + default: + error = -EINVAL; + goto out_putf; + } + + if (filp->f_op->lock != NULL) { + error = filp->f_op->lock(filp, cmd, file_lock); + if (error < 0) + goto out_putf; + } + error = posix_lock_file(filp, file_lock, cmd == F_SETLKW); + +out_putf: + fput(filp); +out: + locks_free_lock(file_lock); + return error; } -/* Delete a lock and free it. - * First remove our lock from the active lock lists. Then call - * locks_wake_up_blocks() to wake up processes that are blocked - * waiting for this lock. Finally free the lock structure. +/* + * This function is called when the file is being removed + * from the task's fd array. 
*/ -static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait) +void locks_remove_posix(struct file *filp, fl_owner_t owner) { - struct file_lock *thisfl; - struct file_lock *prevfl; - struct file_lock *nextfl; - - thisfl = *thisfl_p; - *thisfl_p = thisfl->fl_next; + struct inode * inode = filp->f_dentry->d_inode; + struct file_lock *fl; + struct file_lock **before; - prevfl = thisfl->fl_prevlink; - nextfl = thisfl->fl_nextlink; + /* + * For POSIX locks we free all locks on this file for the given task. + */ +repeat: + before = &inode->i_flock; + while ((fl = *before) != NULL) { + if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) { + locks_delete_lock(before, 0); + goto repeat; + } + before = &fl->fl_next; + } +} - if (nextfl != NULL) - nextfl->fl_prevlink = prevfl; +/* + * This function is called on the last close of an open file. + */ +void locks_remove_flock(struct file *filp) +{ + struct inode * inode = filp->f_dentry->d_inode; + struct file_lock file_lock, *fl; + struct file_lock **before; - if (prevfl != NULL) - prevfl->fl_nextlink = nextfl; - else - file_lock_table = nextfl; +repeat: + before = &inode->i_flock; + while ((fl = *before) != NULL) { + if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) { + int (*lock)(struct file *, int, struct file_lock *); + lock = NULL; + if (filp->f_op) + lock = filp->f_op->lock; + if (lock) { + file_lock = *fl; + file_lock.fl_type = F_UNLCK; + } + locks_delete_lock(before, 0); + if (lock) { + lock(filp, F_SETLK, &file_lock); + /* List may have changed: */ + goto repeat; + } + continue; + } + before = &fl->fl_next; + } +} - if (thisfl->fl_remove) - thisfl->fl_remove(thisfl); - - locks_wake_up_blocks(thisfl, wait); - locks_free_lock(thisfl); +/* The following two are for the benefit of lockd. 
+ */ +void +posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) +{ + lock_kernel(); + locks_insert_block(blocker, waiter); + unlock_kernel(); +} +void +posix_unblock_lock(struct file_lock *waiter) +{ + locks_delete_block(waiter); return; } @@ -1202,8 +1141,8 @@ kdevname(inode->i_dev), inode->i_ino, (long long)fl->fl_start, (long long)fl->fl_end); sprintf(out, "%08lx %08lx %08lx %08lx %08lx\n", - (long)fl, (long)fl->fl_prevlink, (long)fl->fl_nextlink, - (long)fl->fl_next, (long)fl->fl_nextblock); + (long)fl, (long)fl->fl_link.prev, (long)fl->fl_link.next, + (long)fl->fl_next, (long)fl->fl_block.next); } static void move_lock_status(char **p, off_t* pos, off_t offset) @@ -1230,35 +1169,43 @@ int get_locks_status(char *buffer, char **start, off_t offset, int length) { - struct file_lock *fl; - struct file_lock *bfl; + struct list_head *tmp; char *q = buffer; off_t pos = 0; - int i; + int i = 0; - for (fl = file_lock_table, i = 1; fl != NULL; fl = fl->fl_nextlink, i++) { - lock_get_status(q, fl, i, ""); + lock_kernel(); + list_for_each(tmp, &file_lock_list) { + struct list_head *btmp; + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); if(pos >= offset+length) goto done; - if ((bfl = fl->fl_nextblock) == NULL) - continue; - do { + list_for_each(btmp, &fl->fl_block) { + struct file_lock *bfl = list_entry(btmp, + struct file_lock, fl_block); lock_get_status(q, bfl, i, " ->"); move_lock_status(&q, &pos, offset); if(pos >= offset+length) goto done; - } while ((bfl = bfl->fl_nextblock) != fl); + } } done: + unlock_kernel(); *start = buffer; if(q-buffer < length) return (q-buffer); return length; } - - +void __init filelock_init(void) +{ + filelock_cache = kmem_cache_create("file lock cache", + sizeof(struct file_lock), 0, 0, init_once, NULL); + if (!filelock_cache) + panic("cannot create file lock slab cache"); +} diff -u --new-file --recursive --exclude-from 
/usr/src/exclude linux.t2/fs/minix/bitmap.c linux.ac/fs/minix/bitmap.c --- linux.t2/fs/minix/bitmap.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/minix/bitmap.c Sat Jun 10 22:18:33 2000 @@ -276,16 +276,13 @@ mark_inode_dirty(inode); unlock_super(sb); -printk("m_n_i: allocated inode "); if(DQUOT_ALLOC_INODE(sb, inode)) { -printk("fails quota test\n"); sb->dq_op->drop(inode); inode->i_nlink = 0; iput(inode); *error = -EDQUOT; return NULL; } -printk("is within quota\n"); *error = 0; return inode; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/minix/fsync.c linux.ac/fs/minix/fsync.c --- linux.t2/fs/minix/fsync.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/minix/fsync.c Sat Jun 10 21:51:07 2000 @@ -329,7 +329,7 @@ * NULL */ -int minix_sync_file(struct file * file, struct dentry *dentry) +int minix_sync_file(struct file * file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/minix/inode.c linux.ac/fs/minix/inode.c --- linux.t2/fs/minix/inode.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/minix/inode.c Sun Jun 11 15:28:50 2000 @@ -28,7 +28,7 @@ #include static void minix_read_inode(struct inode * inode); -static void minix_write_inode(struct inode * inode); +static void minix_write_inode(struct inode * inode, int); static int minix_statfs(struct super_block *sb, struct statfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -1232,7 +1232,7 @@ return V2_minix_update_inode(inode); } -static void minix_write_inode(struct inode * inode) +static void minix_write_inode(struct inode * inode, int unused) { struct buffer_head *bh; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/namei.c linux.ac/fs/namei.c --- linux.t2/fs/namei.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/namei.c Mon Jun 12 12:33:49 2000 @@ -191,21 +191,35 @@ * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist * > 0: 
(i_writecount) users are writing to the file. * - * WARNING: as soon as we will move get_write_access(), do_mmap() or - * prepare_binfmt() out of the big lock we will need a spinlock protecting - * the checks in all 3. For the time being it is not needed. + * Normally we operate on that counter with atomic_{inc,dec} and it's safe + * except for the cases where we don't hold i_writecount yet. Then we need to + * use {get,deny}_write_access() - these functions check the sign and refuse + * to do the change if sign is wrong. Exclusion between them is provided by + * spinlock (arbitration_lock) and I'll rip the second arsehole to the first + * who will try to move it in struct inode - just leave it here. */ +static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED; int get_write_access(struct inode * inode) { - if (atomic_read(&inode->i_writecount) < 0) + spin_lock(&arbitration_lock); + if (atomic_read(&inode->i_writecount) < 0) { + spin_unlock(&arbitration_lock); return -ETXTBSY; + } atomic_inc(&inode->i_writecount); + spin_unlock(&arbitration_lock); return 0; } - -void put_write_access(struct inode * inode) +int deny_write_access(struct file * file) { - atomic_dec(&inode->i_writecount); + spin_lock(&arbitration_lock); + if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { + spin_unlock(&arbitration_lock); + return -ETXTBSY; + } + atomic_dec(&file->f_dentry->d_inode->i_writecount); + spin_unlock(&arbitration_lock); + return 0; } void path_release(struct nameidata *nd) @@ -337,7 +351,34 @@ { return __follow_down(mnt,dentry); } - + +static inline void follow_dotdot(struct nameidata *nd) +{ + while(1) { + struct vfsmount *parent; + struct dentry *dentry; + if (nd->dentry == current->fs->root && + nd->mnt == current->fs->rootmnt) { + break; + } + if (nd->dentry != nd->mnt->mnt_root) { + dentry = dget(nd->dentry->d_parent); + dput(nd->dentry); + nd->dentry = dentry; + break; + } + parent=nd->mnt->mnt_parent; + if (parent == nd->mnt) { + break; + } + 
mntget(parent); + dentry=dget(nd->mnt->mnt_mountpoint); + dput(nd->dentry); + nd->dentry = dentry; + mntput(nd->mnt); + nd->mnt = parent; + } +} /* * Name resolution. * @@ -403,19 +444,7 @@ case 2: if (this.name[1] != '.') break; - while (1) { - if (nd->dentry == current->fs->root && - nd->mnt == current->fs->rootmnt) - break; - if (nd->dentry != nd->mnt->mnt_root) { - dentry = dget(nd->dentry->d_parent); - dput(nd->dentry); - nd->dentry = dentry; - break; - } - if (!__follow_up(&nd->mnt, &nd->dentry)) - break; - } + follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -483,19 +512,7 @@ case 2: if (this.name[1] != '.') break; - while (1) { - if (nd->dentry == current->fs->root && - nd->mnt == current->fs->rootmnt) - break; - if (nd->dentry != nd->mnt->mnt_root) { - dentry = dget(nd->dentry->d_parent); - dput(nd->dentry); - nd->dentry = dentry; - break; - } - if (!__follow_up(&nd->mnt, &nd->dentry)) - break; - } + follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -771,8 +788,6 @@ int error; if (!victim->d_inode || victim->d_parent->d_inode != dir) return -ENOENT; - if (IS_DEADDIR(dir)) - return -ENOENT; error = permission(dir,MAY_WRITE | MAY_EXEC); if (error) return error; @@ -786,8 +801,6 @@ return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - if (d_mountpoint(victim)) - return -EBUSY; } else if (S_ISDIR(victim->d_inode->i_mode)) return -EISDIR; return 0; @@ -917,6 +930,22 @@ error = -EEXIST; if (flag & O_EXCL) goto exit_dput; + if (flag & O_NOFOLLOW) { + error = -ELOOP; + if (dentry->d_inode->i_op && + dentry->d_inode->i_op->follow_link) + goto exit_dput; + if (d_mountpoint(dentry)) + goto exit_dput; + goto got_it; + } + /* Check mountpoints - it may be a binding on file. 
*/ + while (d_mountpoint(dentry) && + __follow_down(&nd->mnt, &dentry)) + ; + error = -ENOENT; + if (!dentry->d_inode) + goto exit_dput; if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) { /* @@ -930,6 +959,7 @@ return error; dentry = nd->dentry; } else { + got_it: dput(nd->dentry); nd->dentry = dentry; } @@ -962,6 +992,10 @@ if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) goto exit; + error = -EOPNOTSUPP; + if (S_ISSOCK(inode->i_mode)) + goto exit; + error = permission(inode,acc_mode); if (error) goto exit; @@ -1213,9 +1247,15 @@ double_down(&dir->i_zombie, &dentry->d_inode->i_zombie); d_unhash(dentry); - error = dir->i_op->rmdir(dir, dentry); - if (!error) - dentry->d_inode->i_flags |= S_DEAD; + if (IS_DEADDIR(dir)) + error = -ENOENT; + else if (d_mountpoint(dentry)) + error = -EBUSY; + else { + error = dir->i_op->rmdir(dir, dentry); + if (!error) + dentry->d_inode->i_flags |= S_DEAD; + } double_up(&dir->i_zombie, &dentry->d_inode->i_zombie); if (!error) d_delete(dentry); @@ -1275,9 +1315,13 @@ error = -EPERM; if (dir->i_op && dir->i_op->unlink) { DQUOT_INIT(dir); - error = dir->i_op->unlink(dir, dentry); - if (!error) - d_delete(dentry); + if (d_mountpoint(dentry)) + error = -EBUSY; + else { + error = dir->i_op->unlink(dir, dentry); + if (!error) + d_delete(dentry); + } } } up(&dir->i_zombie); @@ -1555,7 +1599,12 @@ } else double_down(&old_dir->i_zombie, &new_dir->i_zombie); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir)) + error = -ENOENT; + else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (target) { if (!error) target->i_flags |= S_DEAD; @@ -1603,7 +1652,10 @@ DQUOT_INIT(old_dir); DQUOT_INIT(new_dir); double_down(&old_dir->i_zombie, &new_dir->i_zombie); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if 
(d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); double_up(&old_dir->i_zombie, &new_dir->i_zombie); if (error) return error; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ncpfs/dir.c linux.ac/fs/ncpfs/dir.c --- linux.t2/fs/ncpfs/dir.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ncpfs/dir.c Tue Jun 6 12:29:05 2000 @@ -973,7 +973,7 @@ /* * Check whether to close the file ... */ - if (inode && NCP_FINFO(inode)->opened) { + if (inode) { PPRINTK("ncp_unlink: closing file\n"); ncp_make_closed(inode); } @@ -982,7 +982,7 @@ #ifdef CONFIG_NCPFS_STRONG /* 9C is Invalid path.. It should be 8F, 90 - read only, but it is not :-( */ - if (error == 0x9C && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */ + if ((error == 0x9C || error == 0x90) && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */ error = ncp_force_unlink(dir, dentry); } #endif @@ -1051,7 +1051,7 @@ error = ncp_ren_or_mov_file_or_subdir(server, old_dir, __old_name, new_dir, __new_name); #ifdef CONFIG_NCPFS_STRONG - if ((error == 0x90 || error == -EACCES) && + if ((error == 0x90 || error == 0x8B || error == -EACCES) && server->m.flags & NCP_MOUNT_STRONG) { /* RO */ error = ncp_force_rename(old_dir, old_dentry, __old_name, new_dir, new_dentry, __new_name); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ncpfs/file.c linux.ac/fs/ncpfs/file.c --- linux.t2/fs/ncpfs/file.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ncpfs/file.c Sat Jun 10 21:51:07 2000 @@ -26,7 +26,7 @@ return a < b ? 
a : b; } -static int ncp_fsync(struct file *file, struct dentry *dentry) +static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync) { return 0; } @@ -46,12 +46,12 @@ } DPRINTK("ncp_make_open: opened=%d, volume # %u, dir entry # %u\n", - NCP_FINFO(inode)->opened, + atomic_read(&NCP_FINFO(inode)->opened), NCP_FINFO(inode)->volNumber, NCP_FINFO(inode)->dirEntNum); error = -EACCES; - lock_super(inode->i_sb); - if (!NCP_FINFO(inode)->opened) { + down(&NCP_FINFO(inode)->open_sem); + if (!atomic_read(&NCP_FINFO(inode)->opened)) { struct ncp_entry_info finfo; int result; @@ -88,15 +88,18 @@ */ update: ncp_update_inode(inode, &finfo); + atomic_set(&NCP_FINFO(inode)->opened, 1); } access = NCP_FINFO(inode)->access; PPRINTK("ncp_make_open: file open, access=%x\n", access); - if (access == right || access == O_RDWR) + if (access == right || access == O_RDWR) { + atomic_inc(&NCP_FINFO(inode)->opened); error = 0; + } out_unlock: - unlock_super(inode->i_sb); + up(&NCP_FINFO(inode)->open_sem); out: return error; } @@ -153,7 +156,7 @@ freelen = ncp_read_bounce_size(bufsize); freepage = kmalloc(freelen, GFP_NFS); if (!freepage) - goto out; + goto outrel; error = 0; /* First read in as much as possible for each bufsize. */ while (already_read < count) { @@ -166,9 +169,8 @@ pos, to_read, buf, &read_this_time, freepage, freelen); if (error) { - kfree(freepage); - error = -EIO; /* This is not exact, i know.. */ - goto out; + error = -EIO; /* NW errno -> Linux errno */ + break; } pos += read_this_time; buf += read_this_time; @@ -188,6 +190,8 @@ DPRINTK("ncp_file_read: exit %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); +outrel: + ncp_inode_close(inode); out: return already_read ? 
already_read : error; } @@ -236,8 +240,10 @@ already_written = 0; bouncebuffer = kmalloc(bufsize, GFP_NFS); - if (!bouncebuffer) - return -EIO; /* -ENOMEM */ + if (!bouncebuffer) { + errno = -EIO; /* -ENOMEM */ + goto outrel; + } while (already_written < count) { int written_this_time; size_t to_write = min(bufsize - (pos % bufsize), @@ -271,15 +277,15 @@ } DPRINTK("ncp_file_write: exit %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); +outrel: + ncp_inode_close(inode); out: return already_written ? already_written : errno; } static int ncp_release(struct inode *inode, struct file *file) { - if (NCP_FINFO(inode)->opened) { - if (ncp_make_closed(inode)) { - DPRINTK("ncp_release: failed to close\n"); - } + if (ncp_make_closed(inode)) { + DPRINTK("ncp_release: failed to close\n"); } return 0; } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ncpfs/inode.c linux.ac/fs/ncpfs/inode.c --- linux.t2/fs/ncpfs/inode.c Sat Jun 24 13:50:01 2000 +++ linux.ac/fs/ncpfs/inode.c Sat Jun 10 22:18:33 2000 @@ -61,7 +61,6 @@ #ifdef CONFIG_NCPFS_STRONG NCP_FINFO(inode)->nwattr = nwinfo->i.attributes; #endif - NCP_FINFO(inode)->opened = nwinfo->opened; NCP_FINFO(inode)->access = nwinfo->access; NCP_FINFO(inode)->server_file_handle = nwinfo->server_file_handle; memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle, @@ -76,7 +75,7 @@ struct nw_info_struct *nwi = &nwinfo->i; struct ncp_server *server = NCP_SERVER(inode); - if (!NCP_FINFO(inode)->opened) { + if (!atomic_read(&NCP_FINFO(inode)->opened)) { #ifdef CONFIG_NCPFS_STRONG NCP_FINFO(inode)->nwattr = nwi->attributes; #endif @@ -216,6 +215,9 @@ inode = get_empty_inode(); if (inode) { + init_MUTEX(&NCP_FINFO(inode)->open_sem); + atomic_set(&NCP_FINFO(inode)->opened, info->opened); + inode->i_sb = sb; inode->i_dev = sb->s_dev; inode->i_ino = info->ino; @@ -245,7 +247,7 @@ DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); } - if (NCP_FINFO(inode)->opened && ncp_make_closed(inode) 
!= 0) { + if (ncp_make_closed(inode) != 0) { /* We can't do anything but complain. */ printk(KERN_ERR "ncp_delete_inode: could not close\n"); } @@ -318,7 +320,6 @@ sb->s_blocksize = 1024; /* Eh... Is this correct? */ sb->s_blocksize_bits = 10; sb->s_magic = NCP_SUPER_MAGIC; - sb->s_dev = dev; sb->s_op = &ncp_sops; server = NCP_SBP(sb); @@ -676,6 +677,7 @@ /* According to ndir, the changes only take effect after closing the file */ + ncp_inode_close(inode); result = ncp_make_closed(inode); if (!result) vmtruncate(inode, attr->ia_size); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ncpfs/ioctl.c linux.ac/fs/ncpfs/ioctl.c --- linux.t2/fs/ncpfs/ioctl.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ncpfs/ioctl.c Sun Jun 4 21:48:39 2000 @@ -335,18 +335,12 @@ { return result; } + result = -EIO; if (!ncp_conn_valid(server)) - { - return -EIO; - } + goto outrel; + result = -EISDIR; if (!S_ISREG(inode->i_mode)) - { - return -EISDIR; - } - if (!NCP_FINFO(inode)->opened) - { - return -EBADFD; - } + goto outrel; if (rqdata.cmd == NCP_LOCK_CLEAR) { result = ncp_ClearPhysicalRecord(NCP_SERVER(inode), @@ -373,6 +367,8 @@ rqdata.timeout); if (result > 0) result = -EAGAIN; } +outrel: + ncp_inode_close(inode); return result; } #endif /* CONFIG_NCPFS_IOCTL_LOCKING */ diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ncpfs/mmap.c linux.ac/fs/ncpfs/mmap.c --- linux.t2/fs/ncpfs/mmap.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ncpfs/mmap.c Sun Jun 4 21:48:39 2000 @@ -82,6 +82,7 @@ break; } } + ncp_inode_close(inode); } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ncpfs/ncplib_kernel.c linux.ac/fs/ncpfs/ncplib_kernel.c --- linux.t2/fs/ncpfs/ncplib_kernel.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ncpfs/ncplib_kernel.c Sun Jun 4 21:48:39 2000 @@ -221,20 +221,23 @@ return result; } -/* - * Called with the superblock locked. 
- */ int ncp_make_closed(struct inode *inode) { int err; - NCP_FINFO(inode)->opened = 0; - err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); - if (!err) - PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n", - NCP_FINFO(inode)->volNumber, - NCP_FINFO(inode)->dirEntNum, err); + err = 0; + down(&NCP_FINFO(inode)->open_sem); + if (atomic_read(&NCP_FINFO(inode)->opened) == 1) { + atomic_set(&NCP_FINFO(inode)->opened, 0); + err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); + + if (!err) + PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n", + NCP_FINFO(inode)->volNumber, + NCP_FINFO(inode)->dirEntNum, err); + } + up(&NCP_FINFO(inode)->open_sem); return err; } @@ -613,7 +616,8 @@ if ((result = ncp_request(server, 87)) != 0) goto out; - target->opened = 1; + if (!(create_attributes & aDIR)) + target->opened = 1; target->server_file_handle = ncp_reply_dword(server, 0); target->open_create_action = ncp_reply_byte(server, 4); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ncpfs/ncplib_kernel.h linux.ac/fs/ncpfs/ncplib_kernel.h --- linux.t2/fs/ncpfs/ncplib_kernel.h Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ncpfs/ncplib_kernel.h Sat Jun 24 14:22:33 2000 @@ -57,6 +57,10 @@ int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16, const char *, int *); +static inline void ncp_inode_close(struct inode *inode) { + atomic_dec(&NCP_FINFO(inode)->opened); +} + int ncp_obtain_info(struct ncp_server *server, struct inode *, char *, struct nw_info_struct *target); int ncp_lookup_volume(struct ncp_server *, char *, struct nw_info_struct *); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ncpfs/symlink.c linux.ac/fs/ncpfs/symlink.c --- linux.t2/fs/ncpfs/symlink.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ncpfs/symlink.c Tue Jun 6 12:29:05 2000 @@ -50,10 +50,6 @@ char *link; char *buf = (char*)kmap(page); - error = -EIO; - if (ncp_make_open(inode,O_RDONLY)) - goto 
fail; - error = -ENOMEM; for (cnt = 0; (link=(char *)kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_NFS))==NULL; cnt++) { if (cnt > 10) @@ -61,20 +57,22 @@ schedule(); } + if (ncp_make_open(inode,O_RDONLY)) + goto failEIO; + error=ncp_read_kernel(NCP_SERVER(inode),NCP_FINFO(inode)->file_handle, 0,NCP_MAX_SYMLINK_SIZE,link,&length); - if (error) { - kfree(link); - goto fail; - } + ncp_inode_close(inode); + /* Close file handle if no other users... */ + ncp_make_closed(inode); + if (error) + goto failEIO; + if (lengthd_inode; + if (ncp_make_open(inode, O_WRONLY)) + goto failfree; + ((__u32 *)link)[0]=NCP_SYMLINK_MAGIC0; ((__u32 *)link)[1]=NCP_SYMLINK_MAGIC1; @@ -134,19 +137,26 @@ symlink can point out of ncp filesystem */ length += 1; err = ncp_io2vol(NCP_SERVER(inode),link+8,&length,symname,length-1,0); - if (err) { - kfree(link); - return err; - } + if (err) + goto fail; if(ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, 0, length+8, link, &i) || i!=length+8) { - kfree(link); - return -EIO; + err = -EIO; + goto fail; } + ncp_inode_close(inode); + ncp_make_closed(inode); kfree(link); return 0; + +fail: + ncp_inode_close(inode); + ncp_make_closed(inode); +failfree: + kfree(link); + return err; } #endif diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/nfs/file.c linux.ac/fs/nfs/file.c --- linux.t2/fs/nfs/file.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/nfs/file.c Sat Jun 17 16:22:37 2000 @@ -38,7 +38,7 @@ static ssize_t nfs_file_read(struct file *, char *, size_t, loff_t *); static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *); static int nfs_file_flush(struct file *); -static int nfs_fsync(struct file *, struct dentry *dentry); +static int nfs_fsync(struct file *, struct dentry *dentry, int); struct file_operations nfs_file_operations = { read: nfs_file_read, @@ -123,7 +123,7 @@ * whether any write errors occurred for this process. 
*/ static int -nfs_fsync(struct file *file, struct dentry *dentry) +nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int status; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/nfsd/nfsctl.c linux.ac/fs/nfsd/nfsctl.c --- linux.t2/fs/nfsd/nfsctl.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/nfsd/nfsctl.c Tue May 9 23:13:08 2000 @@ -218,7 +218,7 @@ }; #define CMD_MAX (sizeof(sizes)/sizeof(sizes[0])-1) -long +int asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp) { struct nfsctl_arg * argp = opaque_argp; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/nfsd/nfssvc.c linux.ac/fs/nfsd/nfssvc.c --- linux.t2/fs/nfsd/nfssvc.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/nfsd/nfssvc.c Fri Jun 9 15:51:51 2000 @@ -83,7 +83,10 @@ if (error < 0) goto failure; -#if 0 /* Don't even pretend that TCP works. It doesn't. */ +#if CONFIG_NFSD_TCP + /* This is developer-only at the moment, + * there are untracked bugs as of 2.4.0-test1-ac11 + */ error = svc_makesock(nfsd_serv, IPPROTO_TCP, port); if (error < 0) goto failure; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/nfsd/vfs.c linux.ac/fs/nfsd/vfs.c --- linux.t2/fs/nfsd/vfs.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/nfsd/vfs.c Tue Jun 13 23:35:50 2000 @@ -314,7 +314,7 @@ if (err) goto out_nfserr; if (EX_ISSYNC(fhp->fh_export)) - write_inode_now(inode); + write_inode_now(inode, 0); err = 0; /* Don't unlock inode; the nfssvc_release functions are supposed @@ -512,7 +512,7 @@ { dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); down(&filp->f_dentry->d_inode->i_sem); - filp->f_op->fsync(filp, filp->f_dentry); + filp->f_op->fsync(filp, filp->f_dentry,0); up(&filp->f_dentry->d_inode->i_sem); } @@ -520,10 +520,10 @@ nfsd_sync_dir(struct dentry *dp) { struct inode *inode = dp->d_inode; - int (*fsync) (struct file *, struct dentry *); + int (*fsync) (struct file *, struct 
dentry *, int); if (inode->i_fop && (fsync = inode->i_fop->fsync)) { - fsync(NULL, dp); + fsync(NULL, dp, 0); } } @@ -891,7 +891,7 @@ if (EX_ISSYNC(fhp->fh_export)) { nfsd_sync_dir(dentry); - write_inode_now(dchild->d_inode); + write_inode_now(dchild->d_inode, 0); } @@ -1118,7 +1118,7 @@ | S_IFLNK; err = notify_change(dnew, iap); if (!err && EX_ISSYNC(fhp->fh_export)) - write_inode_now(dentry->d_inode); + write_inode_now(dentry->d_inode, 0); } } } else @@ -1178,7 +1178,7 @@ if (!err) { if (EX_ISSYNC(ffhp->fh_export)) { nfsd_sync_dir(ddir); - write_inode_now(dest); + write_inode_now(dest, 0); } } else { if (err == -EXDEV && rqstp->rq_vers == 2) diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ntfs/Makefile linux.ac/fs/ntfs/Makefile --- linux.t2/fs/ntfs/Makefile Sat Jun 24 13:36:12 2000 +++ linux.ac/fs/ntfs/Makefile Thu Jun 8 16:34:16 2000 @@ -3,7 +3,7 @@ O_TARGET := ntfs.o O_OBJS := fs.o sysctl.o support.o util.o inode.o dir.o super.o attr.o M_OBJS := $(O_TARGET) -EXTRA_CFLAGS = -DNTFS_IN_LINUX_KERNEL -DNTFS_VERSION=\"000502\" +EXTRA_CFLAGS = -DNTFS_IN_LINUX_KERNEL -DNTFS_VERSION=\"000607\" include $(TOPDIR)/Rules.make diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ntfs/fs.c linux.ac/fs/ntfs/fs.c --- linux.t2/fs/ntfs/fs.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/ntfs/fs.c Sun Jun 11 15:28:11 2000 @@ -80,7 +80,7 @@ io.param=buf; io.size=count; error=ntfs_read_attr(ino,ino->vol->at_data,NULL,*off,&io); - if(error)return -error; + if(error && !io.size)return -error; *off+=io.size; return io.size; @@ -707,7 +707,7 @@ #ifdef CONFIG_NTFS_RW static void -ntfs_write_inode (struct inode *ino) +ntfs_write_inode (struct inode *ino, int unused) { ntfs_debug (DEBUG_LINUX, "ntfs:write inode %x\n", ino->i_ino); ntfs_update_inode (NTFS_LINO2NINO (ino)); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/pipe.c linux.ac/fs/pipe.c --- linux.t2/fs/pipe.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/pipe.c Tue 
Jun 13 23:34:43 2000 @@ -607,6 +607,8 @@ root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; root->i_uid = root->i_gid = 0; root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; + root->i_sb = sb; + root->i_dev = sb->s_dev; sb->s_blocksize = 1024; sb->s_blocksize_bits = 10; sb->s_magic = PIPEFS_MAGIC; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/proc/generic.c linux.ac/fs/proc/generic.c --- linux.t2/fs/proc/generic.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/proc/generic.c Tue Jun 13 23:35:50 2000 @@ -44,7 +44,7 @@ #endif /* 4K page size but our output routines use some slack for overruns */ -#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) +#define PROC_BLOCK_SIZE (3*1024) static ssize_t proc_file_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos) diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/proc/proc_misc.c linux.ac/fs/proc/proc_misc.c --- linux.t2/fs/proc/proc_misc.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/proc/proc_misc.c Mon Jun 5 20:02:20 2000 @@ -325,14 +325,14 @@ for (major = 0; major < DK_MAX_MAJOR; major++) { for (disk = 0; disk < DK_MAX_DISK; disk++) { - int active = kstat.dk_drive_rio[major][disk] + + int active = kstat.dk_drive[major][disk] + kstat.dk_drive_rblk[major][disk] + - kstat.dk_drive_wio[major][disk] + kstat.dk_drive_wblk[major][disk]; if (active) len += sprintf(page + len, - "(%u,%u):(%u,%u,%u,%u) ", + "(%u,%u):(%u,%u,%u,%u,%u) ", major, disk, + kstat.dk_drive[major][disk], kstat.dk_drive_rio[major][disk], kstat.dk_drive_rblk[major][disk], kstat.dk_drive_wio[major][disk], diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/qnx4/fsync.c linux.ac/fs/qnx4/fsync.c --- linux.t2/fs/qnx4/fsync.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/qnx4/fsync.c Sat Jun 10 21:51:08 2000 @@ -147,7 +147,7 @@ return err; } -int qnx4_sync_file(struct file *file, struct dentry *dentry) +int qnx4_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode 
*inode = dentry->d_inode; int wait, err = 0; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/qnx4/inode.c linux.ac/fs/qnx4/inode.c --- linux.t2/fs/qnx4/inode.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/qnx4/inode.c Mon Jun 12 00:08:08 2000 @@ -73,7 +73,7 @@ sb->s_dirt = 0; } -static void qnx4_write_inode(struct inode *inode) +static void qnx4_write_inode(struct inode *inode, int unused) { struct qnx4_inode_entry *raw_inode; int block, ino; @@ -340,7 +340,6 @@ set_blocksize(dev, QNX4_BLOCK_SIZE); s->s_blocksize = QNX4_BLOCK_SIZE; s->s_blocksize_bits = QNX4_BLOCK_SIZE_BITS; - s->s_dev = dev; /* Check the boot signature. Since the qnx4 code is dangerous, we should leave as quickly as possible diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ramfs/inode.c linux.ac/fs/ramfs/inode.c --- linux.t2/fs/ramfs/inode.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/ramfs/inode.c Sat Jun 17 16:46:50 2000 @@ -22,13 +22,14 @@ * caches is sufficient. */ + #include #include #include #include #include #include - +#include #include /* some random number */ @@ -65,7 +66,8 @@ static int ramfs_readpage(struct file *file, struct page * page) { if (!Page_Uptodate(page)) { - memset((void *) page_address(page), 0, PAGE_CACHE_SIZE); + memset((void *) kmap(page), 0, PAGE_CACHE_SIZE); + kunmap(page); SetPageUptodate(page); } UnlockPage(page); @@ -86,7 +88,6 @@ { void *addr; - addr = (void *) kmap(page); if (!Page_Uptodate(page)) { memset(addr, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/readdir.c linux.ac/fs/readdir.c --- linux.t2/fs/readdir.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/readdir.c Sun May 21 18:05:01 2000 @@ -90,8 +90,6 @@ #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) #define ROUND_UP(x) (((x)+sizeof(long)-1) & ~(sizeof(long)-1)) -#ifndef __ia64__ - struct old_linux_dirent { unsigned long d_ino; unsigned long d_offset; @@ -145,8 +143,6 @@ out: 
return error; } - -#endif /* !__ia64__ */ /* * New, all-improved, singing, dancing, iBCS2-compliant getdents() diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/select.c linux.ac/fs/select.c --- linux.t2/fs/select.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/select.c Tue Jun 13 18:05:37 2000 @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -52,6 +53,7 @@ if(out==NULL) return NULL; out->nr = 0; + out->err = 0; out->entry = (struct poll_table_entry *)(out + 1); out->next = NULL; nfds -=__MAX_POLL_TABLE_ENTRIES; @@ -97,19 +99,36 @@ void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { + poll_table* walk = p; for (;;) { - if (p->nr < __MAX_POLL_TABLE_ENTRIES) { + if (walk->nr < __MAX_POLL_TABLE_ENTRIES) { struct poll_table_entry * entry; - entry = p->entry + p->nr; +ok_table: + entry = walk->entry + walk->nr; get_file(filp); entry->filp = filp; entry->wait_address = wait_address; init_waitqueue_entry(&entry->wait, current); add_wait_queue(wait_address,&entry->wait); - p->nr++; + walk->nr++; return; } - p = p->next; + if (walk->next == NULL) { + poll_table *tmp; + current->state=TASK_RUNNING; + tmp = (poll_table *) __get_free_page(GFP_KERNEL); + if (!tmp) { + p->err=-ENOMEM; + return; + } + tmp->nr = 0; + tmp->entry = (struct poll_table_entry *)(tmp + 1); + tmp->next = NULL; + walk->next = tmp; + walk = tmp; + goto ok_table; + } + walk = walk->next; } } @@ -226,11 +245,16 @@ wait = NULL; } } - wait = NULL; if (retval || !__timeout || signal_pending(current)) break; + if(orig_wait->err) { + retval=orig_wait->err; + goto out; + } + wait = NULL; __timeout = schedule_timeout(__timeout); } +out: current->state = TASK_RUNNING; free_wait(orig_wait); @@ -294,7 +318,10 @@ */ ret = -ENOMEM; size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); + if(size>8000) + bits = vmalloc(6 * size); + else + bits = kmalloc(6 * size, GFP_KERNEL); if (!bits) goto out_nofds; fds.in = (unsigned long *) bits; @@ 
-339,7 +366,10 @@ set_fd_set(n, exp, fds.res_ex); out: - kfree(bits); + if(size>8000) + vfree(bits); + else + kfree(bits); out_nofds: return ret; } @@ -382,6 +412,7 @@ struct pollfd *fds[], poll_table *wait, long timeout) { int count = 0; + poll_table* orig_wait = wait; for (;;) { unsigned int i; @@ -391,11 +422,16 @@ do_pollfd(POLLFD_PER_PAGE, fds[i], &wait, &count); if (nleft) do_pollfd(nleft, fds[nchunks], &wait, &count); - wait = NULL; if (count || !timeout || signal_pending(current)) break; + if(orig_wait->err) { + count=orig_wait->err; + goto out; + } + wait=NULL; timeout = schedule_timeout(timeout); } +out: current->state = TASK_RUNNING; return count; } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/smbfs/file.c linux.ac/fs/smbfs/file.c --- linux.t2/fs/smbfs/file.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/smbfs/file.c Sat Jun 10 21:51:08 2000 @@ -27,7 +27,7 @@ /* #define pr_debug printk */ static int -smb_fsync(struct file *file, struct dentry * dentry) +smb_fsync(struct file *file, struct dentry * dentry, int datasync) { #ifdef SMBFS_DEBUG_VERBOSE printk("smb_fsync: sync file %s/%s\n", diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/super.c linux.ac/fs/super.c --- linux.t2/fs/super.c Sat Jun 24 13:36:10 2000 +++ linux.ac/fs/super.c Tue Jun 13 23:34:43 2000 @@ -281,14 +281,28 @@ static LIST_HEAD(vfsmntlist); -static struct vfsmount *add_vfsmnt(struct super_block *sb, - struct dentry *mountpoint, +/** + * add_vfsmnt - add a new mount node + * @nd: location of mountpoint or %NULL if we want a root node + * @root: root of (sub)tree to be mounted + * @dev_name: device name to show in /proc/mounts + * + * This is VFS idea of mount. New node is allocated, bound to a tree + * we are mounting and optionally (OK, usually) registered as mounted + * on a given mountpoint. Returns a pointer to new node or %NULL in + * case of failure. 
+ * + * Potential reason for failure (aside of trivial lack of memory) is a + * deleted mountpoint. Caller must hold ->i_zombie on mountpoint + * dentry (if any). + */ + +static struct vfsmount *add_vfsmnt(struct nameidata *nd, struct dentry *root, - struct vfsmount *parent, - const char *dev_name, - const char *dir_name) + const char *dev_name) { struct vfsmount *mnt; + struct super_block *sb = root->d_inode->i_sb; char *name; mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL); @@ -296,13 +310,7 @@ goto out; memset(mnt, 0, sizeof(struct vfsmount)); - atomic_set(&mnt->mnt_count,1); - mnt->mnt_sb = sb; - mnt->mnt_mountpoint = dget(mountpoint); - mnt->mnt_root = dget(root); - mnt->mnt_parent = parent ? mntget(parent) : mnt; - - /* N.B. Is it really OK to have a vfsmount without names? */ + /* It may be NULL, but who cares? */ if (dev_name) { name = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (name) { @@ -310,51 +318,53 @@ mnt->mnt_devname = name; } } - name = kmalloc(strlen(dir_name)+1, GFP_KERNEL); - if (name) { - strcpy(name, dir_name); - mnt->mnt_dirname = name; - } mnt->mnt_owner = current->uid; + atomic_set(&mnt->mnt_count,1); + mnt->mnt_sb = sb; - if (parent) - list_add(&mnt->mnt_child, &parent->mnt_mounts); - else + if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) + goto fail; + mnt->mnt_root = dget(root); + mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root); + mnt->mnt_parent = nd ? 
mntget(nd->mnt) : mnt; + + if (nd) { + list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); + list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt); + } else { INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_clash); + } INIT_LIST_HEAD(&mnt->mnt_mounts); list_add(&mnt->mnt_instances, &sb->s_mounts); - list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); list_add(&mnt->mnt_list, vfsmntlist.prev); out: return mnt; +fail: + kfree(mnt->mnt_devname); + kfree(mnt); + return NULL; } static void move_vfsmnt(struct vfsmount *mnt, struct dentry *mountpoint, struct vfsmount *parent, - const char *dev_name, - const char *dir_name) + const char *dev_name) { - struct dentry *old_mountpoint = mnt->mnt_mountpoint; - struct vfsmount *old_parent = mnt->mnt_parent; - char *new_devname = NULL, *new_dirname = NULL; + struct dentry *old_mountpoint; + struct vfsmount *old_parent; + char *new_devname = NULL; if (dev_name) { new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (new_devname) strcpy(new_devname, dev_name); } - if (dir_name) { - new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL); - if (new_dirname) - strcpy(new_dirname, dir_name); - } + + old_mountpoint = mnt->mnt_mountpoint; + old_parent = mnt->mnt_parent; /* flip names */ - if (new_dirname) { - kfree(mnt->mnt_dirname); - mnt->mnt_dirname = new_dirname; - } if (new_devname) { kfree(mnt->mnt_devname); mnt->mnt_devname = new_devname; @@ -365,11 +375,13 @@ mnt->mnt_parent = parent ? 
mntget(parent) : mnt; list_del(&mnt->mnt_clash); list_del(&mnt->mnt_child); - list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); - if (parent) + if (parent) { list_add(&mnt->mnt_child, &parent->mnt_mounts); - else + list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); + } else { INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_clash); + } /* put the old stuff */ dput(old_mountpoint); @@ -391,7 +403,6 @@ dput(mnt->mnt_mountpoint); dput(mnt->mnt_root); kfree(mnt->mnt_devname); - kfree(mnt->mnt_dirname); kfree(mnt); } @@ -738,10 +749,6 @@ /* Done with lookups, semaphore down */ down(&mount_sem); dev = to_kdev_t(bdev->bd_dev); - check_disk_change(dev); - error = -EACCES; - if (!(flags & MS_RDONLY) && is_read_only(dev)) - goto out; sb = get_super(dev); if (sb) { if (fs_type == sb->s_type) { @@ -755,6 +762,10 @@ error = blkdev_get(bdev, mode, 0, BDEV_FS); if (error) goto out; + check_disk_change(dev); + error = -EACCES; + if (!(flags & MS_RDONLY) && is_read_only(dev)) + goto out1; error = -EINVAL; sb = read_super(dev, bdev, fs_type, flags, data, 0); if (sb) { @@ -762,6 +773,7 @@ path_release(&nd); return sb; } +out1: blkdev_put(bdev, BDEV_FS); } out: @@ -895,7 +907,7 @@ put_unnamed_dev(dev); return ERR_PTR(-EINVAL); } - mnt = add_vfsmnt(sb, sb->s_root, sb->s_root, NULL, "none", type->name); + mnt = add_vfsmnt(NULL, sb->s_root, "none"); if (!mnt) { kill_super(sb, 0); return ERR_PTR(-ENOMEM); @@ -909,10 +921,7 @@ void kern_umount(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; - struct dentry *root = sb->s_root; remove_vfsmnt(mnt); - dput(root); - sb->s_root = NULL; kill_super(sb, 0); } @@ -932,6 +941,16 @@ { struct super_block * sb = mnt->mnt_sb; + /* + * No sense to grab the lock for this test, but test itself looks + * somewhat bogus. Suggestions for better replacement? + * Ho-hum... In principle, we might treat that as umount + switch + * to rootfs. GC would eventually take care of the old vfsmount. 
+ * The problem being: we have to implement rootfs and GC for that ;-) + * Actually it makes sense, especially if rootfs would contain a + * /reboot - static binary that would close all descriptors and + * call reboot(9). Then init(8) could umount root and exec /reboot. + */ if (mnt == current->fs->rootmnt && !umount_root) { int retval = 0; /* @@ -952,6 +971,7 @@ if (mnt->mnt_instances.next != mnt->mnt_instances.prev) { if (sb->s_type->fs_flags & FS_SINGLE) put_filesystem(sb->s_type); + /* We hold two references, so mntput() is safe */ mntput(mnt); remove_vfsmnt(mnt); return 0; @@ -988,14 +1008,14 @@ shrink_dcache_sb(sb); fsync_dev(sb->s_dev); - /* Something might grab it again - redo checks */ - - if (atomic_read(&mnt->mnt_count) > 2) { + if (sb->s_root->d_inode->i_state) { mntput(mnt); return -EBUSY; } - if (sb->s_root->d_inode->i_state) { + /* Something might grab it again - redo checks */ + + if (atomic_read(&mnt->mnt_count) > 2) { mntput(mnt); return -EBUSY; } @@ -1067,6 +1087,8 @@ { if (capable(CAP_SYS_ADMIN)) return 0; + return -EPERM; +#ifdef notyet if (S_ISLNK(nd->dentry->d_inode->i_mode)) return -EPERM; if (nd->dentry->d_inode->i_mode & S_ISVTX) { @@ -1076,6 +1098,7 @@ if (permission(nd->dentry->d_inode, MAY_WRITE)) return -EPERM; return 0; +#endif } /* @@ -1102,22 +1125,22 @@ if (S_ISDIR(new_nd.dentry->d_inode->i_mode) != S_ISDIR(old_nd.dentry->d_inode->i_mode)) goto out2; - - down(&mount_sem); - err = -ENOENT; - if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) - goto out3; - if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) - goto out3; - /* there we go */ + err = -ENOMEM; if (old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE) get_filesystem(old_nd.mnt->mnt_sb->s_type); - if (add_vfsmnt(old_nd.mnt->mnt_sb, new_nd.dentry, old_nd.dentry, - new_nd.mnt, old_nd.mnt->mnt_devname, new_name)) + + down(&mount_sem); + /* there we go */ + down(&new_nd.dentry->d_inode->i_zombie); + if (IS_DEADDIR(new_nd.dentry->d_inode)) + err = -ENOENT; + else if 
(add_vfsmnt(&new_nd, old_nd.dentry, old_nd.mnt->mnt_devname)) err = 0; -out3: + up(&new_nd.dentry->d_inode->i_zombie); up(&mount_sem); + if (err && old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE) + put_filesystem(old_nd.mnt->mnt_sb->s_type); out2: path_release(&new_nd); out1: @@ -1215,7 +1238,7 @@ { struct file_system_type * fstype; struct nameidata nd; - struct vfsmount *mnt; + struct vfsmount *mnt = NULL; struct super_block *sb; int retval = 0; unsigned long flags = 0; @@ -1224,8 +1247,6 @@ if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; - if (!type_page || !memchr(type_page, 0, PAGE_SIZE)) - return -EINVAL; if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) return -EINVAL; @@ -1239,6 +1260,11 @@ if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) flags = new_flags & ~MS_MGC_MSK; + /* For the rest we need the type */ + + if (!type_page || !memchr(type_page, 0, PAGE_SIZE)) + return -EINVAL; + /* loopback mount? This is special - requires fewer capabilities */ if (strcmp(type_page, "bind")==0) return do_loopback(dev_name, dir_name); @@ -1272,16 +1298,18 @@ if (IS_ERR(sb)) goto dput_out; - retval = -ENOENT; - if (d_unhashed(nd.dentry) && !IS_ROOT(nd.dentry)) - goto fail; - /* Something was mounted here while we slept */ while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry)) ; - - retval = -ENOMEM; - mnt = add_vfsmnt(sb, nd.dentry, sb->s_root, nd.mnt, dev_name, dir_name); + retval = -ENOENT; + if (!nd.dentry->d_inode) + goto fail; + down(&nd.dentry->d_inode->i_zombie); + if (!IS_DEADDIR(nd.dentry->d_inode)) { + retval = -ENOMEM; + mnt = add_vfsmnt(&nd, sb->s_root, dev_name); + } + up(&nd.dentry->d_inode->i_zombie); if (!mnt) goto fail; retval = 0; @@ -1312,15 +1340,6 @@ if (retval < 0) return retval; - /* copy_mount_options allows a NULL user pointer, - * and just returns zero in that case. But if we - * allow the type to be NULL we will crash. - * Previously we did not check this case. 
- */ - if (type_page == 0) - return -EINVAL; - - lock_kernel(); dir_page = getname(dir_name); retval = PTR_ERR(dir_page); if (IS_ERR(dir_page)) @@ -1331,8 +1350,10 @@ goto out2; retval = copy_mount_options (data, &data_page); if (retval >= 0) { + lock_kernel(); retval = do_mount((char*)dev_page,dir_page,(char*)type_page, new_flags, (void*)data_page); + unlock_kernel(); free_page(data_page); } free_page(dev_page); @@ -1340,7 +1361,6 @@ putname(dir_page); out1: free_page(type_page); - unlock_kernel(); return retval; } @@ -1490,12 +1510,11 @@ path + 5 + path_start, 0, NULL, NULL); memcpy (path + path_start, "/dev/", 5); - vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL, - path + path_start, "/"); + vfsmnt = add_vfsmnt(NULL, sb->s_root, path + path_start); } else - vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL, - "/dev/root", "/"); + vfsmnt = add_vfsmnt(NULL, sb->s_root, "/dev/root"); + /* FIXME: if something will try to umount us right now... */ if (vfsmnt) { set_fs_root(current->fs, vfsmnt, sb->s_root); set_fs_pwd(current->fs, vfsmnt, sb->s_root); @@ -1516,6 +1535,7 @@ read_lock(&tasklist_lock); for_each_task(p) { + /* FIXME - unprotected usage of ->fs + (harmless) race */ if (!p->fs) continue; if (p->fs->root == old_root && p->fs->rootmnt == old_rootmnt) set_fs_root(p->fs, new_rootmnt, new_root); @@ -1576,7 +1596,10 @@ root_mnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); down(&mount_sem); + down(&old_nd.dentry->d_inode->i_zombie); error = -ENOENT; + if (IS_DEADDIR(new_nd.dentry->d_inode)) + goto out2; if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) goto out2; if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) @@ -1599,19 +1622,12 @@ } else if (!is_subdir(old_nd.dentry, new_nd.dentry)) goto out2; - error = -ENOMEM; - name = __getname(); - if (!name) - goto out2; - - move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL, "/"); - move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL, - __d_path(old_nd.dentry, old_nd.mnt, 
new_nd.dentry, - new_nd.mnt, name, PAGE_SIZE)); - putname(name); + move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL); + move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL); chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt); error = 0; out2: + up(&old_nd.dentry->d_inode->i_zombie); up(&mount_sem); dput(root); mntput(root_mnt); @@ -1629,10 +1645,11 @@ int __init change_root(kdev_t new_root_dev,const char *put_old) { kdev_t old_root_dev = ROOT_DEV; - struct vfsmount *old_rootmnt = mntget(current->fs->rootmnt); + struct vfsmount *old_rootmnt; struct nameidata devfs_nd, nd; int error = 0; + old_rootmnt = mntget(current->fs->rootmnt); /* First unmount devfs if mounted */ if (path_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd)) error = path_walk("/dev", &devfs_nd); @@ -1675,7 +1692,8 @@ printk(KERN_ERR "error %ld\n",blivet); return error; } - move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old", put_old); + /* FIXME: we should hold i_zombie on nd.dentry */ + move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old"); mntput(old_rootmnt); path_release(&nd); return 0; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/sysv/fsync.c linux.ac/fs/sysv/fsync.c --- linux.t2/fs/sysv/fsync.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/sysv/fsync.c Sat Jun 10 21:51:08 2000 @@ -178,7 +178,7 @@ return err; } -int sysv_sync_file(struct file * file, struct dentry *dentry) +int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync) { int wait, err = 0; struct inode *inode = dentry->d_inode; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/sysv/ialloc.c linux.ac/fs/sysv/ialloc.c --- linux.t2/fs/sysv/ialloc.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/sysv/ialloc.c Sun Jun 11 15:28:29 2000 @@ -142,7 +142,7 @@ /* Change directory entry: */ inode->i_mode = 0; /* for sysv_write_inode() */ inode->i_size = 0; /* ditto */ - sysv_write_inode(inode); /* ensure inode not allocated again */ + 
sysv_write_inode(inode, 0); /* ensure inode not allocated again */ /* FIXME: caller may call this too. */ mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ /* That's it. */ diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/sysv/inode.c linux.ac/fs/sysv/inode.c --- linux.t2/fs/sysv/inode.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/sysv/inode.c Mon Jun 12 00:08:08 2000 @@ -496,7 +496,6 @@ sb->s_blocksize = sb->sv_block_size; sb->s_blocksize_bits = sb->sv_block_size_bits; /* set up enough so that it can read an inode */ - sb->s_dev = dev; sb->s_op = &sysv_sops; root_inode = iget(sb,SYSV_ROOT_INO); sb->s_root = d_alloc_root(root_inode); @@ -1154,7 +1153,7 @@ return bh; } -void sysv_write_inode(struct inode * inode) +void sysv_write_inode(struct inode * inode, int unused) { struct buffer_head *bh; bh = sysv_update_inode(inode); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/udf/fsync.c linux.ac/fs/udf/fsync.c --- linux.t2/fs/udf/fsync.c Sat Jun 24 13:36:12 2000 +++ linux.ac/fs/udf/fsync.c Sun Jun 11 00:15:37 2000 @@ -96,7 +96,7 @@ * even pass file to fsync ? */ -int udf_sync_file(struct file * file, struct dentry *dentry) +int udf_sync_file(struct file * file, struct dentry *dentry, int dsync) { int wait, err = 0; struct inode *inode = dentry->d_inode; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/udf/inode.c linux.ac/fs/udf/inode.c --- linux.t2/fs/udf/inode.c Sat Jun 24 13:36:12 2000 +++ linux.ac/fs/udf/inode.c Mon Jun 12 12:16:17 2000 @@ -1213,10 +1213,10 @@ * Written, tested, and released. 
*/ -void udf_write_inode(struct inode * inode) +void udf_write_inode(struct inode * inode, int wait) { lock_kernel(); - udf_update_inode(inode, 0); + udf_update_inode(inode, wait); unlock_kernel(); } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/udf/super.c linux.ac/fs/udf/super.c --- linux.t2/fs/udf/super.c Sat Jun 24 13:36:12 2000 +++ linux.ac/fs/udf/super.c Mon Jun 12 00:08:08 2000 @@ -1419,7 +1419,6 @@ return sb; error_out: - sb->s_dev = NODEV; if (UDF_SB_VAT(sb)) iput(UDF_SB_VAT(sb)); if (!(sb->s_flags & MS_RDONLY)) diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/udf/udfdecl.h linux.ac/fs/udf/udfdecl.h --- linux.t2/fs/udf/udfdecl.h Sat Jun 24 13:36:12 2000 +++ linux.ac/fs/udf/udfdecl.h Sat Jun 24 14:23:31 2000 @@ -134,7 +134,7 @@ extern void udf_read_inode(struct inode *); extern void udf_put_inode(struct inode *); extern void udf_delete_inode(struct inode *); -extern void udf_write_inode(struct inode *); +extern void udf_write_inode(struct inode *, int); extern long udf_locked_block_map(struct inode *, long); extern long udf_block_map(struct inode *, long); extern int inode_bmap(struct inode *, int, lb_addr *, Uint32 *, lb_addr *, Uint32 *, Uint32 *, struct buffer_head **); @@ -184,7 +184,7 @@ extern int udf_new_block(const struct inode *, Uint16, Uint32, int *); /* fsync.c */ -extern int udf_sync_file(struct file *, struct dentry *); +extern int udf_sync_file(struct file *, struct dentry *, int data); /* directory.c */ extern Uint8 * udf_filead_read(struct inode *, Uint8 *, Uint8, lb_addr, int *, int *, struct buffer_head **, int *); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/ufs/inode.c linux.ac/fs/ufs/inode.c --- linux.t2/fs/ufs/inode.c Sat Jun 24 13:36:11 2000 +++ linux.ac/fs/ufs/inode.c Sun Jun 11 15:28:11 2000 @@ -744,7 +744,7 @@ return 0; } -void ufs_write_inode (struct inode * inode) +void ufs_write_inode (struct inode * inode, int unused) { ufs_update_inode 
(inode, 0); } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/fs/umsdos/inode.c linux.ac/fs/umsdos/inode.c --- linux.t2/fs/umsdos/inode.c Sat Jun 24 13:50:02 2000 +++ linux.ac/fs/umsdos/inode.c Tue Jun 13 23:35:50 2000 @@ -29,7 +29,7 @@ /* - * Initialize a private filp + * Initialize a private filp. dentry is always a regular file one. */ void fill_new_filp (struct file *filp, struct dentry *dentry) { @@ -293,11 +293,11 @@ /* * Update the disk with the inode content */ -void UMSDOS_write_inode (struct inode *inode) +void UMSDOS_write_inode (struct inode *inode, int unused) { struct iattr newattrs; - fat_write_inode (inode); + fat_write_inode (inode, 0); newattrs.ia_mtime = inode->i_mtime; newattrs.ia_atime = inode->i_atime; newattrs.ia_ctime = inode->i_ctime; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/affs_fs.h linux.ac/include/linux/affs_fs.h --- linux.t2/include/linux/affs_fs.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/affs_fs.h Sun Jun 11 15:27:27 2000 @@ -84,7 +84,7 @@ extern void affs_put_inode(struct inode *inode); extern void affs_delete_inode(struct inode *inode); extern void affs_read_inode(struct inode *inode); -extern void affs_write_inode(struct inode *inode); +extern void affs_write_inode(struct inode *inode, int); /* super.c */ diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/ext2_fs.h linux.ac/include/linux/ext2_fs.h --- linux.t2/include/linux/ext2_fs.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/ext2_fs.h Sat Jun 10 21:51:08 2000 @@ -548,7 +548,9 @@ extern int ext2_write (struct inode *, struct file *, char *, int); /* fsync.c */ -extern int ext2_sync_file (struct file *, struct dentry *); +extern int ext2_fsync_file (struct file *, struct dentry *, int); +extern int ext2_fsync_inode (struct inode *, int); +extern int ext2_osync_inode (struct inode *, int); /* ialloc.c */ extern struct inode * ext2_new_inode (const struct inode 
*, int, int *); @@ -562,7 +564,7 @@ extern struct buffer_head * ext2_bread (struct inode *, int, int, int *); extern void ext2_read_inode (struct inode *); -extern void ext2_write_inode (struct inode *); +extern void ext2_write_inode (struct inode *, int); extern void ext2_put_inode (struct inode *); extern void ext2_delete_inode (struct inode *); extern int ext2_sync_inode (struct inode *); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/fs.h linux.ac/include/linux/fs.h --- linux.t2/include/linux/fs.h Sat Jun 24 13:50:04 2000 +++ linux.ac/include/linux/fs.h Sat Jun 24 14:00:01 2000 @@ -236,6 +236,9 @@ unsigned long b_rsector; /* Real buffer location on disk */ wait_queue_head_t b_wait; struct kiobuf * b_kiobuf; /* kiobuf which owns this IO */ + + struct inode * b_inode; + struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ }; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); @@ -375,6 +378,8 @@ struct list_head i_list; struct list_head i_dentry; + struct list_head i_dirty_buffers; + unsigned long i_ino; atomic_t i_count; kdev_t i_dev; @@ -441,16 +446,24 @@ }; /* Inode state bits.. 
*/ -#define I_DIRTY 1 -#define I_LOCK 2 -#define I_FREEING 4 -#define I_CLEAR 8 +#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */ +#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */ +#define I_LOCK 4 +#define I_FREEING 8 +#define I_CLEAR 16 -extern void __mark_inode_dirty(struct inode *); +#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC) + +extern void __mark_inode_dirty(struct inode *, int); static inline void mark_inode_dirty(struct inode *inode) { - if (!(inode->i_state & I_DIRTY)) - __mark_inode_dirty(inode); + if ((inode->i_state & I_DIRTY) != I_DIRTY) + __mark_inode_dirty(inode, I_DIRTY); +} +static inline void mark_inode_dirty_sync(struct inode *inode) +{ + if (!(inode->i_state & I_DIRTY_SYNC)) + __mark_inode_dirty(inode, I_DIRTY_SYNC); } struct fown_struct { @@ -504,10 +517,8 @@ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ - struct file_lock *fl_nextlink; /* doubly linked list of all locks */ - struct file_lock *fl_prevlink; /* used to simplify lock removal */ - struct file_lock *fl_nextblock; /* circular list of blocked processes */ - struct file_lock *fl_prevblock; + struct list_head fl_link; /* doubly linked list of all locks */ + struct list_head fl_block; /* circular list of blocked processes */ fl_owner_t fl_owner; unsigned int fl_pid; wait_queue_head_t fl_wait; @@ -532,7 +543,7 @@ #define OFFSET_MAX INT_LIMIT(loff_t) #endif -extern struct file_lock *file_lock_table; +extern struct list_head file_lock_list; #include @@ -721,7 +732,7 @@ int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, struct dentry *); + int (*fsync) (struct file *, struct dentry *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); @@ -754,7 +765,7 @@ */ struct 
super_operations { void (*read_inode) (struct inode *); - void (*write_inode) (struct inode *); + void (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); @@ -859,8 +870,8 @@ return locks_mandatory_area( FLOCK_VERIFY_WRITE, inode, filp, size < inode->i_size ? size : inode->i_size, - abs(inode->i_size - size) - ); + size < inode->i_size ? inode->i_size - size + : size - inode->i_size); return 0; } @@ -981,23 +992,44 @@ bh->b_end_io(bh, 0); } +extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *); +static inline void mark_buffer_dirty_inode(struct buffer_head *bh, int flag, struct inode *inode) +{ + mark_buffer_dirty(bh, flag); + buffer_insert_inode_queue(bh, inode); +} + extern void balance_dirty(kdev_t); extern int check_disk_change(kdev_t); extern int invalidate_inodes(struct super_block *); extern void invalidate_inode_pages(struct inode *); +extern void invalidate_inode_buffers(struct inode *); #define invalidate_buffers(dev) __invalidate_buffers((dev), 0) #define destroy_buffers(dev) __invalidate_buffers((dev), 1) extern void __invalidate_buffers(kdev_t dev, int); extern void sync_inodes(kdev_t); -extern void write_inode_now(struct inode *); +extern void write_inode_now(struct inode *, int); extern void sync_dev(kdev_t); extern int fsync_dev(kdev_t); +extern int fsync_inode_buffers(struct inode *); +extern int osync_inode_buffers(struct inode *); +extern int generic_osync_inode(struct inode *, int); +extern int inode_has_buffers(struct inode *); extern void sync_supers(kdev_t); extern int bmap(struct inode *, int); extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int); extern int get_write_access(struct inode *); -extern void put_write_access(struct inode *); +extern int deny_write_access(struct file *); +static inline void put_write_access(struct inode * inode) +{ + 
atomic_dec(&inode->i_writecount); +} +static inline void allow_write_access(struct file *file) +{ + if (file) + atomic_inc(&file->f_dentry->d_inode->i_writecount); +} extern int do_pipe(int *); extern int open_namei(const char *, int, int, struct nameidata *); @@ -1119,6 +1151,7 @@ /* Generic buffer handling for block filesystems.. */ extern int block_flushpage(struct page *, unsigned long); +extern void block_destroy_buffers(struct page *); extern int block_symlink(struct inode *, const char *, int); extern int block_write_full_page(struct page*, get_block_t*); extern int block_read_full_page(struct page*, get_block_t*); @@ -1172,7 +1205,7 @@ extern ssize_t char_write(struct file *, const char *, size_t, loff_t *); extern ssize_t block_write(struct file *, const char *, size_t, loff_t *); -extern int file_fsync(struct file *, struct dentry *); +extern int file_fsync(struct file *, struct dentry *, int); extern int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx); extern int inode_change_ok(struct inode *, struct iattr *); @@ -1184,20 +1217,6 @@ * functions were in linux/fs/ C (VFS) files. * */ - -/* - * We need to do a check-parent every time - * after we have locked the parent - to verify - * that the parent is still our parent and - * that we are still hashed onto it.. - * - * This is required in case two processes race - * on removing (or moving) the same entry: the - * parent lock will serialize them, but the - * other process will be too late.. 
- */ -#define check_parent(dir, dentry) \ - ((dir) == (dentry)->d_parent && !d_unhashed(dentry)) /* * Locking the parent is needed to: diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/minix_fs.h linux.ac/include/linux/minix_fs.h --- linux.t2/include/linux/minix_fs.h Sat Jun 24 13:36:12 2000 +++ linux.ac/include/linux/minix_fs.h Sat Jun 10 21:51:08 2000 @@ -101,7 +101,7 @@ extern void minix_truncate(struct inode *); extern int minix_sync_inode(struct inode *); -extern int minix_sync_file(struct file *, struct dentry *); +extern int minix_sync_file(struct file *, struct dentry *, int); extern struct address_space_operations minix_aops; extern struct inode_operations minix_file_inode_operations; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/mm.h linux.ac/include/linux/mm.h --- linux.t2/include/linux/mm.h Sat Jun 24 13:36:12 2000 +++ linux.ac/include/linux/mm.h Sat Jun 24 14:00:01 2000 @@ -153,6 +153,7 @@ struct buffer_head * buffers; unsigned long virtual; /* nonzero if kmapped */ struct zone_struct *zone; + unsigned int age; } mem_map_t; #define get_page(p) atomic_inc(&(p)->count) @@ -169,7 +170,7 @@ #define PG_dirty 4 #define PG_decr_after 5 #define PG_unused_01 6 -#define PG__unused_02 7 +#define PG_active 7 #define PG_slab 8 #define PG_swap_cache 9 #define PG_skip 10 @@ -185,6 +186,7 @@ #define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) #define PageDirty(page) test_bit(PG_dirty, &(page)->flags) #define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) +#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) #define PageLocked(page) test_bit(PG_locked, &(page)->flags) #define LockPage(page) set_bit(PG_locked, &(page)->flags) #define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags) @@ -192,11 +194,15 @@ clear_bit(PG_locked, &(page)->flags); \ wake_up(&page->wait); \ } while (0) +#define PageActive(page) test_bit(PG_active, &(page)->flags) 
+#define SetPageActive(page) set_bit(PG_active, &(page)->flags) +#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) #define PageError(page) test_bit(PG_error, &(page)->flags) #define SetPageError(page) set_bit(PG_error, &(page)->flags) #define ClearPageError(page) clear_bit(PG_error, &(page)->flags) #define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) #define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) +#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) #define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) #define PageDecrAfter(page) test_bit(PG_decr_after, &(page)->flags) #define SetPageDecrAfter(page) set_bit(PG_decr_after, &(page)->flags) @@ -457,6 +463,7 @@ extern unsigned long page_unuse(struct page *); extern int shrink_mmap(int, int); extern void truncate_inode_pages(struct address_space *, loff_t); +extern void truncate_all_inode_pages(struct address_space *); /* generic vm_area_ops exported for stackable file systems */ extern int filemap_swapout(struct page * page, struct file *file); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/mount.h linux.ac/include/linux/mount.h --- linux.t2/include/linux/mount.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/mount.h Mon Jun 12 00:10:25 2000 @@ -26,7 +26,6 @@ atomic_t mnt_count; char *mnt_devname; /* Name of device e.g. 
/dev/dsk/hda1 */ - char *mnt_dirname; /* Name of directory mounted on */ struct list_head mnt_list; uid_t mnt_owner; }; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/msdos_fs.h linux.ac/include/linux/msdos_fs.h --- linux.t2/include/linux/msdos_fs.h Sat Jun 24 13:50:04 2000 +++ linux.ac/include/linux/msdos_fs.h Sat Jun 24 14:21:09 2000 @@ -248,7 +248,7 @@ extern struct super_block *fat_read_super(struct super_block *s, void *data, int silent, struct inode_operations *dir_ops); extern void msdos_put_super(struct super_block *sb); extern int fat_statfs(struct super_block *sb,struct statfs *buf); -extern void fat_write_inode(struct inode *inode); +extern void fat_write_inode(struct inode *inode, int); /* dir.c */ extern struct file_operations fat_dir_operations; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/ncp_fs_i.h linux.ac/include/linux/ncp_fs_i.h --- linux.t2/include/linux/ncp_fs_i.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/ncp_fs_i.h Sun Jun 4 21:48:39 2000 @@ -19,7 +19,8 @@ __u32 DosDirNum __attribute__((packed)); __u32 volNumber __attribute__((packed)); __u32 nwattr; - int opened; + struct semaphore open_sem; + atomic_t opened; int access; __u32 server_file_handle __attribute__((packed)); __u8 open_create_action __attribute__((packed)); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/nfsd/syscall.h linux.ac/include/linux/nfsd/syscall.h --- linux.t2/include/linux/nfsd/syscall.h Sat Jun 24 13:50:04 2000 +++ linux.ac/include/linux/nfsd/syscall.h Sat Jun 24 14:22:02 2000 @@ -133,7 +133,7 @@ * Kernel syscall implementation. 
*/ #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) -extern asmlinkage long sys_nfsservctl(int, void *, void *); +extern asmlinkage int sys_nfsservctl(int, void *, void *); #else #define sys_nfsservctl sys_ni_syscall #endif diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/poll.h linux.ac/include/linux/poll.h --- linux.t2/include/linux/poll.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/poll.h Sat Jun 24 14:00:01 2000 @@ -20,6 +20,7 @@ typedef struct poll_table_struct { struct poll_table_struct * next; unsigned int nr; + int err; struct poll_table_entry * entry; } poll_table; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/qnx4_fs.h linux.ac/include/linux/qnx4_fs.h --- linux.t2/include/linux/qnx4_fs.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/qnx4_fs.h Sat Jun 10 21:51:08 2000 @@ -116,7 +116,7 @@ extern void qnx4_free_inode(struct inode *inode); extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); -extern int qnx4_sync_file(struct file *file, struct dentry *dentry); +extern int qnx4_sync_file(struct file *file, struct dentry *dentry, int); extern int qnx4_sync_inode(struct inode *inode); extern int qnx4_get_block(struct inode *inode, long iblock, struct buffer_head *bh, int create); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/swap.h linux.ac/include/linux/swap.h --- linux.t2/include/linux/swap.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/swap.h Thu Jun 22 17:31:01 2000 @@ -161,6 +161,16 @@ extern spinlock_t pagemap_lru_lock; /* + * Magic constants for page aging. If the system is programmed + * right, tweaking these should have almost no effect... 
+ * The 2.4 code, however, is mostly simple and stable ;) + */ +#define PG_AGE_MAX 64 +#define PG_AGE_START 2 +#define PG_AGE_ADV 3 +#define PG_AGE_DECL 1 + +/* * Helper macros for lru_pages handling. */ #define lru_cache_add(page) \ @@ -168,12 +178,16 @@ spin_lock(&pagemap_lru_lock); \ list_add(&(page)->lru, &lru_cache); \ nr_lru_pages++; \ + page->age = PG_AGE_START; \ + ClearPageReferenced(page); \ + SetPageActive(page); \ spin_unlock(&pagemap_lru_lock); \ } while (0) #define __lru_cache_del(page) \ do { \ list_del(&(page)->lru); \ + ClearPageActive(page); \ nr_lru_pages--; \ } while (0) diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/sysv_fs.h linux.ac/include/linux/sysv_fs.h --- linux.t2/include/linux/sysv_fs.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/sysv_fs.h Sat Jun 24 14:22:20 2000 @@ -377,9 +377,9 @@ extern struct buffer_head * sysv_file_bread(struct inode *, int, int); extern void sysv_truncate(struct inode *); -extern void sysv_write_inode(struct inode *); +extern void sysv_write_inode(struct inode *, int); extern int sysv_sync_inode(struct inode *); -extern int sysv_sync_file(struct file *, struct dentry *); +extern int sysv_sync_file(struct file *, struct dentry *, int); extern int sysv_notify_change(struct dentry *, struct iattr *); extern struct inode_operations sysv_file_inode_operations; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/timer.h linux.ac/include/linux/timer.h --- linux.t2/include/linux/timer.h Sat Jun 24 13:36:12 2000 +++ linux.ac/include/linux/timer.h Sun Jun 18 15:47:26 2000 @@ -1,6 +1,8 @@ #ifndef _LINUX_TIMER_H #define _LINUX_TIMER_H +#ifdef __KERNEL__ + #include #include @@ -91,9 +93,11 @@ #define timer_set_running(t) (void)(t) #define timer_is_running(t) (0) #define timer_synchronize(t) do { (void)(t); barrier(); } while(0) -#define del_timer_sync(t) del_timer(t) +#define del_timer_sync del_timer #endif +#define del_timer_async del_timer + /* 
* These inlines deal with timer wrapping correctly. You are * strongly encouraged to use them @@ -111,4 +115,5 @@ #define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0) #define time_before_eq(a,b) time_after_eq(b,a) +#endif #endif diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/types.h linux.ac/include/linux/types.h --- linux.t2/include/linux/types.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/types.h Sat May 27 22:01:22 2000 @@ -1,7 +1,10 @@ #ifndef _LINUX_TYPES_H #define _LINUX_TYPES_H +#ifdef __KERNEL__ #include +#endif + #include #include diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/ufs_fs.h linux.ac/include/linux/ufs_fs.h --- linux.t2/include/linux/ufs_fs.h Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/ufs_fs.h Thu Jun 22 17:30:55 2000 @@ -560,9 +560,8 @@ extern int ufs_frag_map (struct inode *, int); extern void ufs_read_inode (struct inode *); extern void ufs_put_inode (struct inode *); -extern void ufs_write_inode (struct inode *); +extern void ufs_write_inode (struct inode *, int); extern int ufs_sync_inode (struct inode *); -extern void ufs_write_inode (struct inode *); extern void ufs_delete_inode (struct inode *); extern struct buffer_head * ufs_getfrag (struct inode *, unsigned, int, int *); extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/include/linux/umsdos_fs.p linux.ac/include/linux/umsdos_fs.p --- linux.t2/include/linux/umsdos_fs.p Sat Jun 24 13:36:13 2000 +++ linux.ac/include/linux/umsdos_fs.p Sun Jun 11 15:27:27 2000 @@ -48,7 +48,7 @@ /* inode.c 12/06/95 09.49.40 */ void fill_new_filp (struct file *filp, struct dentry *dentry); void UMSDOS_read_inode (struct inode *); -void UMSDOS_write_inode (struct inode *); +void UMSDOS_write_inode (struct inode *, int); int UMSDOS_notify_change (struct dentry *, struct iattr *attr); int 
umsdos_notify_change_locked(struct dentry *, struct iattr *attr); void UMSDOS_put_inode (struct inode *); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/init/main.c linux.ac/init/main.c --- linux.t2/init/main.c Sat Jun 24 13:50:04 2000 +++ linux.ac/init/main.c Sat Jun 17 18:35:54 2000 @@ -97,6 +97,7 @@ extern void bdev_init(void); extern int init_pcmcia_ds(void); extern int usb_init(void); +extern void filelock_init(void); extern void free_initmem(void); extern void filesystem_setup(void); @@ -233,25 +234,6 @@ { "dasdg", (DASD_MAJOR << MINORBITS) + (6 << 2) }, { "dasdh", (DASD_MAJOR << MINORBITS) + (7 << 2) }, #endif -#if defined(CONFIG_BLK_CPQ_DA) || defined(CONFIG_BLK_CPQ_DA_MODULE) - { "ida/c0d0p",0x4800 }, - { "ida/c0d1p",0x4810 }, - { "ida/c0d2p",0x4820 }, - { "ida/c0d3p",0x4830 }, - { "ida/c0d4p",0x4840 }, - { "ida/c0d5p",0x4850 }, - { "ida/c0d6p",0x4860 }, - { "ida/c0d7p",0x4870 }, - { "ida/c0d8p",0x4880 }, - { "ida/c0d9p",0x4890 }, - { "ida/c0d10p",0x48A0 }, - { "ida/c0d11p",0x48B0 }, - { "ida/c0d12p",0x48C0 }, - { "ida/c0d13p",0x48D0 }, - { "ida/c0d14p",0x48E0 }, - { "ida/c0d15p",0x48F0 }, -#endif - { NULL, 0 } }; @@ -569,6 +551,7 @@ bdev_init(); inode_init(mempages); file_table_init(); + filelock_init(); #if defined(CONFIG_SYSVIPC) ipc_init(); #endif diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/ipc/shm.c linux.ac/ipc/shm.c --- linux.t2/ipc/shm.c Sat Jun 24 13:50:04 2000 +++ linux.ac/ipc/shm.c Mon Jun 19 19:54:42 2000 @@ -52,7 +52,7 @@ static void shm_put_super (struct super_block *); static int shm_remount_fs (struct super_block *, int *, char *); static void shm_read_inode (struct inode *); -static void shm_write_inode(struct inode *); +static void shm_write_inode(struct inode *, int); static int shm_statfs (struct super_block *, struct statfs *); static int shm_create (struct inode *,struct dentry *,int); static struct dentry *shm_lookup (struct inode *,struct dentry *); @@ -371,7 +371,7 @@ return 0; } 
-static void shm_write_inode(struct inode * inode) +static void shm_write_inode(struct inode * inode, int unused) { } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/kernel/dma.c linux.ac/kernel/dma.c --- linux.t2/kernel/dma.c Sat Jun 24 13:36:12 2000 +++ linux.ac/kernel/dma.c Wed May 31 11:36:24 2000 @@ -115,9 +115,8 @@ return -EINVAL; } -int free_dma(unsigned int dmanr) +void free_dma(unsigned int dmanr) { - return -EINVAL; } int get_dma_list(char *buf) diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/kernel/exec_domain.c linux.ac/kernel/exec_domain.c --- linux.t2/kernel/exec_domain.c Sat Jun 24 13:36:12 2000 +++ linux.ac/kernel/exec_domain.c Sat Jun 10 22:59:46 2000 @@ -104,32 +104,37 @@ void __set_personality(unsigned long personality) { - struct exec_domain *it; + struct exec_domain *it, *prev; it = lookup_exec_domain(personality); - if (it) { - if (atomic_read(&current->fs->count) != 1) { - struct fs_struct *new = copy_fs_struct(current->fs); - struct fs_struct *old; - if (!new) { - put_exec_domain(it); - return; - } - task_lock(current); - old = current->fs; - current->fs = new; - task_unlock(current); - put_fs_struct(old); - } - /* - * At that point we are guaranteed to be the sole owner of - * current->fs. - */ + if (it == current->exec_domain) { current->personality = personality; - current->exec_domain = it; - set_fs_altroot(); - put_exec_domain(current->exec_domain); + return; + } + if (!it) + return; + if (atomic_read(&current->fs->count) != 1) { + struct fs_struct *new = copy_fs_struct(current->fs); + struct fs_struct *old; + if (!new) { + put_exec_domain(it); + return; + } + task_lock(current); + old = current->fs; + current->fs = new; + task_unlock(current); + put_fs_struct(old); } + /* + * At that point we are guaranteed to be the sole owner of + * current->fs. 
+ */ + current->personality = personality; + prev = current->exec_domain; + current->exec_domain = it; + set_fs_altroot(); + put_exec_domain(prev); } asmlinkage long sys_personality(unsigned long personality) diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/kernel/ksyms.c linux.ac/kernel/ksyms.c --- linux.t2/kernel/ksyms.c Sat Jun 24 13:50:04 2000 +++ linux.ac/kernel/ksyms.c Thu Jun 22 17:31:48 2000 @@ -172,6 +172,8 @@ EXPORT_SYMBOL(invalidate_inodes); EXPORT_SYMBOL(invalidate_inode_pages); EXPORT_SYMBOL(truncate_inode_pages); +EXPORT_SYMBOL(fsync_inode_buffers); +EXPORT_SYMBOL(buffer_insert_inode_queue); EXPORT_SYMBOL(fsync_dev); EXPORT_SYMBOL(permission); EXPORT_SYMBOL(inode_setattr); @@ -205,7 +207,7 @@ EXPORT_SYMBOL(generic_buffer_fdatasync); EXPORT_SYMBOL(page_hash_bits); EXPORT_SYMBOL(page_hash_table); -EXPORT_SYMBOL(file_lock_table); +EXPORT_SYMBOL(file_lock_list); EXPORT_SYMBOL(posix_lock_file); EXPORT_SYMBOL(posix_test_lock); EXPORT_SYMBOL(posix_block_lock); @@ -293,6 +295,8 @@ EXPORT_SYMBOL(max_sectors); EXPORT_SYMBOL(max_readahead); EXPORT_SYMBOL(file_moveto); +EXPORT_SYMBOL(drive_stat_acct); +EXPORT_SYMBOL(set_bh_page); /* tty routines */ EXPORT_SYMBOL(tty_hangup); @@ -500,7 +504,6 @@ /* binfmt_aout */ EXPORT_SYMBOL(get_write_access); -EXPORT_SYMBOL(put_write_access); /* dynamic registering of consoles */ EXPORT_SYMBOL(register_console); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/mm/filemap.c linux.ac/mm/filemap.c --- linux.t2/mm/filemap.c Sat Jun 24 13:36:12 2000 +++ linux.ac/mm/filemap.c Mon Jun 19 19:55:05 2000 @@ -56,6 +56,8 @@ #define CLUSTER_PAGES (1 << page_cluster) #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster) +#define min(a,b) ((a < b) ? a : b) + void __add_page_to_hash_queue(struct page * page, struct page **p) { atomic_inc(&page_cache_size); @@ -90,10 +92,16 @@ /* * Remove a page from the page cache and free it. 
Caller has to make * sure the page is locked and that nobody else uses it - or that usage - * is safe. + * is safe. The page must not have any buffers. */ static inline void __remove_inode_page(struct page *page) { + if (!PageLocked(page)) + PAGE_BUG(page); + + if (page->buffers) + BUG(); + remove_page_from_inode_queue(page); remove_page_from_hash_queue(page); page->mapping = NULL; @@ -101,9 +109,6 @@ void remove_inode_page(struct page *page) { - if (!PageLocked(page)) - PAGE_BUG(page); - spin_lock(&pagecache_lock); __remove_inode_page(page); spin_unlock(&pagecache_lock); @@ -114,16 +119,16 @@ * @inode: the inode which pages we want to invalidate * * This function only removes the unlocked pages, if you want to - * remove all the pages of one inode, you must call truncate_inode_pages. + * remove all the pages of one inode, you must call + * truncate_inode_pages. This function is not supposed to be called + * by block based filesystems. */ - void invalidate_inode_pages(struct inode * inode) { struct list_head *head, *curr; struct page * page; head = &inode->i_mapping->pages; - spin_lock(&pagecache_lock); spin_lock(&pagemap_lru_lock); curr = head->next; @@ -135,20 +140,53 @@ /* We cannot invalidate a locked page */ if (TryLockPage(page)) continue; + /* We _should not be called_ by block based filesystems */ + if (page->buffers) + BUG(); - __lru_cache_del(page); __remove_inode_page(page); + __lru_cache_del(page); UnlockPage(page); page_cache_release(page); } - spin_unlock(&pagemap_lru_lock); spin_unlock(&pagecache_lock); } -/* +static inline void truncate_partial_page(struct page *page, unsigned partial) +{ + memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); + + if (page->buffers) + block_flushpage(page, partial); + +} + +static inline void truncate_complete_page(struct page *page) +{ + if (page->buffers) + block_destroy_buffers(page); + lru_cache_del(page); + + /* + * We remove the page from the page cache _after_ we have + * destroyed all 
buffer-cache references to it. Otherwise some + * other process might think this inode page is not in the + * page cache and creates a buffer-cache alias to it causing + * all sorts of fun problems ... + */ + remove_inode_page(page); + page_cache_release(page); +} + +/** + * truncate_inode_pages - truncate *all* the pages from an offset + * @mapping: mapping to truncate + * @lstart: offset from which to truncate + * * Truncate the page cache at a set offset, removing the pages * that are beyond that offset (and zeroing out partial pages). + * If any page is locked we wait for it to become unlocked. */ void truncate_inode_pages(struct address_space * mapping, loff_t lstart) { @@ -168,11 +206,10 @@ page = list_entry(curr, struct page, list); curr = curr->next; - offset = page->index; - /* page wholly truncated - free it */ - if (offset >= start) { + /* Is one of the pages to truncate? */ + if ((offset >= start) || (partial && (offset + 1) == start)) { if (TryLockPage(page)) { page_cache_get(page); spin_unlock(&pagecache_lock); @@ -183,22 +220,14 @@ page_cache_get(page); spin_unlock(&pagecache_lock); - if (!page->buffers || block_flushpage(page, 0)) - lru_cache_del(page); - - /* - * We remove the page from the page cache - * _after_ we have destroyed all buffer-cache - * references to it. Otherwise some other process - * might think this inode page is not in the - * page cache and creates a buffer-cache alias - * to it causing all sorts of fun problems ... - */ - remove_inode_page(page); + if (partial && (offset + 1) == start) { + truncate_partial_page(page, partial); + partial = 0; + } else + truncate_complete_page(page); UnlockPage(page); page_cache_release(page); - page_cache_release(page); /* * We have done things without the pagecache lock, @@ -209,38 +238,59 @@ */ goto repeat; } - /* - * there is only one partial page possible. 
- */ - if (!partial) - continue; + } + spin_unlock(&pagecache_lock); +} - /* and it's the one preceeding the first wholly truncated page */ - if ((offset + 1) != start) - continue; +/** + * truncate_all_inode_pages - truncate *all* the pages + * @mapping: mapping to truncate + * + * Truncate all the inode pages. If any page is locked we wait for it + * to become unlocked. This function can block. + */ +void truncate_all_inode_pages(struct address_space * mapping) +{ + struct list_head *head, *curr; + struct page * page; + + head = &mapping->pages; +repeat: + spin_lock(&pagecache_lock); + spin_lock(&pagemap_lru_lock); + curr = head->next; + + while (curr != head) { + page = list_entry(curr, struct page, list); + curr = curr->next; - /* partial truncate, clear end of page */ if (TryLockPage(page)) { + page_cache_get(page); + spin_unlock(&pagemap_lru_lock); spin_unlock(&pagecache_lock); + wait_on_page(page); + page_cache_release(page); goto repeat; } - page_cache_get(page); - spin_unlock(&pagecache_lock); - - memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); - if (page->buffers) - block_flushpage(page, partial); - - partial = 0; - - /* - * we have dropped the spinlock so we have to - * restart. 
- */ + if (page->buffers) { + page_cache_get(page); + spin_unlock(&pagemap_lru_lock); + spin_unlock(&pagecache_lock); + block_destroy_buffers(page); + remove_inode_page(page); + lru_cache_del(page); + page_cache_release(page); + UnlockPage(page); + page_cache_release(page); + goto repeat; + } + __lru_cache_del(page); + __remove_inode_page(page); UnlockPage(page); page_cache_release(page); - goto repeat; } + + spin_unlock(&pagemap_lru_lock); spin_unlock(&pagecache_lock); } @@ -264,7 +314,15 @@ page = list_entry(page_lru, struct page, lru); list_del(page_lru); - if (PageTestandClearReferenced(page)) + if (PageTestandClearReferenced(page)) { + page->age += PG_AGE_ADV; + if (page->age > PG_AGE_MAX) + page->age = PG_AGE_MAX; + goto dispose_continue; + } + page->age -= min(PG_AGE_DECL, page->age); + + if (page->age) goto dispose_continue; count--; @@ -322,17 +380,23 @@ * were to be marked referenced.. */ if (PageSwapCache(page)) { - spin_unlock(&pagecache_lock); - __delete_from_swap_cache(page); - goto made_inode_progress; - } - - /* - * Page is from a zone we don't care about. - * Don't drop page cache entries in vain. - */ - if (page->zone->free_pages > page->zone->pages_high) + if (!PageDirty(page)) { + spin_unlock(&pagecache_lock); + __delete_from_swap_cache(page); + goto made_inode_progress; + } + /* PageDeferswap -> we swap out the page now. */ + if (gfp_mask & __GFP_IO) { + spin_unlock(&pagecache_lock); + /* Do NOT unlock the page ... brw_page does. */ + ClearPageDirty(page); + rw_swap_page(WRITE, page, 0); + spin_lock(&pagemap_lru_lock); + page_cache_release(page); + goto dispose_continue; + } goto cache_unlock_continue; + } /* is it a page-cache page? */ if (page->mapping) { @@ -500,7 +564,7 @@ /* * This adds a page to the page cache, starting out as locked, - * owned by us, referenced, but not uptodate and with no errors. + * owned by us, but not uptodate and with no errors. 
*/ static inline void __add_to_page_cache(struct page * page, struct address_space *mapping, unsigned long offset, @@ -512,8 +576,8 @@ if (PageLocked(page)) BUG(); - flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_dirty)); - page->flags = flags | (1 << PG_locked) | (1 << PG_referenced); + flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error)); + page->flags = flags | (1 << PG_locked); page_cache_get(page); page->index = offset; add_page_to_inode_queue(mapping, page); @@ -1744,7 +1808,7 @@ if (!error && (flags & MS_SYNC)) { struct file * file = vma->vm_file; if (file && file->f_op && file->f_op->fsync) - error = file->f_op->fsync(file, file->f_dentry); + error = file->f_op->fsync(file, file->f_dentry, 1); } return error; } @@ -2483,7 +2547,7 @@ if (count) { remove_suid(inode); inode->i_ctime = inode->i_mtime = CURRENT_TIME; - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); } while (count) { @@ -2540,7 +2604,13 @@ if (cached_page) page_cache_free(cached_page); + /* For now, when the user asks for O_SYNC, we'll actually + * provide O_DSYNC. */ + if ((status >= 0) && (file->f_flags & O_SYNC)) + status = generic_osync_inode(inode, 1); /* 1 means datasync */ + err = written ? 
written : status; + out: up(&inode->i_sem); return err; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/mm/memory.c linux.ac/mm/memory.c --- linux.t2/mm/memory.c Sat Jun 24 13:36:12 2000 +++ linux.ac/mm/memory.c Mon Jun 19 19:54:42 2000 @@ -847,7 +847,7 @@ UnlockPage(old_page); break; } - delete_from_swap_cache_nolock(old_page); + SetPageDirty(old_page); UnlockPage(old_page); /* FallThrough */ case 1: @@ -1058,7 +1058,7 @@ */ lock_page(page); swap_free(entry); - if (write_access && !is_page_shared(page)) { + if (write_access && !is_page_shared(page) && nr_free_highpages()) { delete_from_swap_cache_nolock(page); UnlockPage(page); page = replace_with_highmem(page); diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/mm/mmap.c linux.ac/mm/mmap.c --- linux.t2/mm/mmap.c Sat Jun 24 13:50:04 2000 +++ linux.ac/mm/mmap.c Mon Jun 19 19:54:42 2000 @@ -166,6 +166,7 @@ { struct mm_struct * mm = current->mm; struct vm_area_struct * vma; + int correct_wcount = 0; int error; if (file && (!file->f_op || !file->f_op->mmap)) @@ -296,32 +297,19 @@ goto free_vma; if (file) { - int correct_wcount = 0; if (vma->vm_flags & VM_DENYWRITE) { - if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { - error = -ETXTBSY; + error = deny_write_access(file); + if (error) goto free_vma; - } - /* f_op->mmap might possibly sleep - * (generic_file_mmap doesn't, but other code - * might). In any case, this takes care of any - * race that this might cause. - */ - atomic_dec(&file->f_dentry->d_inode->i_writecount); correct_wcount = 1; } vma->vm_file = file; get_file(file); error = file->f_op->mmap(file, vma); - /* Fix up the count if necessary, then check for an error */ - if (correct_wcount) - atomic_inc(&file->f_dentry->d_inode->i_writecount); if (error) goto unmap_and_free_vma; } else if (flags & MAP_SHARED) { error = map_zero_setup(vma); - if (error) - goto free_vma; } /* @@ -332,6 +320,8 @@ addr = vma->vm_start; /* can addr have changed?? 
*/ vmlist_modify_lock(mm); insert_vm_struct(mm, vma); + if (correct_wcount) + atomic_inc(&file->f_dentry->d_inode->i_writecount); merge_segments(mm, vma->vm_start, vma->vm_end); vmlist_modify_unlock(mm); @@ -343,6 +333,8 @@ return addr; unmap_and_free_vma: + if (correct_wcount) + atomic_inc(&file->f_dentry->d_inode->i_writecount); vma->vm_file = NULL; fput(file); /* Undo any partial mapping done by a device driver. */ @@ -694,9 +686,11 @@ * so release them, and unmap the page range.. * If the one of the segments is only being partially unmapped, * it will put new vm_area_struct(s) into the address space. + * In that case we have to be careful with VM_DENYWRITE. */ while ((mpnt = free) != NULL) { unsigned long st, end, size; + struct file *file = NULL; free = free->vm_next; @@ -708,6 +702,11 @@ if (mpnt->vm_ops && mpnt->vm_ops->unmap) mpnt->vm_ops->unmap(mpnt, st, size); + if (mpnt->vm_flags & VM_DENYWRITE && + (st != mpnt->vm_start || end != mpnt->vm_end) && + (file = mpnt->vm_file) != NULL) { + atomic_dec(&file->f_dentry->d_inode->i_writecount); + } remove_shared_vm_struct(mpnt); mm->map_count--; @@ -719,6 +718,8 @@ * Fix the mapping, and free the old area if it wasn't reused. 
*/ extra = unmap_fixup(mm, mpnt, st, size, extra); + if (file) + atomic_inc(&file->f_dentry->d_inode->i_writecount); } /* Release the extra vma struct if it wasn't used */ diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/mm/page_alloc.c linux.ac/mm/page_alloc.c --- linux.t2/mm/page_alloc.c Sat Jun 24 13:36:12 2000 +++ linux.ac/mm/page_alloc.c Mon Jun 19 19:54:42 2000 @@ -29,7 +29,7 @@ pg_data_t *pgdat_list; static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; -static int zone_balance_ratio[MAX_NR_ZONES] = { 128, 128, 128, }; +static int zone_balance_ratio[MAX_NR_ZONES] = { 128, 128, 512, }; static int zone_balance_min[MAX_NR_ZONES] = { 10 , 10, 10, }; static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, }; @@ -93,6 +93,8 @@ BUG(); if (PageDecrAfter(page)) BUG(); + if (PageDirty(page)) + BUG(); zone = page->zone; @@ -139,10 +141,13 @@ spin_unlock_irqrestore(&zone->lock, flags); - if (zone->free_pages > zone->pages_high) { - zone->zone_wake_kswapd = 0; + if (zone->free_pages >= zone->pages_low) { zone->low_on_memory = 0; } + + if (zone->free_pages >= zone->pages_high) { + zone->zone_wake_kswapd = 0; + } } #define MARK_USED(index, order, area) \ @@ -217,6 +222,9 @@ { zone_t **zone = zonelist->zones; extern wait_queue_head_t kswapd_wait; + static int last_woke_kswapd; + static int kswapd_pause = HZ; + int gfp_mask = zonelist->gfp_mask; /* * (If anyone calls gfp from interrupts nonatomically then it @@ -237,8 +245,6 @@ struct page *page = rmqueue(z, order); if (z->free_pages < z->pages_low) { z->zone_wake_kswapd = 1; - if (waitqueue_active(&kswapd_wait)) - wake_up_interruptible(&kswapd_wait); } if (page) return page; @@ -246,9 +252,27 @@ } /* + * Kswapd should be freeing enough memory to satisfy all allocations + * immediately. Calling try_to_free_pages from processes will slow + * down the system a lot. On the other hand, waking up kswapd too + * often means wasted memory and cpu time. 
+ * + * We tune the kswapd pause interval in such a way that kswapd is + * always just aggressive enough to free the amount of memory we + * want freed. + */ + if (waitqueue_active(&kswapd_wait) && + time_after(jiffies, last_woke_kswapd + kswapd_pause)) { + kswapd_pause++; + last_woke_kswapd = jiffies; + wake_up_interruptible(&kswapd_wait); + } + + /* * Ok, we don't have any zones that don't need some * balancing.. See if we have any that aren't critical.. */ +again: zone = zonelist->zones; for (;;) { zone_t *z = *(zone++); @@ -256,20 +280,33 @@ break; if (!z->low_on_memory) { struct page *page = rmqueue(z, order); - if (z->free_pages < z->pages_min) + if (z->free_pages < (z->pages_min + z->pages_low) / 2) z->low_on_memory = 1; if (page) return page; + } else { + if (kswapd_pause > 0) + kswapd_pause--; } } + /* We didn't kick kswapd often enough... */ + kswapd_pause /= 2; + if (waitqueue_active(&kswapd_wait)) + wake_up_interruptible(&kswapd_wait); + /* If we're low priority, we just wait a bit and try again later. */ + if ((gfp_mask & __GFP_WAIT) && current->need_resched && + current->state == TASK_RUNNING) { + schedule(); + goto again; + } + /* * Uhhuh. All the zones have been critical, which means that * we'd better do some synchronous swap-out. kswapd has not * been able to cope.. */ if (!(current->flags & PF_MEMALLOC)) { - int gfp_mask = zonelist->gfp_mask; if (!try_to_free_pages(gfp_mask)) { if (!(gfp_mask & __GFP_HIGH)) goto fail; @@ -277,7 +314,7 @@ } /* - * Final phase: allocate anything we can! + * We freed something, so we're allowed to allocate anything we can! */ zone = zonelist->zones; for (;;) { @@ -292,6 +329,18 @@ } fail: + /* Last try, zone->low_on_memory isn't reset until we hit pages_low */ + zone = zonelist->zones; + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; + if (z->free_pages > z->pages_min) { + struct page *page = rmqueue(z, order); + if (page) + return page; + } + } /* No luck.. 
*/ return NULL; } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/mm/swap_state.c linux.ac/mm/swap_state.c --- linux.t2/mm/swap_state.c Sat Jun 24 13:36:12 2000 +++ linux.ac/mm/swap_state.c Mon Jun 19 19:55:05 2000 @@ -58,8 +58,8 @@ BUG(); if (page->mapping) BUG(); - flags = page->flags & ~((1 << PG_error) | (1 << PG_dirty)); - page->flags = flags | (1 << PG_referenced) | (1 << PG_uptodate); + flags = page->flags & ~(1 << PG_error); + page->flags = flags | (1 << PG_uptodate); add_to_page_cache_locked(page, &swapper_space, entry.val); } @@ -73,6 +73,7 @@ PAGE_BUG(page); PageClearSwapCache(page); + ClearPageDirty(page); remove_inode_page(page); } @@ -102,9 +103,10 @@ if (!PageLocked(page)) BUG(); - if (block_flushpage(page, 0)) - lru_cache_del(page); + if (page->buffers) + block_destroy_buffers(page); + lru_cache_del(page); __delete_from_swap_cache(page); page_cache_release(page); } diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/mm/swapfile.c linux.ac/mm/swapfile.c --- linux.t2/mm/swapfile.c Sat Jun 24 13:36:12 2000 +++ linux.ac/mm/swapfile.c Mon Jun 19 19:54:42 2000 @@ -407,11 +407,11 @@ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - lock_kernel(); err = user_path_walk(specialfile, &nd); if (err) goto out; + lock_kernel(); prev = -1; swap_list_lock(); for (type = swap_list.head; type >= 0; type = swap_info[type].next) { @@ -478,9 +478,9 @@ err = 0; out_dput: + unlock_kernel(); path_release(&nd); out: - unlock_kernel(); return err; } @@ -555,7 +555,6 @@ unsigned long maxpages; int swapfilesize; struct block_device *bdev = NULL; - char *name; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -586,14 +585,7 @@ } else { p->prio = --least_priority; } - name = getname(specialfile); - error = PTR_ERR(name); - if (IS_ERR(name)) - goto bad_swap_2; - error = 0; - if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) - error = path_walk(name, &nd); - putname(name); + error = user_path_walk(specialfile, &nd); if (error) goto 
bad_swap_2; diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.t2/mm/vmscan.c linux.ac/mm/vmscan.c --- linux.t2/mm/vmscan.c Sat Jun 24 13:36:12 2000 +++ linux.ac/mm/vmscan.c Mon Jun 19 19:55:05 2000 @@ -62,6 +62,10 @@ goto out_failed; } + /* Can only do this if we age all active pages. */ + if (PageActive(page) && page->age > 1) + goto out_failed; + if (TryLockPage(page)) goto out_failed; @@ -74,6 +78,8 @@ * memory, and we should just continue our scan. */ if (PageSwapCache(page)) { + if (pte_dirty(pte)) + SetPageDirty(page); entry.val = page->index; swap_duplicate(entry); set_pte(page_table, swp_entry_to_pte(entry)); @@ -181,7 +187,10 @@ vmlist_access_unlock(vma->vm_mm); /* OK, do a physical asynchronous write to swap. */ - rw_swap_page(WRITE, page, 0); + // rw_swap_page(WRITE, page, 0); + /* Let shrink_mmap handle this swapout. */ + SetPageDirty(page); + UnlockPage(page); out_free_success: page_cache_release(page); @@ -430,12 +439,13 @@ * latency. */ #define FREE_COUNT 8 -#define SWAP_COUNT 16 static int do_try_to_free_pages(unsigned int gfp_mask) { int priority; int count = FREE_COUNT; - int swap_count; + int swap_count = 0; + int made_progress = 0; + int ret = 0; /* Always trim SLAB caches when memory gets low. */ kmem_cache_reap(gfp_mask); @@ -443,6 +453,7 @@ priority = 64; do { while (shrink_mmap(priority, gfp_mask)) { + made_progress = 1; if (!--count) goto done; } @@ -457,9 +468,12 @@ */ count -= shrink_dcache_memory(priority, gfp_mask); count -= shrink_icache_memory(priority, gfp_mask); - if (count <= 0) + if (count <= 0) { + made_progress = 1; goto done; + } while (shm_swap(priority, gfp_mask)) { + made_progress = 1; if (!--count) goto done; } @@ -471,24 +485,44 @@ * This will not actually free any pages (they get * put in the swap cache), so we must not count this * as a "count" success. + * + * The amount we page out is the amount of pages we're + * short freeing, amplified by the number of times we + * failed above. 
This generates a negative feedback loop: + * the more difficult it was to free pages, the easier we + * will make it. */ - swap_count = SWAP_COUNT; - while (swap_out(priority, gfp_mask)) + swap_count += count; + while (swap_out(priority, gfp_mask)) { + made_progress = 1; if (--swap_count < 0) break; + } - } while (--priority >= 0); + /* + * If we made progress at the current priority, the next + * loop will also be done at this priority level. There's + * absolutely no reason to drop to a lower priority and + * potentially upset the balance between shrink_mmap and + * swap_out. + */ + if (made_progress) { + made_progress = 0; + ret = 1; + } else { + priority--; + } + } while (priority >= 0); /* Always end on a shrink_mmap.. */ while (shrink_mmap(0, gfp_mask)) { + ret = 1; if (!--count) goto done; } - /* We return 1 if we are freed some page */ - return (count != FREE_COUNT); done: - return 1; + return ret; } DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);