http://oss.sgi.com:8090/xfs-linux-2.6 nathans@sgi.com|ChangeSet|20050113002518|24388 nathans # This is a BitKeeper generated diff -Nru style patch. # # ChangeSet # 2005/01/13 17:11:43-08:00 akpm@bix.(none) # Merge http://oss.sgi.com:8090/xfs-linux-2.6 # into bix.(none):/usr/src/bk-xfs # # include/linux/fs.h # 2005/01/13 17:11:39-08:00 akpm@bix.(none) +0 -0 # Auto merged # # fs/Kconfig # 2005/01/13 17:11:39-08:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2005/01/13 11:25:18+11:00 nathans@sgi.com # [XFS] Ensure the cluster hash size does not exceed the inode hash size. # # SGI-PV: 923092 # SGI-Modid: xfs-linux-melb:xfs-kern:21109a # Signed-off-by: Nathan Scott # # fs/xfs/xfs_iget.c # 2005/01/13 11:24:49+11:00 nathans@sgi.com +3 -2 # [XFS] Ensure the cluster hash size does not exceed the inode hash size. # # ChangeSet # 2005/01/13 11:24:01+11:00 nathans@sgi.com # [XFS] Remove write congestion check during metadata readahead. # # SGI-PV: 928392 # SGI-Modid: xfs-linux-melb:xfs-kern:21108a # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_buf.c # 2005/01/13 11:23:32+11:00 nathans@sgi.com +0 -2 # [XFS] Remove write congestion check during metadata readahead. # # ChangeSet # 2005/01/13 11:16:34+11:00 nathans@sgi.com # [XFS] Fix page index to byte calculation result truncation in tracing # code. # # SGI-PV: 928388 # SGI-Modid: xfs-linux-melb:xfs-kern:21106a # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_aops.c # 2005/01/13 11:16:05+11:00 nathans@sgi.com +1 -1 # [XFS] Fix page index to byte calculation result truncation in tracing # code. # # ChangeSet # 2005/01/13 11:13:28+11:00 hch@sgi.com # [XFS] Make AIO work again - wait on iocb completion for non-AIO only. # # SGI-PV: 927929 # SGI-Modid: xfs-linux:xfs-kern:185642a # Signed-off-by: Christoph Hellwig # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_vnode.h # 2005/01/13 11:12:59+11:00 hch@sgi.com +1 -0 # [XFS] Make AIO work again - wait on iocb completion for non-AIO only. 
# # fs/xfs/linux-2.6/xfs_lrw.c # 2005/01/13 11:12:59+11:00 hch@sgi.com +2 -2 # [XFS] Make AIO work again - wait on iocb completion for non-AIO only. # # fs/xfs/linux-2.6/xfs_file.c # 2005/01/13 11:12:59+11:00 hch@sgi.com +12 -12 # [XFS] Make AIO work again - wait on iocb completion for non-AIO only. # # ChangeSet # 2005/01/13 11:06:06+11:00 nathans@sgi.com # Merge sgi.com:/source2/linux-2.6 into sgi.com:/source2/xfs-linux-2.6 # # include/linux/fs.h # 2005/01/13 11:05:49+11:00 nathans@sgi.com +0 -0 # Auto merged # # fs/Kconfig # 2005/01/13 11:05:48+11:00 nathans@sgi.com +0 -0 # Auto merged # # ChangeSet # 2005/01/11 22:23:39-08:00 akpm@bix.(none) # Merge bix.(none):/usr/src/bk25 into bix.(none):/usr/src/bk-xfs # # include/linux/fs.h # 2005/01/11 22:23:34-08:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2005/01/12 12:23:14+11:00 nathans@sgi.com # [XFS] Switch to managing uptodate state on a region within a page, rather # than a sector within a page. Fixes 64K pagesize kernels with 512 byte # sectors. # # SGI-PV: 926724 # SGI-Modid: xfs-linux:xfs-kern:20990a # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_buf.c # 2005/01/12 12:22:45+11:00 nathans@sgi.com +101 -43 # [XFS] Switch to managing uptodate state on a region within a page, rather # than a sector within a page. Fixes 64K pagesize kernels with 512 byte # sectors. # # ChangeSet # 2005/01/11 16:01:59-08:00 akpm@bix.(none) # Merge http://oss.sgi.com:8090/xfs-linux-2.6 # into bix.(none):/usr/src/bk-xfs # # include/linux/fs.h # 2005/01/11 16:01:55-08:00 akpm@bix.(none) +0 -0 # Auto merged # # fs/Kconfig # 2005/01/11 16:01:55-08:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2005/01/12 09:23:38+11:00 hch@sgi.com # Move extern find_exported_dentry declaration into a common header. # # Signed-off-by: Christoph Hellwig # Signed-off-by: Nathan Scott # # include/linux/fs.h # 2005/01/12 09:23:08+11:00 hch@sgi.com +4 -0 # Move extern find_exported_dentry declaration into a common header. 
# # fs/nfsd/export.c # 2005/01/12 09:23:08+11:00 hch@sgi.com +0 -5 # Move extern find_exported_dentry declaration into a common header. # # ChangeSet # 2005/01/12 08:37:53+11:00 hch@sgi.com # [XFS] Update Makefile for separate export source file. # # Signed-off-by: Christoph Hellwig # Signed-off-by: Nathan Scott # # fs/xfs/Makefile # 2005/01/12 08:37:23+11:00 hch@sgi.com +1 -0 # [XFS] Update Makefile for separate export source file. # # ChangeSet # 2005/01/11 16:28:46+11:00 nathans@sgi.com # [XFS] Switch to using a separate Kconfig file for XFS. # # Signed-off-by: Russell Cattelan # Signed-off-by: Nathan Scott # # fs/Kconfig # 2005/01/11 16:28:16+11:00 nathans@sgi.com +1 -76 # [XFS] Switch to using a separate Kconfig file for XFS. # # ChangeSet # 2005/01/11 16:23:25+11:00 hch@sgi.com # [XFS] Fix compilations for parisc # # SGI-PV: 928101 # SGI-Modid: xfs-linux:xfs-kern:185439a # Signed-off-by: Christoph Hellwig # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_buf.c # 2005/01/11 16:22:56+11:00 hch@sgi.com +2 -0 # [XFS] Fix compilations for parisc # # ChangeSet # 2005/01/11 16:22:04+11:00 hch@sgi.com # [XFS] Move support code for NFS exporting to a conditionally built file # # SGI-PV: 923968 # SGI-Modid: xfs-linux:xfs-kern:185437a # Signed-off-by: Christoph Hellwig # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_export.c # 2005/01/11 16:21:33+11:00 hch@sgi.com +130 -0 # # fs/xfs/linux-2.6/xfs_export.c # 2005/01/11 16:21:33+11:00 hch@sgi.com +0 -0 # BitKeeper file /source2/xfs-linux-2.6/fs/xfs/linux-2.6/xfs_export.c # # fs/xfs/linux-2.6/xfs_super.h # 2005/01/11 16:21:32+11:00 hch@sgi.com +2 -0 # [XFS] Move support code for NFS exporting to a conditionally built file # # fs/xfs/linux-2.6/xfs_super.c # 2005/01/11 16:21:32+11:00 hch@sgi.com +2 -98 # [XFS] Move support code for NFS exporting to a conditionally built file # # ChangeSet # 2005/01/11 16:17:04+11:00 hch@sgi.com # [XFS] Use generic_readlink # # SGI-PV: 927939 # SGI-Modid: 
xfs-linux:xfs-kern:185295a # Signed-off-by: Christoph Hellwig # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_iops.c # 2005/01/11 16:16:35+11:00 hch@sgi.com +1 -28 # [XFS] Use generic_readlink # # ChangeSet # 2005/01/11 16:13:36+11:00 cattelan@sgi.com # [XFS] Move xfs configs to xfs directory, different flavors of xfs have # different configs, this way fs/Kconfig does not have to changed if # different xfs's are swapped in and out # # SGI-PV: 926404 # SGI-Modid: xfs-linux:xfs-kern:185134a # Signed-off-by: Russell Cattelan # Signed-off-by: Nathan Scott # # fs/xfs/Kconfig # 2005/01/11 16:13:05+11:00 cattelan@sgi.com +80 -0 # # fs/xfs/Kconfig # 2005/01/11 16:13:04+11:00 cattelan@sgi.com +0 -0 # BitKeeper file /source2/xfs-linux-2.6/fs/xfs/Kconfig # # ChangeSet # 2005/01/11 16:06:14+11:00 hch@sgi.com # [XFS] Fix NFS inode data corruption # # SGI-PV: 923968 # SGI-Modid: xfs-linux:xfs-kern:185126a # Signed-off-by: Christoph Hellwig # Signed-off-by: Nathan Scott # # fs/xfs/xfs_vfsops.c # 2005/01/11 16:05:44+11:00 hch@sgi.com +2 -2 # [XFS] Fix NFS inode data corruption # # fs/xfs/linux-2.6/xfs_super.c # 2005/01/11 16:05:44+11:00 hch@sgi.com +34 -0 # [XFS] Fix NFS inode data corruption # # ChangeSet # 2005/01/11 15:17:24+11:00 nathans@sgi.com # [XFS] Add sanity checks before use of attr_multi opcount parameter. # # SGI-PV: 927535 # SGI-Modid: xfs-linux:xfs-kern:20991a # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_ioctl.c # 2005/01/11 15:16:56+11:00 nathans@sgi.com +6 -1 # [XFS] Add sanity checks before use of attr_multi opcount parameter. # # ChangeSet # 2005/01/11 15:16:11+11:00 nathans@sgi.com # [XFS] Move to per-device hash tables (scalability), and use Bill Irwins # hash (quicker). # # SGI-PV: 927536 # SGI-Modid: xfs-linux:xfs-kern:20989a # Signed-off-by: Nathan Scott # # fs/xfs/xfs_vfsops.c # 2005/01/11 15:15:44+11:00 nathans@sgi.com +3 -3 # [XFS] Move to per-device hash tables (scalability), and use Bill Irwins # hash (quicker). 
# # fs/xfs/linux-2.6/xfs_buf.h # 2005/01/11 15:15:44+11:00 nathans@sgi.com +29 -21 # [XFS] Move to per-device hash tables (scalability), and use Bill Irwins # hash (quicker). # # fs/xfs/linux-2.6/xfs_buf.c # 2005/01/11 15:15:44+11:00 nathans@sgi.com +98 -82 # [XFS] Move to per-device hash tables (scalability), and use Bill Irwins # hash (quicker). # # ChangeSet # 2005/01/11 15:14:17+11:00 nathans@sgi.com # [XFS] Prevent attempts to mount 512 byte sector filesystems with 64KB # pagesizes, until fixed. # # SGI-PV: 926724 # SGI-Modid: xfs-linux:xfs-kern:20780a # Signed-off-by: Nathan Scott # # fs/xfs/linux-2.6/xfs_buf.c # 2005/01/11 15:13:48+11:00 nathans@sgi.com +12 -0 # [XFS] Prevent attempts to mount 512 byte sector filesystems with 64KB # pagesizes, until fixed. # # ChangeSet # 2005/01/11 15:12:26+11:00 hch@sgi.com # [XFS] make sure to always reclaim inodes in xfs_finish_reclaim # # SGI-PV: 921072 # SGI-Modid: xfs-linux:xfs-kern:184505a # Signed-off-by: Christoph Hellwig # Signed-off-by: Nathan Scott # # fs/xfs/xfs_vnodeops.c # 2005/01/11 15:11:56+11:00 hch@sgi.com +3 -3 # [XFS] make sure to always reclaim inodes in xfs_finish_reclaim # # ChangeSet # 2005/01/11 15:10:37+11:00 nathans@sgi.com # [XFS] Fix a performance and scaling problem in xfs_iget_core. Improved # the inode hash table sizing heuristics, and allow these to be manually # tweaked as well. # # SGI-PV: 923092 # SGI-Modid: xfs-linux:xfs-kern:20766a # Signed-off-by: Nathan Scott # # fs/xfs/xfs_vfsops.c # 2005/01/11 15:10:07+11:00 nathans@sgi.com +9 -0 # [XFS] Fix a performance and scaling problem in xfs_iget_core. Improved # the inode hash table sizing heuristics, and allow these to be manually # tweaked as well. # # fs/xfs/xfs_mount.h # 2005/01/11 15:10:07+11:00 nathans@sgi.com +2 -2 # [XFS] Fix a performance and scaling problem in xfs_iget_core. Improved # the inode hash table sizing heuristics, and allow these to be manually # tweaked as well. 
# # fs/xfs/xfs_inode.h # 2005/01/11 15:10:07+11:00 nathans@sgi.com +2 -11 # [XFS] Fix a performance and scaling problem in xfs_iget_core. Improved # the inode hash table sizing heuristics, and allow these to be manually # tweaked as well. # # fs/xfs/xfs_iget.c # 2005/01/11 15:10:07+11:00 nathans@sgi.com +22 -23 # [XFS] Fix a performance and scaling problem in xfs_iget_core. Improved # the inode hash table sizing heuristics, and allow these to be manually # tweaked as well. # # fs/xfs/xfs_clnt.h # 2005/01/11 15:10:07+11:00 nathans@sgi.com +1 -0 # [XFS] Fix a performance and scaling problem in xfs_iget_core. Improved # the inode hash table sizing heuristics, and allow these to be manually # tweaked as well. # diff -Nru a/fs/Kconfig b/fs/Kconfig --- a/fs/Kconfig 2005-01-13 17:12:38 -08:00 +++ b/fs/Kconfig 2005-01-13 17:12:38 -08:00 @@ -304,82 +304,7 @@ depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || NFSD_V4 default y -config XFS_FS - tristate "XFS filesystem support" - help - XFS is a high performance journaling filesystem which originated - on the SGI IRIX platform. It is completely multi-threaded, can - support large files and large filesystems, extended attributes, - variable block sizes, is extent based, and makes extensive use of - Btrees (directories, extents, free space) to aid both performance - and scalability. - - Refer to the documentation at - for complete details. This implementation is on-disk compatible - with the IRIX version of XFS. - - To compile this file system support as a module, choose M here: the - module will be called xfs. Be aware, however, that if the file - system of your root partition is compiled as a module, you'll need - to use an initial ramdisk (initrd) to boot. - -config XFS_RT - bool "Realtime support (EXPERIMENTAL)" - depends on XFS_FS && EXPERIMENTAL - help - If you say Y here you will be able to mount and use XFS filesystems - which contain a realtime subvolume. 
The realtime subvolume is a - separate area of disk space where only file data is stored. The - realtime subvolume is designed to provide very deterministic - data rates suitable for media streaming applications. - - See the xfs man page in section 5 for a bit more information. - - This feature is unsupported at this time, is not yet fully - functional, and may cause serious problems. - - If unsure, say N. - -config XFS_QUOTA - bool "Quota support" - depends on XFS_FS - help - If you say Y here, you will be able to set limits for disk usage on - a per user and/or a per group basis under XFS. XFS considers quota - information as filesystem metadata and uses journaling to provide a - higher level guarantee of consistency. The on-disk data format for - quota is also compatible with the IRIX version of XFS, allowing a - filesystem to be migrated between Linux and IRIX without any need - for conversion. - - If unsure, say N. More comprehensive documentation can be found in - README.quota in the xfsprogs package. XFS quota can be used either - with or without the generic quota support enabled (CONFIG_QUOTA) - - they are completely independent subsystems. - -config XFS_SECURITY - bool "Security Label support" - depends on XFS_FS - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute namespace for inode security - labels in the XFS filesystem. - - If you are not using a security module that requires using - extended attributes for inode security labels, say N. - -config XFS_POSIX_ACL - bool "POSIX ACL support" - depends on XFS_FS - help - POSIX Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website . - - If you don't know what Access Control Lists are, say N. 
+source "fs/xfs/Kconfig" config MINIX_FS tristate "Minix fs support" diff -Nru a/fs/nfsd/export.c b/fs/nfsd/export.c --- a/fs/nfsd/export.c 2005-01-13 17:12:38 -08:00 +++ b/fs/nfsd/export.c 2005-01-13 17:12:38 -08:00 @@ -305,11 +305,6 @@ static struct svc_export *svc_export_lookup(struct svc_export *, int); -extern struct dentry * -find_exported_dentry(struct super_block *sb, void *obj, void *parent, - int (*acceptable)(void *context, struct dentry *de), - void *context); - static int check_export(struct inode *inode, int flags) { diff -Nru a/fs/xfs/Kconfig b/fs/xfs/Kconfig --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/fs/xfs/Kconfig 2005-01-13 17:12:38 -08:00 @@ -0,0 +1,80 @@ +menu "XFS support" + +config XFS_FS + tristate "XFS filesystem support" + help + XFS is a high performance journaling filesystem which originated + on the SGI IRIX platform. It is completely multi-threaded, can + support large files and large filesystems, extended attributes, + variable block sizes, is extent based, and makes extensive use of + Btrees (directories, extents, free space) to aid both performance + and scalability. + + Refer to the documentation at + for complete details. This implementation is on-disk compatible + with the IRIX version of XFS. + + To compile this file system support as a module, choose M here: the + module will be called xfs. Be aware, however, that if the file + system of your root partition is compiled as a module, you'll need + to use an initial ramdisk (initrd) to boot. + +config XFS_RT + bool "Realtime support (EXPERIMENTAL)" + depends on XFS_FS && EXPERIMENTAL + help + If you say Y here you will be able to mount and use XFS filesystems + which contain a realtime subvolume. The realtime subvolume is a + separate area of disk space where only file data is stored. The + realtime subvolume is designed to provide very deterministic + data rates suitable for media streaming applications. + + See the xfs man page in section 5 for a bit more information. 
+ + This feature is unsupported at this time, is not yet fully + functional, and may cause serious problems. + + If unsure, say N. + +config XFS_QUOTA + bool "Quota support" + depends on XFS_FS + help + If you say Y here, you will be able to set limits for disk usage on + a per user and/or a per group basis under XFS. XFS considers quota + information as filesystem metadata and uses journaling to provide a + higher level guarantee of consistency. The on-disk data format for + quota is also compatible with the IRIX version of XFS, allowing a + filesystem to be migrated between Linux and IRIX without any need + for conversion. + + If unsure, say N. More comprehensive documentation can be found in + README.quota in the xfsprogs package. XFS quota can be used either + with or without the generic quota support enabled (CONFIG_QUOTA) - + they are completely independent subsystems. + +config XFS_SECURITY + bool "Security Label support" + depends on XFS_FS + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute namespace for inode security + labels in the XFS filesystem. + + If you are not using a security module that requires using + extended attributes for inode security labels, say N. + +config XFS_POSIX_ACL + bool "POSIX ACL support" + depends on XFS_FS + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website . + + If you don't know what Access Control Lists are, say N. 
+ +endmenu diff -Nru a/fs/xfs/Makefile b/fs/xfs/Makefile --- a/fs/xfs/Makefile 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/Makefile 2005-01-13 17:12:38 -08:00 @@ -70,6 +70,7 @@ xfs-$(CONFIG_PROC_FS) += linux-2.6/xfs_stats.o xfs-$(CONFIG_SYSCTL) += linux-2.6/xfs_sysctl.o xfs-$(CONFIG_COMPAT) += linux-2.6/xfs_ioctl32.o +xfs-$(CONFIG_EXPORTFS) += linux-2.6/xfs_export.o xfs-y += xfs_alloc.o \ diff -Nru a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c --- a/fs/xfs/linux-2.6/xfs_aops.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_aops.c 2005-01-13 17:12:38 -08:00 @@ -71,7 +71,7 @@ bhv_desc_t *bdp; vnode_t *vp = LINVFS_GET_VP(inode); loff_t isize = i_size_read(inode); - loff_t offset = page->index << PAGE_CACHE_SHIFT; + loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; int delalloc = -1, unmapped = -1, unwritten = -1; if (page_has_buffers(page)) diff -Nru a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c --- a/fs/xfs/linux-2.6/xfs_buf.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_buf.c 2005-01-13 17:12:38 -08:00 @@ -54,6 +54,7 @@ #include #include #include +#include #include "xfs_linux.h" @@ -128,34 +129,71 @@ kmem_zone_free(pagebuf_cache, (pb)); /* - * Pagebuf hashing - */ + * Page Region interfaces. + * + * For pages in filesystems where the blocksize is smaller than the + * pagesize, we use the page->private field (long) to hold a bitmap + * of uptodate regions within the page. + * + * Each such region is "bytes per page / bits per long" bytes long. 
+ * + * NBPPR == number-of-bytes-per-page-region + * BTOPR == bytes-to-page-region (rounded up) + * BTOPRT == bytes-to-page-region-truncated (rounded down) + */ +#if (BITS_PER_LONG == 32) +#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ +#elif (BITS_PER_LONG == 64) +#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */ +#else +#error BITS_PER_LONG must be 32 or 64 +#endif +#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG) +#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT) +#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT)) -#define NBITS 8 -#define NHASH (1<pb_hash_index] + mask = ~0UL; + mask <<= BITS_PER_LONG - (final - first); + mask >>= BITS_PER_LONG - (final); -STATIC int -_bhash( - struct block_device *bdev, - loff_t base) -{ - int bit, hval; - - base >>= 9; - base ^= (unsigned long)bdev / L1_CACHE_BYTES; - for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) { - hval ^= (int)base & (NHASH-1); - base >>= NBITS; - } - return hval; + ASSERT(offset + length <= PAGE_CACHE_SIZE); + ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0); + + return mask; +} + +STATIC inline void +set_page_region( + struct page *page, + size_t offset, + size_t length) +{ + page->private |= page_region_mask(offset, length); + if (page->private == ~0UL) + SetPageUptodate(page); +} + +STATIC inline int +test_page_region( + struct page *page, + size_t offset, + size_t length) +{ + unsigned long mask = page_region_mask(offset, length); + + return (mask && (page->private & mask) == mask); } /* @@ -341,7 +379,6 @@ uint flags) { struct address_space *mapping = bp->pb_target->pbr_mapping; - unsigned int sectorshift = bp->pb_target->pbr_sshift; size_t blocksize = bp->pb_target->pbr_bsize; size_t size = bp->pb_count_desired; size_t nbytes, offset; @@ -401,22 +438,11 @@ if (!PageUptodate(page)) { page_count--; - if (blocksize == PAGE_CACHE_SIZE) { + if (blocksize >= PAGE_CACHE_SIZE) { if (flags & PBF_READ) bp->pb_locked = 1; } else if 
(!PagePrivate(page)) { - unsigned long j, range; - - /* - * In this case page->private holds a bitmap - * of uptodate sectors within the page - */ - ASSERT(blocksize < PAGE_CACHE_SIZE); - range = (offset + nbytes) >> sectorshift; - for (j = offset >> sectorshift; j < range; j++) - if (!test_bit(j, &page->private)) - break; - if (j == range) + if (test_page_region(page, offset, nbytes)) page_count++; } } @@ -484,8 +510,8 @@ * are unlocked. No I/O is implied by this call. */ xfs_buf_t * -_pagebuf_find( /* find buffer for block */ - xfs_buftarg_t *target,/* target for block */ +_pagebuf_find( + xfs_buftarg_t *btp, /* block device target */ loff_t ioff, /* starting offset of range */ size_t isize, /* length of range */ page_buf_flags_t flags, /* PBF_TRYLOCK */ @@ -493,59 +519,55 @@ { loff_t range_base; size_t range_length; - int hval; - pb_hash_t *h; + xfs_bufhash_t *hash; xfs_buf_t *pb, *n; - int not_locked; range_base = (ioff << BBSHIFT); range_length = (isize << BBSHIFT); - /* Ensure we never do IOs smaller than the sector size */ - BUG_ON(range_length < (1 << target->pbr_sshift)); + /* Check for IOs smaller than the sector size / not sector aligned */ + ASSERT(!(range_length < (1 << btp->pbr_sshift))); + ASSERT(!(range_base & (loff_t)btp->pbr_smask)); - /* Ensure we never do IOs that are not sector aligned */ - BUG_ON(range_base & (loff_t)target->pbr_smask); + hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; - hval = _bhash(target->pbr_bdev, range_base); - h = &pbhash[hval]; + spin_lock(&hash->bh_lock); - spin_lock(&h->pb_hash_lock); - list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) { - if (pb->pb_target == target && - pb->pb_file_offset == range_base && + list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) { + ASSERT(btp == pb->pb_target); + if (pb->pb_file_offset == range_base && pb->pb_buffer_length == range_length) { - /* If we look at something bring it to the - * front of the list for next time + /* + * If we look 
at something bring it to the + * front of the list for next time. */ atomic_inc(&pb->pb_hold); - list_move(&pb->pb_hash_list, &h->pb_hash); + list_move(&pb->pb_hash_list, &hash->bh_list); goto found; } } /* No match found */ if (new_pb) { - _pagebuf_initialize(new_pb, target, range_base, + _pagebuf_initialize(new_pb, btp, range_base, range_length, flags); - new_pb->pb_hash_index = hval; - list_add(&new_pb->pb_hash_list, &h->pb_hash); + new_pb->pb_hash = hash; + list_add(&new_pb->pb_hash_list, &hash->bh_list); } else { XFS_STATS_INC(pb_miss_locked); } - spin_unlock(&h->pb_hash_lock); - return (new_pb); + spin_unlock(&hash->bh_lock); + return new_pb; found: - spin_unlock(&h->pb_hash_lock); + spin_unlock(&hash->bh_lock); /* Attempt to get the semaphore without sleeping, * if this does not work then we need to drop the * spinlock and do a hard attempt on the semaphore. */ - not_locked = down_trylock(&pb->pb_sema); - if (not_locked) { + if (down_trylock(&pb->pb_sema)) { if (!(flags & PBF_TRYLOCK)) { /* wait for buffer ownership */ PB_TRACE(pb, "get_lock", 0); @@ -712,8 +734,6 @@ bdi = target->pbr_mapping->backing_dev_info; if (bdi_read_congested(bdi)) return; - if (bdi_write_congested(bdi)) - return; flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD); xfs_buf_read_flags(target, ioff, isize, flags); @@ -867,18 +887,29 @@ pagebuf_rele( xfs_buf_t *pb) { - pb_hash_t *hash = pb_hash(pb); + xfs_bufhash_t *hash = pb->pb_hash; PB_TRACE(pb, "rele", pb->pb_relse); - if (atomic_dec_and_lock(&pb->pb_hold, &hash->pb_hash_lock)) { + /* + * pagebuf_lookup buffers are not hashed, not delayed write, + * and don't have their own release routines. Special case. 
+ */ + if (unlikely(!hash)) { + ASSERT(!pb->pb_relse); + if (atomic_dec_and_test(&pb->pb_hold)) + xfs_buf_free(pb); + return; + } + + if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { int do_free = 1; if (pb->pb_relse) { atomic_inc(&pb->pb_hold); - spin_unlock(&hash->pb_hash_lock); + spin_unlock(&hash->bh_lock); (*(pb->pb_relse)) (pb); - spin_lock(&hash->pb_hash_lock); + spin_lock(&hash->bh_lock); do_free = 0; } @@ -893,10 +924,10 @@ if (do_free) { list_del_init(&pb->pb_hash_list); - spin_unlock(&hash->pb_hash_lock); + spin_unlock(&hash->bh_lock); pagebuf_free(pb); } else { - spin_unlock(&hash->pb_hash_lock); + spin_unlock(&hash->bh_lock); } } } @@ -936,6 +967,7 @@ return(locked ? 0 : -EBUSY); } +#ifdef DEBUG /* * pagebuf_lock_value * @@ -947,6 +979,7 @@ { return(atomic_read(&pb->pb_sema.count)); } +#endif /* * pagebuf_lock @@ -1217,7 +1250,6 @@ { xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; unsigned int i, blocksize = pb->pb_target->pbr_bsize; - unsigned int sectorshift = pb->pb_target->pbr_sshift; struct bio_vec *bvec = bio->bi_io_vec; if (bio->bi_size) @@ -1235,14 +1267,7 @@ SetPageUptodate(page); } else if (!PagePrivate(page) && (pb->pb_flags & _PBF_PAGE_CACHE)) { - unsigned long j, range; - - ASSERT(blocksize < PAGE_CACHE_SIZE); - range = (bvec->bv_offset + bvec->bv_len) >> sectorshift; - for (j = bvec->bv_offset >> sectorshift; j < range; j++) - set_bit(j, &page->private); - if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1)) - SetPageUptodate(page); + set_page_region(page, bvec->bv_offset, bvec->bv_len); } if (_pagebuf_iolocked(pb)) { @@ -1471,28 +1496,59 @@ */ void xfs_wait_buftarg( - xfs_buftarg_t *target) + xfs_buftarg_t *btp) { - xfs_buf_t *pb, *n; - pb_hash_t *h; - int i; + xfs_buf_t *bp, *n; + xfs_bufhash_t *hash; + uint i; - for (i = 0; i < NHASH; i++) { - h = &pbhash[i]; + for (i = 0; i < (1 << btp->bt_hashshift); i++) { + hash = &btp->bt_hash[i]; again: - spin_lock(&h->pb_hash_lock); - list_for_each_entry_safe(pb, n, &h->pb_hash, 
pb_hash_list) { - if (pb->pb_target == target && - !(pb->pb_flags & PBF_FS_MANAGED)) { - spin_unlock(&h->pb_hash_lock); + spin_lock(&hash->bh_lock); + list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) { + ASSERT(btp == bp->pb_target); + if (!(bp->pb_flags & PBF_FS_MANAGED)) { + spin_unlock(&hash->bh_lock); delay(100); goto again; } } - spin_unlock(&h->pb_hash_lock); + spin_unlock(&hash->bh_lock); } } +/* + * Allocate buffer hash table for a given target. + * For devices containing metadata (i.e. not the log/realtime devices) + * we need to allocate a much larger hash table. + */ +STATIC void +xfs_alloc_bufhash( + xfs_buftarg_t *btp, + int external) +{ + unsigned int i; + + btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ + btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; + btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * + sizeof(xfs_bufhash_t), KM_SLEEP); + for (i = 0; i < (1 << btp->bt_hashshift); i++) { + spin_lock_init(&btp->bt_hash[i].bh_lock); + INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); + } +} + +STATIC void +xfs_free_bufhash( + xfs_buftarg_t *btp) +{ + kmem_free(btp->bt_hash, + (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t)); + btp->bt_hash = NULL; +} + void xfs_free_buftarg( xfs_buftarg_t *btp, @@ -1501,6 +1557,7 @@ xfs_flush_buftarg(btp, 1); if (external) xfs_blkdev_put(btp->pbr_bdev); + xfs_free_bufhash(btp); iput(btp->pbr_mapping->host); kmem_free(btp, sizeof(*btp)); } @@ -1515,11 +1572,12 @@ truncate_inode_pages(btp->pbr_mapping, 0LL); } -int -xfs_setsize_buftarg( +STATIC int +xfs_setsize_buftarg_flags( xfs_buftarg_t *btp, unsigned int blocksize, - unsigned int sectorsize) + unsigned int sectorsize, + int verbose) { btp->pbr_bsize = blocksize; btp->pbr_sshift = ffs(sectorsize) - 1; @@ -1531,9 +1589,42 @@ sectorsize, XFS_BUFTARG_NAME(btp)); return EINVAL; } + + if (verbose && + (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { + printk(KERN_WARNING + "XFS: %u byte sectors in use on device %s. 
" + "This is suboptimal; %u or greater is ideal.\n", + sectorsize, XFS_BUFTARG_NAME(btp), + (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); + } + return 0; } +/* +* When allocating the initial buffer target we have not yet +* read in the superblock, so don't know what sized sectors +* are being used is at this early stage. Play safe. +*/ +STATIC int +xfs_setsize_buftarg_early( + xfs_buftarg_t *btp, + struct block_device *bdev) +{ + return xfs_setsize_buftarg_flags(btp, + PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0); +} + +int +xfs_setsize_buftarg( + xfs_buftarg_t *btp, + unsigned int blocksize, + unsigned int sectorsize) +{ + return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); +} + STATIC int xfs_mapping_buftarg( xfs_buftarg_t *btp, @@ -1569,7 +1660,8 @@ xfs_buftarg_t * xfs_alloc_buftarg( - struct block_device *bdev) + struct block_device *bdev, + int external) { xfs_buftarg_t *btp; @@ -1577,10 +1669,11 @@ btp->pbr_dev = bdev->bd_dev; btp->pbr_bdev = bdev; - if (xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, bdev_hardsect_size(bdev))) + if (xfs_setsize_buftarg_early(btp, bdev)) goto error; if (xfs_mapping_buftarg(btp, bdev)) goto error; + xfs_alloc_bufhash(btp, external); return btp; error: @@ -1846,8 +1939,6 @@ int __init pagebuf_init(void) { - int i; - pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (pagebuf_cache == NULL) { @@ -1866,11 +1957,6 @@ if (pagebuf_shake == NULL) { pagebuf_terminate(); return -ENOMEM; - } - - for (i = 0; i < NHASH; i++) { - spin_lock_init(&pbhash[i].pb_hash_lock); - INIT_LIST_HEAD(&pbhash[i].pb_hash); } return 0; diff -Nru a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h --- a/fs/xfs/linux-2.6/xfs_buf.h 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_buf.h 2005-01-13 17:12:38 -08:00 @@ -95,6 +95,11 @@ #define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0) #define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0) +typedef 
struct xfs_bufhash { + struct list_head bh_list; + spinlock_t bh_lock; +} xfs_bufhash_t; + typedef struct xfs_buftarg { dev_t pbr_dev; struct block_device *pbr_bdev; @@ -102,32 +107,35 @@ unsigned int pbr_bsize; unsigned int pbr_sshift; size_t pbr_smask; + + /* per-device buffer hash table */ + uint bt_hashmask; + uint bt_hashshift; + xfs_bufhash_t *bt_hash; } xfs_buftarg_t; /* * xfs_buf_t: Buffer structure for page cache-based buffers * * This buffer structure is used by the page cache buffer management routines - * to refer to an assembly of pages forming a logical buffer. The actual - * I/O is performed with buffer_head or bio structures, as required by drivers, - * for drivers which do not understand this structure. The buffer structure is - * used on temporary basis only, and discarded when released. - * - * The real data storage is recorded in the page cache. Metadata is - * hashed to the inode for the block device on which the file system resides. - * File data is hashed to the inode for the file. Pages which are only - * partially filled with data have bits set in their block_map entry - * to indicate which disk blocks in the page are not valid. + * to refer to an assembly of pages forming a logical buffer. The actual I/O + * is performed with buffer_head structures, as required by drivers. + * + * The buffer structure is used on a temporary basis only, and discarded when + * released. The real data storage is recorded in the page cache. Metadata is + * hashed to the block device on which the file system resides. 
*/ struct xfs_buf; + +/* call-back function on I/O completion */ typedef void (*page_buf_iodone_t)(struct xfs_buf *); - /* call-back function on I/O completion */ +/* call-back function on buffer release */ typedef void (*page_buf_relse_t)(struct xfs_buf *); - /* call-back function on I/O completion */ +/* pre-write function */ typedef int (*page_buf_bdstrat_t)(struct xfs_buf *); -#define PB_PAGES 4 +#define PB_PAGES 2 typedef struct xfs_buf { struct semaphore pb_sema; /* semaphore for lockables */ @@ -136,8 +144,9 @@ wait_queue_head_t pb_waiters; /* unpin waiters */ struct list_head pb_list; page_buf_flags_t pb_flags; /* status flags */ - struct list_head pb_hash_list; - xfs_buftarg_t *pb_target; /* logical object */ + struct list_head pb_hash_list; /* hash table list */ + xfs_bufhash_t *pb_hash; /* hash table list start */ + xfs_buftarg_t *pb_target; /* buffer target (device) */ atomic_t pb_hold; /* reference count */ xfs_daddr_t pb_bn; /* block number for I/O */ loff_t pb_file_offset; /* offset in file */ @@ -154,10 +163,9 @@ void *pb_fspriv2; void *pb_fspriv3; unsigned short pb_error; /* error code on I/O */ - unsigned short pb_page_count; /* size of page array */ - unsigned short pb_offset; /* page offset in first page */ - unsigned char pb_locked; /* page array is locked */ - unsigned char pb_hash_index; /* hash table index */ + unsigned short pb_locked; /* page array is locked */ + unsigned int pb_page_count; /* size of page array */ + unsigned int pb_offset; /* page offset in first page */ struct page **pb_pages; /* array of page pointers */ struct page *pb_page_array[PB_PAGES]; /* inline pages */ #ifdef PAGEBUF_LOCK_TRACKING @@ -455,7 +463,7 @@ pagebuf_associate_memory(bp, val, count) #define XFS_BUF_ADDR(bp) ((bp)->pb_bn) #define XFS_BUF_SET_ADDR(bp, blk) \ - ((bp)->pb_bn = (blk)) + ((bp)->pb_bn = (xfs_daddr_t)(blk)) #define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) #define XFS_BUF_SET_OFFSET(bp, off) \ ((bp)->pb_file_offset = (off)) @@ -564,7 +572,7 @@ * 
Handling of buftargs. */ -extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *); +extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); extern void xfs_free_buftarg(xfs_buftarg_t *, int); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); diff -Nru a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/fs/xfs/linux-2.6/xfs_export.c 2005-01-13 17:12:38 -08:00 @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + + +STATIC struct dentry * +linvfs_decode_fh( + struct super_block *sb, + __u32 *fh, + int fh_len, + int fileid_type, + int (*acceptable)( + void *context, + struct dentry *de), + void *context) +{ + __u32 parent[2]; + parent[0] = parent[1] = 0; + + if (fh_len < 2 || fileid_type > 2) + return NULL; + + if (fileid_type == 2 && fh_len > 2) { + if (fh_len == 3) { + printk(KERN_WARNING + "XFS: detected filehandle without " + "parent inode generation information."); + return ERR_PTR(-ESTALE); + } + + parent[0] = fh[2]; + parent[1] = fh[3]; + } + + return find_exported_dentry(sb, fh, parent, acceptable, context); + +} + +STATIC struct dentry * +linvfs_get_dentry( + struct super_block *sb, + void *data) +{ + vnode_t *vp; + struct inode *inode; + struct dentry *result; + xfs_fid2_t xfid; + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + xfid.fid_len = sizeof(xfs_fid2_t) - sizeof(xfid.fid_len); + xfid.fid_pad = 0; + xfid.fid_gen = ((__u32 *)data)[1]; + xfid.fid_ino = ((__u32 *)data)[0]; + + VFS_VGET(vfsp, &vp, (fid_t *)&xfid, error); + if (error || vp == NULL) + return ERR_PTR(-ESTALE) ; + + inode = LINVFS_GET_IP(vp); + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +STATIC struct dentry * +linvfs_get_parent( + struct dentry *child) +{ + int error; + vnode_t *vp, *cvp; + struct dentry *parent; + struct dentry dotdot; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + dotdot.d_inode = NULL; + + cvp = NULL; + vp = LINVFS_GET_VP(child->d_inode); + VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error); + if (unlikely(error)) + return ERR_PTR(-error); + + parent = d_alloc_anon(LINVFS_GET_IP(cvp)); + if (unlikely(!parent)) 
{ + VN_RELE(cvp); + return ERR_PTR(-ENOMEM); + } + return parent; +} + +struct export_operations linvfs_export_ops = { + .decode_fh = linvfs_decode_fh, + .get_parent = linvfs_get_parent, + .get_dentry = linvfs_get_dentry, +}; diff -Nru a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c --- a/fs/xfs/linux-2.6/xfs_file.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_file.c 2005-01-13 17:12:38 -08:00 @@ -81,23 +81,23 @@ STATIC ssize_t -linvfs_read( +linvfs_aio_read( struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { - return __linvfs_read(iocb, buf, 0, count, pos); + return __linvfs_read(iocb, buf, IO_ISAIO, count, pos); } STATIC ssize_t -linvfs_read_invis( +linvfs_aio_read_invis( struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { - return __linvfs_read(iocb, buf, IO_INVIS, count, pos); + return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); } @@ -125,23 +125,23 @@ STATIC ssize_t -linvfs_write( +linvfs_aio_write( struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { - return __linvfs_write(iocb, buf, 0, count, pos); + return __linvfs_write(iocb, buf, IO_ISAIO, count, pos); } STATIC ssize_t -linvfs_write_invis( +linvfs_aio_write_invis( struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { - return __linvfs_write(iocb, buf, IO_INVIS, count, pos); + return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); } @@ -492,8 +492,8 @@ .write = do_sync_write, .readv = linvfs_readv, .writev = linvfs_writev, - .aio_read = linvfs_read, - .aio_write = linvfs_write, + .aio_read = linvfs_aio_read, + .aio_write = linvfs_aio_write, .sendfile = linvfs_sendfile, .ioctl = linvfs_ioctl, .mmap = linvfs_file_mmap, @@ -508,8 +508,8 @@ .write = do_sync_write, .readv = linvfs_readv_invis, .writev = linvfs_writev_invis, - .aio_read = linvfs_read_invis, - .aio_write = linvfs_write_invis, + .aio_read = linvfs_aio_read_invis, + .aio_write = linvfs_aio_write_invis, .sendfile = linvfs_sendfile, .ioctl = 
linvfs_ioctl_invis, .mmap = linvfs_file_mmap, diff -Nru a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c --- a/fs/xfs/linux-2.6/xfs_ioctl.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_ioctl.c 2005-01-13 17:12:38 -08:00 @@ -499,7 +499,7 @@ xfs_fsop_attrmulti_handlereq_t am_hreq; struct inode *inode; vnode_t *vp; - int i, size; + unsigned int i, size; error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, sizeof(xfs_fsop_attrmulti_handlereq_t), @@ -509,6 +509,11 @@ return -error; size = am_hreq.opcount * sizeof(attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) { + VN_RELE(vp); + return -XFS_ERROR(E2BIG); + } + ops = (xfs_attr_multiop_t *)kmalloc(size, GFP_KERNEL); if (!ops) { VN_RELE(vp); diff -Nru a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c --- a/fs/xfs/linux-2.6/xfs_iops.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_iops.c 2005-01-13 17:12:38 -08:00 @@ -369,33 +369,6 @@ return 0; } -STATIC int -linvfs_readlink( - struct dentry *dentry, - char __user *buf, - int size) -{ - vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); - uio_t uio; - iovec_t iov; - int error; - - iov.iov_base = buf; - iov.iov_len = size; - - uio.uio_iov = &iov; - uio.uio_offset = 0; - uio.uio_segflg = UIO_USERSPACE; - uio.uio_resid = size; - uio.uio_iovcnt = 1; - - VOP_READLINK(vp, &uio, 0, NULL, error); - if (error) - return -error; - - return (size - uio.uio_resid); -} - /* * careful here - this function can get called recursively, so * we need to be very careful about how much stack we use. 
@@ -694,7 +667,7 @@ }; struct inode_operations linvfs_symlink_inode_operations = { - .readlink = linvfs_readlink, + .readlink = generic_readlink, .follow_link = linvfs_follow_link, .put_link = linvfs_put_link, .permission = linvfs_permission, diff -Nru a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c --- a/fs/xfs/linux-2.6/xfs_lrw.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_lrw.c 2005-01-13 17:12:38 -08:00 @@ -317,7 +317,7 @@ xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, (void *)iovp, segs, *offset, ioflags); ret = __generic_file_aio_read(iocb, iovp, segs, offset); - if (ret == -EIOCBQUEUED) + if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -854,7 +854,7 @@ current->backing_dev_info = NULL; - if (ret == -EIOCBQUEUED) + if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); if ((ret == -ENOSPC) && diff -Nru a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c --- a/fs/xfs/linux-2.6/xfs_super.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_super.c 2005-01-13 17:12:38 -08:00 @@ -76,7 +76,6 @@ STATIC struct quotactl_ops linvfs_qops; STATIC struct super_operations linvfs_sops; -STATIC struct export_operations linvfs_export_ops; STATIC kmem_zone_t *linvfs_inode_zone; STATIC kmem_shaker_t xfs_inode_shaker; @@ -661,63 +660,6 @@ VFS_FREEZE(LINVFS_GET_VFS(sb)); } -STATIC struct dentry * -linvfs_get_parent( - struct dentry *child) -{ - int error; - vnode_t *vp, *cvp; - struct dentry *parent; - struct dentry dotdot; - - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - dotdot.d_inode = NULL; - - cvp = NULL; - vp = LINVFS_GET_VP(child->d_inode); - VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error); - if (unlikely(error)) - return ERR_PTR(-error); - - parent = d_alloc_anon(LINVFS_GET_IP(cvp)); - if (unlikely(!parent)) { - VN_RELE(cvp); - return ERR_PTR(-ENOMEM); - } - return parent; -} - -STATIC struct dentry * 
-linvfs_get_dentry( - struct super_block *sb, - void *data) -{ - vnode_t *vp; - struct inode *inode; - struct dentry *result; - xfs_fid2_t xfid; - vfs_t *vfsp = LINVFS_GET_VFS(sb); - int error; - - xfid.fid_len = sizeof(xfs_fid2_t) - sizeof(xfid.fid_len); - xfid.fid_pad = 0; - xfid.fid_gen = ((__u32 *)data)[1]; - xfid.fid_ino = ((__u32 *)data)[0]; - - VFS_VGET(vfsp, &vp, (fid_t *)&xfid, error); - if (error || vp == NULL) - return ERR_PTR(-ESTALE) ; - - inode = LINVFS_GET_IP(vp); - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; -} - STATIC int linvfs_show_options( struct seq_file *m, @@ -810,7 +752,9 @@ } sb_min_blocksize(sb, BBSIZE); +#ifdef CONFIG_EXPORTFS sb->s_export_op = &linvfs_export_ops; +#endif sb->s_qcop = &linvfs_qops; sb->s_op = &linvfs_sops; @@ -878,12 +822,6 @@ { return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super); } - - -STATIC struct export_operations linvfs_export_ops = { - .get_parent = linvfs_get_parent, - .get_dentry = linvfs_get_dentry, -}; STATIC struct super_operations linvfs_sops = { .alloc_inode = linvfs_alloc_inode, diff -Nru a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h --- a/fs/xfs/linux-2.6/xfs_super.h 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_super.h 2005-01-13 17:12:38 -08:00 @@ -133,4 +133,6 @@ struct block_device **); extern void xfs_blkdev_put(struct block_device *); +extern struct export_operations linvfs_export_ops; + #endif /* __XFS_SUPER_H__ */ diff -Nru a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h --- a/fs/xfs/linux-2.6/xfs_vnode.h 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/linux-2.6/xfs_vnode.h 2005-01-13 17:12:38 -08:00 @@ -379,6 +379,7 @@ /* * Flags for read/write calls - same values as IRIX */ +#define IO_ISAIO 0x00001 /* don't wait for completion */ #define IO_ISDIRECT 0x00004 /* bypass page cache */ #define IO_INVIS 0x00020 /* don't update inode timestamps */ diff -Nru a/fs/xfs/xfs_clnt.h 
b/fs/xfs/xfs_clnt.h --- a/fs/xfs/xfs_clnt.h 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/xfs_clnt.h 2005-01-13 17:12:38 -08:00 @@ -64,6 +64,7 @@ int sunit; /* stripe unit (BBs) */ int swidth; /* stripe width (BBs), multiple of sunit */ uchar_t iosizelog; /* log2 of the preferred I/O size */ + int ihashsize; /* inode hash table size (buckets) */ }; /* diff -Nru a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c --- a/fs/xfs/xfs_iget.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/xfs_iget.c 2005-01-13 17:12:38 -08:00 @@ -55,22 +55,32 @@ #include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_utils.h" +#include "xfs_bit.h" /* * Initialize the inode hash table for the newly mounted file system. - * - * mp -- this is the mount point structure for the file system being - * initialized + * Choose an initial table size based on user specified value, else + * use a simple algorithm using the maximum number of inodes as an + * indicator for table size, and cap it at 16 pages (gettin' big). */ void xfs_ihash_init(xfs_mount_t *mp) { - int i; + __uint64_t icount; + uint i, flags = KM_SLEEP | KM_MAYFAIL; - mp->m_ihsize = XFS_BUCKETS(mp); - mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize - * sizeof(xfs_ihash_t), KM_SLEEP); - ASSERT(mp->m_ihash != NULL); + if (!mp->m_ihsize) { + icount = mp->m_maxicount ? mp->m_maxicount : + (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog); + mp->m_ihsize = 1 << max_t(uint, xfs_highbit64(icount) / 3, 8); + mp->m_ihsize = min_t(uint, mp->m_ihsize, 16 * PAGE_SIZE); + } + + while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize * + sizeof(xfs_ihash_t), flags))) { + if ((mp->m_ihsize >>= 1) <= NBPP) + flags = KM_SLEEP; + } for (i = 0; i < mp->m_ihsize; i++) { rwlock_init(&(mp->m_ihash[i].ih_lock)); } @@ -88,29 +98,19 @@ /* * Initialize the inode cluster hash table for the newly mounted file system. - * - * mp -- this is the mount point structure for the file system being - * initialized + * Its size is derived from the ihash table size. 
*/ void xfs_chash_init(xfs_mount_t *mp) { - int i; + uint i; - /* - * m_chash size is based on m_ihash - * with a minimum of 37 entries - */ - mp->m_chsize = (XFS_BUCKETS(mp)) / - (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); - if (mp->m_chsize < 37) { - mp->m_chsize = 37; - } + mp->m_chsize = max_t(uint, 1, mp->m_ihsize / + (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)); + mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize); mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize * sizeof(xfs_chash_t), KM_SLEEP); - ASSERT(mp->m_chash != NULL); - for (i = 0; i < mp->m_chsize; i++) { spinlock_init(&mp->m_chash[i].ch_lock,"xfshash"); } diff -Nru a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h --- a/fs/xfs/xfs_inode.h 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/xfs_inode.h 2005-01-13 17:12:38 -08:00 @@ -182,10 +182,6 @@ uint ih_version; } xfs_ihash_t; -/* - * Inode hashing and hash bucket locking. - */ -#define XFS_BUCKETS(mp) (37*(mp)->m_sb.sb_agcount-1) #define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize)) /* @@ -193,7 +189,6 @@ * find inodes that share a cluster and can be flushed to disk at the same * time. */ - typedef struct xfs_chashlist { struct xfs_chashlist *chl_next; struct xfs_inode *chl_ip; @@ -207,6 +202,8 @@ lock_t ch_lock; } xfs_chash_t; +#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize)) + /* * This is the xfs in-core inode structure. 
@@ -448,12 +445,6 @@ #endif #define BHV_IS_XFS(bdp) (BHV_OPS(bdp) == &xfs_vnodeops) - -/* - * Pick the inode cluster hash bucket - * (m_chash is the same size as m_ihash) - */ -#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize)) /* * For multiple groups support: if S_ISGID bit is set in the parent diff -Nru a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h --- a/fs/xfs/xfs_mount.h 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/xfs_mount.h 2005-01-13 17:12:38 -08:00 @@ -296,7 +296,7 @@ xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ lock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - int m_ihsize; /* size of next field */ + uint m_ihsize; /* size of next field */ struct xfs_ihash *m_ihash; /* fs private inode hash table*/ struct xfs_inode *m_inodes; /* active inode list */ struct list_head m_del_inodes; /* inodes to reclaim */ @@ -376,7 +376,7 @@ xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ - int m_chsize; /* size of next field */ + uint m_chsize; /* size of next field */ struct xfs_chash *m_chash; /* fs private inode per-cluster * hash table */ struct xfs_dmops m_dm_ops; /* vector of DMI ops */ diff -Nru a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c --- a/fs/xfs/xfs_vfsops.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/xfs_vfsops.c 2005-01-13 17:12:38 -08:00 @@ -252,6 +252,7 @@ ap->logbufsize); return XFS_ERROR(EINVAL); } + mp->m_ihsize = ap->ihashsize; mp->m_logbsize = ap->logbufsize; mp->m_fsname_len = strlen(ap->fsname) + 1; mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); @@ -468,19 +469,19 @@ * Setup xfs_mount buffer target pointers */ error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(ddev); + mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0); if (!mp->m_ddev_targp) { xfs_blkdev_put(logdev); xfs_blkdev_put(rtdev); return error; } if (rtdev) { - 
mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev); + mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1); if (!mp->m_rtdev_targp) goto error0; } mp->m_logdev_targp = (logdev && logdev != ddev) ? - xfs_alloc_buftarg(logdev) : mp->m_ddev_targp; + xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp; if (!mp->m_logdev_targp) goto error0; @@ -1579,7 +1580,7 @@ } /* - * xfs_vget - called by DMAPI to get vnode from file handle + * xfs_vget - called by DMAPI and NFSD to get vnode from file handle */ STATIC int xfs_vget( @@ -1621,7 +1622,7 @@ return XFS_ERROR(EIO); } - if (ip->i_d.di_mode == 0 || (igen && (ip->i_d.di_gen != igen))) { + if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { xfs_iput_new(ip, XFS_ILOCK_SHARED); *vpp = NULL; return XFS_ERROR(ENOENT); @@ -1646,6 +1647,7 @@ #define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ #define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ #define MNTOPT_MTPT "mtpt" /* filesystem mount point */ +#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ #define MNTOPT_NOLOGFLUSH "nologflush" /* don't hard flush on log writes */ #define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ @@ -1734,6 +1736,13 @@ iosize = simple_strtoul(value, &eov, 10); args->flags |= XFSMNT_IOSIZE; args->iosizelog = (uint8_t) iosize; + } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) { + if (!value || !*value) { + printk("XFS: %s option requires an argument\n", + this_char); + return EINVAL; + } + args->ihashsize = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_WSYNC)) { args->flags |= XFSMNT_WSYNC; } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { diff -Nru a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c --- a/fs/xfs/xfs_vnodeops.c 2005-01-13 17:12:38 -08:00 +++ b/fs/xfs/xfs_vnodeops.c 2005-01-13 17:12:38 -08:00 @@ -3900,7 +3900,7 @@ int error; if (vp && VN_BAD(vp)) - return 0; + goto reclaim; /* The hash lock here protects a 
thread in xfs_iget_core from * racing with us on linking the inode back with a vnode. @@ -3948,8 +3948,7 @@ */ if (error) { xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); - return (0); + goto reclaim; } xfs_iflock(ip); /* synchronize with xfs_iflush_done */ } @@ -3968,6 +3967,7 @@ xfs_iunlock(ip, XFS_ILOCK_EXCL); } + reclaim: xfs_ireclaim(ip); return 0; } diff -Nru a/include/linux/fs.h b/include/linux/fs.h --- a/include/linux/fs.h 2005-01-13 17:12:38 -08:00 +++ b/include/linux/fs.h 2005-01-13 17:12:38 -08:00 @@ -1141,6 +1141,10 @@ }; +extern struct dentry * +find_exported_dentry(struct super_block *sb, void *obj, void *parent, + int (*acceptable)(void *context, struct dentry *de), + void *context); struct file_system_type { const char *name;